## speak onetime if person is apper

In [1]:
import cv2
import os
import time
import pyttsx3

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Define paths to the model files
prototxt_path = r'gender_pre_trained_models\gender_deploy.prototxt'
caffemodel_path = r'gender_pre_trained_models\gender_net.caffemodel'

# Check if files exist
if not os.path.isfile(prototxt_path):
    raise FileNotFoundError(f"Cannot find the prototxt file at {prototxt_path}")
if not os.path.isfile(caffemodel_path):
    raise FileNotFoundError(f"Cannot find the caffemodel file at {caffemodel_path}")

# Load the pre-trained Caffe models for gender classification
gender_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Define the mean values for the gender model
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)

# Gender list
gender_list = ['Male', 'Female']

# Load the pre-trained face detection model
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Open the webcam
cap = cv2.VideoCapture(0)

# Initialize variables to keep track of detection and greeting logic
last_detect_time = 0
last_greet_time = 0
greeting_interval = 2  # Interval to check if person is not available
greeted = False

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break

    # Convert frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) > 0:
        last_detect_time = time.time()
        if not greeted:
            for (x, y, w, h) in faces:
                # Extract the face region
                face_img = frame[y:y+h, x:x+w].copy()

                # Prepare the face image for classification
                blob = cv2.dnn.blobFromImage(face_img, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
                gender_net.setInput(blob)

                # Perform gender classification
                gender_preds = gender_net.forward()
                gender = gender_list[gender_preds[0].argmax()]

                # Draw a rectangle around the face and display the gender
                label = f'{gender}'
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
                cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

                # Say the greeting based on detected gender
                if gender == 'Male':
                    engine.say("Hello Sir")
                elif gender == 'Female':
                    engine.say("Hello Miss")
                engine.runAndWait()
                greeted = True
    else:
        # Check if the person has been missing for more than the greeting interval
        if time.time() - last_detect_time >= greeting_interval:
            greeted = False

    # Display the resulting frame
    cv2.imshow('Gender Classification', frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


## good morning

In [2]:
from gtts import gTTS
import datetime
import pytz
import os
import time

def get_greeting():
    # Define the timezone for Sri Lanka
    sri_lanka_tz = pytz.timezone('Asia/Colombo')
    
    # Get the current time in Sri Lanka
    now = datetime.datetime.now(sri_lanka_tz)
    
    # Determine the greeting based on the current time
    hour = now.hour
    if 5 <= hour < 12:
        return "Good morning"
    elif 12 <= hour < 18:
        return "Good afternoon"
    else:
        return "Good evening"

def speak(text):
    # Convert text to speech
    tts = gTTS(text=text, lang='en')
    audio_file = 'greeting.mp3'
    tts.save(audio_file)
    
    # Play the audio file
    os.system(f"start {audio_file}")  # Use "start" for Windows, "open" for macOS, "xdg-open" for Linux

def main():
    # Define the interval for repeating greetings
    greeting_interval = 60 * 60  # 1 hour
    last_greeting_time = 0
    
    while True:
        current_time = time.time()
        
        # Check if it's time to speak a new greeting
        if current_time - last_greeting_time >= greeting_interval:
            greeting = get_greeting()
            speak(greeting)
            last_greeting_time = current_time
        
        # Sleep for a short time to avoid high CPU usage
        time.sleep(60)  # Check every minute

if __name__ == "__main__":
    main()


## combine code

In [9]:
import cv2
import os
import time
import pyttsx3
import datetime
import pytz

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Define paths to the model files
prototxt_path = r'gender_pre_trained_models\gender_deploy.prototxt'
caffemodel_path = r'gender_pre_trained_models\gender_net.caffemodel'

# Check if files exist
if not os.path.isfile(prototxt_path):
    raise FileNotFoundError(f"Cannot find the prototxt file at {prototxt_path}")
if not os.path.isfile(caffemodel_path):
    raise FileNotFoundError(f"Cannot find the caffemodel file at {caffemodel_path}")

# Load the pre-trained Caffe models for gender classification
gender_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Define the mean values for the gender model
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)

# Gender list
gender_list = ['Male', 'Female']

# Load the pre-trained face detection model
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Open the webcam
cap = cv2.VideoCapture(0)

# Initialize variables to keep track of detection and greeting logic
last_detect_time = 0
last_greet_time = 0
greeting_interval = 2  # Interval to check if person is not available
greeted = False

def get_time_based_greeting():
    # Define the timezone for Sri Lanka
    sri_lanka_tz = pytz.timezone('Asia/Colombo')
    
    # Get the current time in Sri Lanka
    now = datetime.datetime.now(sri_lanka_tz)
    
    # Determine the greeting based on the current time
    hour = now.hour
    if 5 <= hour < 12:
        return "Good morning"
    elif 12 <= hour < 18:
        return "Good afternoon"
    else:
        return "Good evening"

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break

    # Convert frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) > 0:
        last_detect_time = time.time()
        if not greeted:
            for (x, y, w, h) in faces:
                # Extract the face region
                face_img = frame[y:y+h, x:x+w].copy()

                # Prepare the face image for classification
                blob = cv2.dnn.blobFromImage(face_img, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
                gender_net.setInput(blob)

                # Perform gender classification
                gender_preds = gender_net.forward()
                gender = gender_list[gender_preds[0].argmax()]

                # Draw a rectangle around the face and display the gender
                label = f'{gender}'
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
                cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

                # Get the time-based greeting
                time_based_greeting = get_time_based_greeting()
                
                # Say the greeting based on detected gender
                if gender == 'Male':
                    engine.say(f"{time_based_greeting} Sir")
                elif gender == 'Female':
                    engine.say(f"{time_based_greeting} Miss")
                engine.runAndWait()
                greeted = True
    else:
        # Check if the person has been missing for more than the greeting interval
        if time.time() - last_detect_time >= greeting_interval:
            greeted = False

    # Display the resulting frame
    cv2.imshow('Gender Classification', frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


## updated one | say greeting with sir/miss

In [4]:
import cv2
import os
import time
import pyttsx3
import datetime
import pytz

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Define paths to the model files
prototxt_path = r'gender_pre_trained_models\gender_deploy.prototxt'
caffemodel_path = r'gender_pre_trained_models\gender_net.caffemodel'

# Check if files exist
if not os.path.isfile(prototxt_path):
    raise FileNotFoundError(f"Cannot find the prototxt file at {prototxt_path}")
if not os.path.isfile(caffemodel_path):
    raise FileNotFoundError(f"Cannot find the caffemodel file at {caffemodel_path}")

# Load the pre-trained Caffe models for gender classification
gender_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Define the mean values for the gender model
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)

# Gender list
gender_list = ['Male', 'Female']

# Load the pre-trained face detection model
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Open the webcam
cap = cv2.VideoCapture(0)

# Initialize variables to keep track of detection and greeting logic
last_detect_time = 0
greeting_interval = 2  # Interval to check if person is not available
greeted = False

def get_time_based_greeting():
    # Define the timezone for Sri Lanka
    sri_lanka_tz = pytz.timezone('Asia/Colombo')
    
    # Get the current time in Sri Lanka
    now = datetime.datetime.now(sri_lanka_tz)
    
    # Determine the greeting based on the current time
    hour = now.hour
    if 5 <= hour < 12:
        return "Good morning"
    elif 12 <= hour < 18:
        return "Good afternoon"
    else:
        return "Good evening"

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break

    # Convert frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) > 0:
        last_detect_time = time.time()

        if not greeted:
            for (x, y, w, h) in faces:
                # Extract the face region
                face_img = frame[y:y+h, x:x+w].copy()

                # Prepare the face image for classification
                blob = cv2.dnn.blobFromImage(face_img, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
                gender_net.setInput(blob)

                # Perform gender classification
                gender_preds = gender_net.forward()
                gender = gender_list[gender_preds[0].argmax()]

                # Draw a rectangle around the face and display the gender
                label = f'{gender}'
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
                cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

                # Get the time-based greeting
                time_based_greeting = get_time_based_greeting()
                
                # Say the greeting based on detected gender
                if gender == 'Male':
                    engine.say(f"{time_based_greeting} Sir")
                elif gender == 'Female':
                    engine.say(f"{time_based_greeting} Miss")
                engine.runAndWait()
                
                # Set greeted to True to prevent repeated greetings
                greeted = True
                break  # Exit the loop after greeting
    else:
        # Check if the person has been missing for more than the greeting interval
        if time.time() - last_detect_time >= greeting_interval:
            greeted = False

    # Display the resulting frame
    cv2.imshow('Gender Classification', frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


## gsheet reading

In [8]:
import os
import pyttsx3
from google.oauth2 import service_account
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Path to your OAuth 2.0 credentials file
CREDENTIALS_FILE = 'credentials.json'
# Scopes required for Google Sheets API
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

def get_google_sheets_service():
    """Builds a Google Sheets API service object using OAuth 2.0."""
    flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
    creds = flow.run_local_server(port=0)  # Opens a browser window for user authorization
    service = build('sheets', 'v4', credentials=creds)
    return service

def read_sheet_data(service, spreadsheet_id, range_name):
    """Reads data from a Google Sheet."""
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute()
    values = result.get('values', [])
    if not values:
        print('No data found.')
    return values

def write_sheet_data(service, spreadsheet_id, range_name, values):
    """Writes data to a Google Sheet."""
    sheet = service.spreadsheets()
    body = {
        'values': values
    }
    result = sheet.values().update(spreadsheetId=spreadsheet_id, range=range_name, valueInputOption='RAW', body=body).execute()
    return result

def speak(text):
    """Speaks the provided text."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

def main():
    service = get_google_sheets_service()
    
    # Replace with your Google Sheets ID and range
    spreadsheet_id = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
    range_name = 'Sheet1!A1:C'  # Adjust the range as needed
    
    # Read data from the Google Sheet
    data = read_sheet_data(service, spreadsheet_id, range_name)
    
    # Speak the details
    for row in data:
        if len(row) >= 3:  # Ensure there are at least 3 columns
            product = row[0]
            brand_type = row[1]
            price = row[2]
            details = f"Product: {product}, Brand and Type: {brand_type}, Price: {price}"
            print(details)  # Optional: Print to console
            speak(details)  # Read out the details

    # Example data to write
    new_values = [
        ["New Product", "New Brand", "New Price"],
        ["Another Product", "Another Brand", "Another Price"]
    ]
    
    # Write new data to the Google Sheet
    write_range = 'Sheet1!A4:C'  # Adjust the range where you want to write the new data
    result = write_sheet_data(service, spreadsheet_id, write_range, new_values)
    print(f"Cells updated: {result.get('updatedCells')}")

if __name__ == '__main__':
    main()


Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=965765739038-bsu15r84t2433o5ncsj3fnuktd39q13u.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A64558%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fspreadsheets&state=l1DnhPlkqEaPLde5jhafG94uOWsnep&access_type=offline
Product: Product, Brand and Type: BrandType, Price: Price
Product: TV, Brand and Type: samsung 32'', Price: Rs.38000
Product: gas cooker , Brand and Type: usha , Price: Rs.15000
Product: New Product, Brand and Type: New Brand, Price: New Price
Product: Another Product, Brand and Type: Another Brand, Price: Another Price
Cells updated: 6


## without accesing g account

In [5]:
import os
import pickle
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Replace with your Google Sheets ID and range
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'  # Extracted from the URL
READ_RANGE = 'Sheet1!A1:C'  # Adjust the range as needed

# Path to your credentials.json file
CREDENTIALS_FILE = 'credentials.json'
TOKEN_FILE = 'token.pickle'

def get_google_sheets_service():
    """Builds a Google Sheets API service object."""
    creds = None

    # Load credentials from file if they exist
    if os.path.exists(TOKEN_FILE):
        with open(TOKEN_FILE, 'rb') as token:
            creds = pickle.load(token)

    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                CREDENTIALS_FILE, 
                scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']
            )
            creds = flow.run_local_server(port=0)

        # Save the credentials for the next run
        with open(TOKEN_FILE, 'wb') as token:
            pickle.dump(creds, token)

    service = build('sheets', 'v4', credentials=creds)
    return service

def read_sheet_data(service, spreadsheet_id, range_name):
    """Reads data from a Google Sheet."""
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute()
    values = result.get('values', [])
    if not values:
        print('No data found.')
    return values

def main():
    service = get_google_sheets_service()

    # Read data from the Google Sheet
    data = read_sheet_data(service, SPREADSHEET_ID, READ_RANGE)

    # Print the data
    for row in data:
        print(row)

if __name__ == '__main__':
    main()


['Product', 'BrandType', 'Price']
['TV', "samsung 32''", 'Rs.38000']
['gas cooker ', 'usha ', 'Rs.15000']
['New Product', 'New Brand', 'New Price']
['Another Product', 'Another Brand', 'Another Price']


## normal accesing public sheet

In [8]:
import requests
import pandas as pd
from io import StringIO

# Replace with your Google Sheets URL
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary

# Construct the URL for the Google Sheets API
url = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def main():
    csv_data = fetch_data_from_sheet(url)
    if csv_data:
        # Convert CSV data to DataFrame using StringIO
        df = pd.read_csv(StringIO(csv_data))
        print(df)

if __name__ == '__main__':
    main()


           Product      BrandType          Price
0               TV   samsung 32''       Rs.38000
1      gas cooker           usha        Rs.15000
2      New Product      New Brand      New Price
3  Another Product  Another Brand  Another Price


## getting user inputs and answer the quension

In [12]:
import requests
import pandas as pd
from io import StringIO
import pyttsx3
import speech_recognition as sr

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def speak(text):
    """Speaks the provided text."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

def get_user_input():
    """Gets user input via speech recognition."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
        try:
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            return text
        except sr.UnknownValueError:
            print("Sorry, I did not understand that.")
            return ""
        except sr.RequestError:
            print("Sorry, there was an issue with the speech recognition service.")
            return ""

def main():
    # Greet the user
    speak("How can I help you?")
    
    # Get user input
    user_input = get_user_input()
    
    if not user_input:
        return

    # Extract keywords from user input
    keywords = ['tv', 'gas cooker']  # Add more keywords as needed
    matched_keywords = [keyword for keyword in keywords if keyword in user_input.lower()]

    if not matched_keywords:
        speak("Sorry, I couldn't find any matching items.")
        return

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        speak("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Check for matches in DataFrame
    for keyword in matched_keywords:
        matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
        if not matched_rows.empty:
            for _, row in matched_rows.iterrows():
                product = row['Product']
                brand_type = row['BrandType']
                price = row['Price']
                details = f"We have {product}, {brand_type} with the price {price}"
                speak(details)
        else:
            speak(f"Sorry, we don't have information on {keyword}.")

if __name__ == '__main__':
    main()


Listening...
You said: I won't buy gas cooker


## update google sheet accesing

In [14]:
import requests
import pandas as pd
from io import StringIO
import pyttsx3
import speech_recognition as sr

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def speak(text):
    """Speaks the provided text."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

def get_user_input():
    """Gets user input via speech recognition."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
        try:
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            return text
        except sr.UnknownValueError:
            print("Sorry, I did not understand that.")
            return ""
        except sr.RequestError:
            print("Sorry, there was an issue with the speech recognition service.")
            return ""

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def main():
    # Greet the user
    speak("How can I help you?")
    
    # Get user input
    user_input = get_user_input()
    
    if not user_input:
        return

    # Extract keywords from user input
    keywords = ['tv', 'gas cooker', 'fridge']  # Add more keywords as needed
    matched_keywords = [keyword for keyword in keywords if keyword in user_input.lower()]

    if not matched_keywords:
        speak("Sorry, I couldn't find any matching items.")
        return

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        speak("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Check for matches in DataFrame
    for keyword in matched_keywords:
        matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
        if not matched_rows.empty:
            for _, row in matched_rows.iterrows():
                product = row['Product']
                brand_type = row['BrandType']
                price = row['Price']
                details = f"We have {product}, {brand_type} with the price {price}"
                speak(details)
        else:
            speak(f"Sorry, we don't have information on {keyword}.")

if __name__ == '__main__':
    main()


Listening...
You said: goodbye fridge


## continuse run

In [15]:
import requests
import pandas as pd
from io import StringIO
import pyttsx3
import speech_recognition as sr

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def speak(text):
    """Speaks the provided text."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

def get_user_input():
    """Gets user input via speech recognition."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
        try:
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            return text
        except sr.UnknownValueError:
            print("Sorry, I did not understand that.")
            return ""
        except sr.RequestError:
            print("Sorry, there was an issue with the speech recognition service.")
            return ""

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def main():
    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        speak("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    while True:
        # Greet the user
        speak("How can I help you?")
        
        # Get user input
        user_input = get_user_input()
        
        if not user_input:
            continue

        # Extract keywords from user input
        keywords = ['tv', 'gas cooker', 'fridge']  # Add more keywords as needed
        matched_keywords = [keyword for keyword in keywords if keyword in user_input.lower()]

        if not matched_keywords:
            speak("Sorry, I couldn't find any matching items.")
        else:
            # Check for matches in DataFrame
            for keyword in matched_keywords:
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product}, {brand_type} with the price {price}"
                        speak(details)
                else:
                    speak(f"Sorry, we don't have information on {keyword}.")
        
        # Ask if the user wants more information
        speak("Any other information you want to know?")
        user_input = get_user_input()
        if 'no' in user_input.lower():
            speak("Thank you! Have a great day!")
            break

if __name__ == '__main__':
    main()


Listening...
Sorry, I did not understand that.
Listening...
You said: I won't buy TV
Listening...
You said: buy gas cooker
Listening...
You said: I won't buy gas cooker
Listening...
You said: goodbye TV
Listening...
You said: I want bit
Listening...
You said: I won't buy fridge
Listening...
You said: I won't buy fridge
Listening...
You said: I won't buy gas cooker
Listening...
You said: I won't buy gas cook
Listening...
You said: I won't buy gas cooker
Listening...
You said: I want buy a gas cooker
Listening...


KeyboardInterrupt: 

## update code with more key words

In [17]:
import requests
import pandas as pd
from io import StringIO
import pyttsx3
import speech_recognition as sr

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def speak(text):
    """Speaks the provided text."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

def get_user_input():
    """Gets user input via speech recognition."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
        try:
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            return text
        except sr.UnknownValueError:
            print("Sorry, I did not understand that.")
            return ""
        except sr.RequestError:
            print("Sorry, there was an issue with the speech recognition service.")
            return ""

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def main():
    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        speak("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Dictionary mapping keywords to products
    product_keywords = {
        'tv': ['tv', 'television'],
        'gas cooker': ['gas cook', 'gas cooker', 'gascooker'],
        'fridge': ['fridge', 'refrigerator']
    }

    while True:
        # Greet the user
        speak("How can I help you?")
        
        # Get user input
        user_input = get_user_input()
        
        if not user_input:
            continue

        # Find matching products
        matched_products = []
        for product, keywords in product_keywords.items():
            for keyword in keywords:
                if keyword in user_input.lower():
                    matched_products.append(product)
                    break

        if not matched_products:
            speak("Sorry, I couldn't find any matching items.")
        else:
            # Check for matches in DataFrame
            for product in matched_products:
                matched_rows = df[df['Product'].str.contains(product, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        speak(details)
                else:
                    speak(f"Sorry, we don't have information on {product}.")
        
        # Ask if the user wants more information
        speak("Any other information you want to know?")
        user_input = get_user_input()
        if 'no' in user_input.lower():
            speak("Thank you! Have a great day!")
            break

if __name__ == '__main__':
    main()


Listening...
You said: buy a TV I won't buy a TV
Listening...
You said: I would buy a fridge
Listening...


KeyboardInterrupt: 

## error fixed

In [4]:
import requests
import pandas as pd
from io import StringIO
import pyttsx3
import speech_recognition as sr

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def speak(text):
    """Speaks the provided text."""
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

def get_user_input():
    """Gets user input via speech recognition."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
        try:
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            return text
        except sr.UnknownValueError:
            print("Sorry, I did not understand that.")
            return ""
        except sr.RequestError:
            print("Sorry, there was an issue with the speech recognition service.")
            return ""

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def main():
    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        speak("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Dictionary mapping keywords to products
    product_keywords = {
        'tv': ['tv', 'television'],
        'gas cooker': ['gas cook', 'gas cooker', 'gascooker'],
        'fridge': ['fridge', 'refrigerator']
    }

    while True:
        # Greet the user
        speak("How can I help you?")
        
        # Get user input
        user_input = get_user_input()
        
        if not user_input:
            continue

        while True:
            # Find matching products
            matched_products = []
            for product, keywords in product_keywords.items():
                for keyword in keywords:
                    if keyword in user_input.lower():
                        matched_products.append(product)
                        break

            if not matched_products:
                speak("Sorry, I couldn't find any matching items.")
            else:
                # Check for matches in DataFrame
                for product in matched_products:
                    matched_rows = df[df['Product'].str.contains(product, case=False, na=False)]
                    if not matched_rows.empty:
                        for _, row in matched_rows.iterrows():
                            product_name = row['Product']
                            brand_type = row['BrandType']
                            price = row['Price']
                            details = f"We have {product_name}, {brand_type} with the price {price}"
                            speak(details)
                    else:
                        speak(f"Sorry, we don't have information on {product}.")
            
            # Ask if the user wants more information
            speak("Any other information you want to know?")
            user_input = get_user_input()
            if 'no' in user_input.lower() or 'nothing' in user_input.lower():
                speak("Thank you! Have a great day!")
                return  # Exit the main function

if __name__ == '__main__':
    main()


Listening...
You said: I want buy TV
Listening...
You said: no


## Combine code (google sheet accessing with trainde model accessing)

In [6]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

# Load the trained model and tokenizer
model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, max_length=100):
    """Generates a response from the GPT-2 model."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]
    
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def main():
    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Dictionary mapping keywords to products
    product_keywords = {
        'tv': ['tv', 'television'],
        'gas cooker': ['gas cook', 'gas cooker', 'gascooker'],
        'fridge': ['fridge', 'refrigerator']
    }

    while True:
        # Get user input
        user_input = input("You: ")
        
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate a response from the model
        response = generate_response(user_input)
        print(f"Model: {response}")

        # Find matching products
        matched_products = []
        for product, keywords in product_keywords.items():
            for keyword in keywords:
                if keyword in user_input.lower():
                    matched_products.append(product)
                    break

        if matched_products:
            # Check for matches in DataFrame
            for product in matched_products:
                matched_rows = df[df['Product'].str.contains(product, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {product}.")
        else:
            print("Sorry, I couldn't find any matching items.")
        
        # Ask if the user wants more information
        user_input = input("Any other information you want to know? (Type 'no' to exit): ")
        if 'no' in user_input.lower() or 'nothing' in user_input.lower():
            print("Thank you! Have a great day!")
            break

if __name__ == '__main__':
    main()


Model: i want buya tv. Let me check the latest price for you.
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: i want buy a rice cooker. Let me check the available rice cookers for you.
Sorry, I couldn't find any matching items.
Model: how about fridge? Sure, let me see what fridge we have in stock.
We have Fridge, sisil with the price Rs.78000
We have Fridge, samsung with the price Rs.90000
Model: how about tv? Sure, let me check the latest price for you.
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: i want buy a rice cooker. Let me check the available rice cookers for you.
Sorry, I couldn't find any matching items.


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

## updatd code

### Key Changes:
Load Model and Tokenizer: Added generate_response function to get model-generated responses.
User Input Handling: First, the user input is processed by the model, and then the response from the model is used for keyword extraction.
Keyword Extraction: Keywords are searched in the model’s response to determine which products might be of interest.
Data Fetching and Processing: Handles CSV data fetching and preprocessing as before.

In [7]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)

    # Dictionary mapping keywords to products
    product_keywords = {
        'tv': ['tv', 'television'],
        'gas cooker': ['gas cook', 'gas cooker', 'gascooker'],
        'fridge': ['fridge', 'refrigerator']
    }

    while True:
        # Get user input
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")

        # Process the model response to find matching products
        matched_products = []
        for product, keywords in product_keywords.items():
            for keyword in keywords:
                if keyword in model_response.lower():
                    matched_products.append(product)
                    break

        if not matched_products:
            print("Sorry, I couldn't find any matching items.")
        else:
            # Check for matches in DataFrame
            for product in matched_products:
                matched_rows = df[df['Product'].str.contains(product, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {product}.")

if __name__ == '__main__':
    main()


Model: i want buy a tv. Let me check the latest price for you.
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: how about rice cooker? Sure, let me see what rice cookers we have in stock.
Sorry, I couldn't find any matching items.
Model: how about gas cooker? Sure, let me see what gas cookers we have in stock.
We have gas cooker , usha  with the price Rs.15000
We have gas cooker , samsung with the price Rs24000
Model: do you have tv? Sure, let me see what tv's we have in stock.
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

To modify the code so that it dynamically extracts keywords from the Google Sheet and updates the keyword list in a separate file, you can follow these steps:

Fetch and Extract Keywords Dynamically: Update the script to extract the product keywords from the Google Sheet.
Save Keywords to a Separate File: Write the extracted keywords to a separate file.
Update Keyword Matching: Adjust the product matching logic to use the dynamically extracted keywords.

In [8]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def save_keywords_to_file(keywords, filename='keywords.txt'):
    """Saves the extracted keywords to a file."""
    with open(filename, 'w') as file:
        for keyword in keywords:
            file.write(f"{keyword}\n")

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the first column of the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    save_keywords_to_file(keywords)

    while True:
        # Get user input
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")

        # Process the model response to find matching products
        matched_products = [keyword for keyword in keywords if keyword.lower() in model_response.lower()]

        if not matched_products:
            print("Sorry, I couldn't find any matching items.")
        else:
            # Check for matches in DataFrame
            for product in matched_products:
                matched_rows = df[df['Product'].str.contains(product, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {product}.")

if __name__ == '__main__':
    main()


Model: i want buy a tv. Let me check the latest price for you.
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: how about gas cooker? Sure, let me see what gas cookers we have in stock.
Sorry, I couldn't find any matching items.


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

In [11]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts keywords from the given text."""
    extracted_keywords = []
    text_lower = text.lower()
    for keyword in keywords:
        if keyword.lower() in text_lower:
            extracted_keywords.append(keyword)
    return extracted_keywords

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()

    while True:
        # Get user input
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")

        # Extract keywords from the model response
        matched_keywords = extract_keywords_from_text(model_response, keywords)

        if not matched_keywords:
            print("Sorry, I couldn't find any matching items.")
        else:
            # Check for matches in DataFrame
            for keyword in matched_keywords:
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")

if __name__ == '__main__':
    main()


Model: i want buy a gas cooker. Let me check the available gas cookers for you.
Sorry, I couldn't find any matching items.
Model: how about tv sets? Sure, let me see what tv sets we have in stock.
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: don't you have gas cooker? Sure, let me see what gas cookers we have in stock.
Sorry, I couldn't find any matching items.
Model: do you have offers on home entertainment? Let me check if we have any offers on home entertainment.
Sorry, I couldn't find any matching items.
Model: any offers going on? Sure, let me see what products we have in stock.
Sorry, I couldn't find any matching items.


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

In [15]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts keywords from the given text."""
    text_lower = text.lower()
    extracted_keywords = [keyword for keyword in keywords if keyword.lower() in text_lower]
    return extracted_keywords

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    
    # Debug: Print keywords and DataFrame contents
    print(f"Keywords from DataFrame: {keywords}")
    print(f"DataFrame contents:\n{df}")

    while True:
        # Get user input
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")

        # Extract keywords from the model response
        matched_keywords = extract_keywords_from_text(model_response, keywords)

        # Debug: Print extracted keywords from response
        print(f"Keywords extracted from response: {matched_keywords}")

        if matched_keywords:
            for keyword in matched_keywords:
                # Check for matches in DataFrame
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")
        else:
            print("Sorry, I couldn't find any matching items.")

if __name__ == '__main__':
    main()


Keywords from DataFrame: ['TV', 'gas cooker ', 'Fridge', 'rice cooker ']
DataFrame contents:
        Product     BrandType     Price
0            TV  samsung 32''  Rs.38000
1            TV  samsung 48''  Rs.48000
2            TV        LG 72"  Rs.60000
3   gas cooker          usha   Rs.15000
4   gas cooker        samsung   Rs24000
5        Fridge         sisil  Rs.78000
6        Fridge       samsung  Rs.90000
7  rice cooker        samsung   Rs.6700
Model: i want buy a gas cooker. Let me check the latest price for you.
Keywords extracted from response: []
Sorry, I couldn't find any matching items.


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

In [16]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts keywords from the given text."""
    text_lower = text.lower()
    extracted_keywords = [keyword.strip().lower() for keyword in keywords if keyword.strip().lower() in text_lower]
    return extracted_keywords

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    keywords = [keyword.strip() for keyword in keywords]  # Remove extra spaces

    # Debug: Print keywords and DataFrame contents
    print(f"Keywords from DataFrame: {keywords}")
    print(f"DataFrame contents:\n{df}")

    while True:
        # Get user input
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")

        # Extract keywords from the model response
        matched_keywords = extract_keywords_from_text(model_response, keywords)

        # Debug: Print extracted keywords from response
        print(f"Keywords extracted from response: {matched_keywords}")

        if matched_keywords:
            for keyword in matched_keywords:
                # Check for matches in DataFrame
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")
        else:
            print("Sorry, I couldn't find any matching items.")

if __name__ == '__main__':
    main()



Keywords from DataFrame: ['TV', 'gas cooker', 'Fridge', 'rice cooker']
DataFrame contents:
        Product     BrandType     Price
0            TV  samsung 32''  Rs.38000
1            TV  samsung 48''  Rs.48000
2            TV        LG 72"  Rs.60000
3   gas cooker          usha   Rs.15000
4   gas cooker        samsung   Rs24000
5        Fridge         sisil  Rs.78000
6        Fridge       samsung  Rs.90000
7  rice cooker        samsung   Rs.6700
Model: i want buy a tv. Let me check the latest price for you.
Keywords extracted from response: ['tv']
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: i want buy a gascooker. Let me check the latest price for you.
Keywords extracted from response: []
Sorry, I couldn't find any matching items.


RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

f the issue is that keywords with spaces (like "gas cooker") are not being matched correctly, we need to adjust the keyword matching logic to handle multi-word keywords effectively. Here’s a solution that includes:

Handling Multi-Word Keywords: Match multi-word keywords by checking if all parts of the keyword are present in the text.
Improving Keyword Matching: Enhance the matching function to account for both exact and partial matches.
Updated Code with Multi-Word Keyword Handling

In [17]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts multi-word keywords from the given text."""
    text_lower = text.lower()
    extracted_keywords = []
    
    for keyword in keywords:
        keyword_lower = keyword.strip().lower()
        if keyword_lower in text_lower:
            extracted_keywords.append(keyword_lower)
    
    return extracted_keywords

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    keywords = [keyword.strip() for keyword in keywords]  # Remove extra spaces

    # Debug: Print keywords and DataFrame contents
    print(f"Keywords from DataFrame: {keywords}")
    print(f"DataFrame contents:\n{df}")

    while True:
        # Get user input
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("Exiting...")
            break

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")

        # Extract keywords from the model response
        matched_keywords = extract_keywords_from_text(model_response, keywords)

        # Debug: Print extracted keywords from response
        print(f"Keywords extracted from response: {matched_keywords}")

        if matched_keywords:
            for keyword in matched_keywords:
                # Check for matches in DataFrame
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")
        else:
            print("Sorry, I couldn't find any matching items.")

if __name__ == '__main__':
    main()


Keywords from DataFrame: ['TV', 'gas cooker', 'Fridge', 'rice cooker']
DataFrame contents:
        Product     BrandType     Price
0            TV  samsung 32''  Rs.38000
1            TV  samsung 48''  Rs.48000
2            TV        LG 72"  Rs.60000
3   gas cooker          usha   Rs.15000
4   gas cooker        samsung   Rs24000
5        Fridge         sisil  Rs.78000
6        Fridge       samsung  Rs.90000
7  rice cooker        samsung   Rs.6700
Model: i want buy a gas cooker. Let me check the latest price for you.
Keywords extracted from response: ['gas cooker']
We have gas cooker , usha  with the price Rs.15000
We have gas cooker , samsung with the price Rs24000
Model: i want buy a tv. Let me check the latest price for you.
Keywords extracted from response: ['tv']
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Model: i want buy a rice cooker. Let me check the available rice cookers for you.

RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous

## Voice interaction

## speaking update one

In [21]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound
import os

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts multi-word keywords from the given text."""
    text_lower = text.lower()
    extracted_keywords = []
    
    for keyword in keywords:
        keyword_lower = keyword.strip().lower()
        if keyword_lower in text_lower:
            extracted_keywords.append(keyword_lower)
    
    return extracted_keywords

def speak(text):
    """Convert text to speech and play it."""
    tts = gTTS(text=text, lang='en')
    tts.save("response.mp3")
    playsound("response.mp3")
    os.remove("response.mp3")

def get_voice_input():
    """Capture voice input and convert it to text."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print(f"You: {text}")
        return text
    except sr.UnknownValueError:
        print("Sorry, I did not understand that.")
        speak("Sorry, I did not understand that.")
        return ""
    except sr.RequestError:
        print("Sorry, my speech service is down.")
        speak("Sorry, my speech service is down.")
        return ""

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    keywords = [keyword.strip() for keyword in keywords]  # Remove extra spaces

    # Debug: Print keywords and DataFrame contents
    print(f"Keywords from DataFrame: {keywords}")
    print(f"DataFrame contents:\n{df}")

    while True:
        # Get user input
        user_input = get_voice_input()
        if user_input.lower() == 'exit':
            print("Exiting...")
            speak("Exiting.")
            break

        if user_input.strip() == "":
            continue

        # Generate response from the model
        model_response = generate_response(user_input, model, tokenizer)
        print(f"Model: {model_response}")
        speak(model_response)

        # Extract keywords from the model response
        matched_keywords = extract_keywords_from_text(model_response, keywords)

        # Debug: Print extracted keywords from response
        print(f"Keywords extracted from response: {matched_keywords}")

        if matched_keywords:
            for keyword in matched_keywords:
                # Check for matches in DataFrame
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                        speak(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")
                    speak(f"Sorry, we don't have information on {keyword}.")
        else:
            print("Sorry, I couldn't find any matching items.")
            speak("Sorry, I couldn't find any matching items.")

if __name__ == '__main__':
    main()


Keywords from DataFrame: ['TV', 'gas cooker', 'Fridge', 'rice cooker', 'offers']
DataFrame contents:
        Product         BrandType     Price
0            TV      samsung 32''  Rs.38000
1            TV      samsung 48''  Rs.48000
2            TV            LG 72"  Rs.60000
3   gas cooker              usha   Rs.15000
4   gas cooker            samsung   Rs24000
5        Fridge             sisil  Rs.78000
6        Fridge           samsung  Rs.90000
7  rice cooker            samsung   Rs.6700
8        offers    samsung tv 32"   10% off
9        offers  usha rice cooker    5% off
Listening...
You: I want buy a TV
Model: I want buy a TV. Let me check the latest price for you.
Keywords extracted from response: ['tv']
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Listening...
You: I won't buy a gas Google
Model: I won't buy a gas Google. Let me check if we have any Google products on the market.
K

## update code

In [29]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound
import os

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts multi-word keywords from the given text."""
    text_lower = text.lower()
    extracted_keywords = []
    
    for keyword in keywords:
        keyword_lower = keyword.strip().lower()
        if keyword_lower in text_lower:
            extracted_keywords.append(keyword_lower)
    
    return extracted_keywords

def speak(text):
    """Convert text to speech and play it."""
    tts = gTTS(text=text, lang='en')
    tts.save("response.mp3")
    playsound("response.mp3")
    os.remove("response.mp3")

def get_voice_input():
    """Capture voice input and convert it to text."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print(f"You: {text}")
        return text
    except sr.UnknownValueError:
        print("Sorry, I did not understand that.")
        speak("Sorry, I did not understand that.")
        return ""
    except sr.RequestError:
        print("Sorry, my speech service is down.")
        speak("Sorry, my speech service is down.")
        return ""

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    keywords = [keyword.strip() for keyword in keywords]  # Remove extra spaces

    # Debug: Print keywords and DataFrame contents
    print(f"Keywords from DataFrame: {keywords}")
    print(f"DataFrame contents:\n{df}")

    while True:
        # Get user input
        user_input = get_voice_input()
        if user_input.lower() == 'exit':
            print("Exiting...")
            speak("Exiting.")
            break

        if user_input.strip() == "":
            continue

        # Generate response from the model without repeating the user's input
        prompt = f"Provide information about {user_input}"
        model_response = generate_response(prompt, model, tokenizer)
        print(f"Model: {model_response}")
        speak(model_response)

        # Extract keywords from the model response
        matched_keywords = extract_keywords_from_text(model_response, keywords)

        # Debug: Print extracted keywords from response
        print(f"Keywords extracted from response: {matched_keywords}")

        if matched_keywords:
            for keyword in matched_keywords:
                # Check for matches in DataFrame
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                        speak(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")
                    speak(f"Sorry, we don't have information on {keyword}.")
        else:
            print("Sorry, I couldn't find any matching items.")
            speak("Sorry, I couldn't find any matching items.")

        speak("Is there any other information you want to know?")

if __name__ == '__main__':
    main()


Keywords from DataFrame: ['TV', 'gas cooker', 'Fridge', 'rice cooker', 'offers']
DataFrame contents:
        Product         BrandType     Price
0            TV      samsung 32''  Rs.38000
1            TV      samsung 48''  Rs.48000
2            TV            LG 72"  Rs.60000
3   gas cooker              usha   Rs.15000
4   gas cooker            samsung   Rs24000
5        Fridge             sisil  Rs.78000
6        Fridge           samsung  Rs.90000
7  rice cooker            samsung   Rs.6700
8        offers    samsung tv 32"   10% off
9        offers  usha rice cooker    5% off
Listening...
You: I won't buy a TV
Model: Provide information about I won't buy a TV. Let me check if we have any news on I won't buy a TV.
Keywords extracted from response: ['tv']
We have TV, samsung 32'' with the price Rs.38000
We have TV, samsung 48'' with the price Rs.48000
We have TV, LG 72" with the price Rs.60000
Listening...
Sorry, I did not understand that.
Listening...
You: I want buy a gas cook
Model:

# remove repeating user inputs

In [42]:
import requests
import pandas as pd
from io import StringIO
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound
import os

# Google Sheets configuration
SPREADSHEET_ID = '1lNZDwXKNlNkZiLWKksob55RE70e3IbSAhkR0W_pjGV4'
SHEET_NAME = 'Sheet1'  # Adjust if necessary
URL = f'https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'

def fetch_data_from_sheet(url):
    """Fetches CSV data from the Google Sheet URL."""
    response = requests.get(url)
    if response.status_code == 200:
        return response.content.decode('utf-8')
    else:
        print('Failed to fetch data.')
        return None

def preprocess_data(df):
    """Preprocess the DataFrame to handle empty rows and group product details."""
    df = df.fillna('')
    df['Product'] = df['Product'].replace('', pd.NA).ffill()
    return df.dropna(subset=['Product'])

def generate_response(prompt, model, tokenizer, max_length=100):
    """Generates a response from the model based on the prompt."""
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text

def extract_keywords_from_text(text, keywords):
    """Extracts multi-word keywords from the given text."""
    text_lower = text.lower()
    extracted_keywords = []
    
    for keyword in keywords:
        keyword_lower = keyword.strip().lower()
        if keyword_lower in text_lower:
            extracted_keywords.append(keyword_lower)
    
    return extracted_keywords

def clean_model_response(prompt, response):
    """Remove the user's prompt from the model's response."""
    # Check if the response starts with the user's prompt and remove it
    if response.lower().startswith(prompt.lower()):
        response = response[len(prompt):].strip()
    return response

def format_response_for_speaking(response):
    """Format the response by removing trailing punctuation and excess spaces."""
    response = response.strip().rstrip('.')
    return response

def extract_second_part(response):
    """Extract the second part of the model response after removing the first part."""
    sentences = response.split('.')
    if len(sentences) > 1:
        second_part = '.'.join(sentences[1:]).strip()  # Join remaining sentences
        return second_part
    return ""

def speak(text):
    """Convert text to speech and play it."""
    tts = gTTS(text=text, lang='en')
    tts.save("response.mp3")
    playsound("response.mp3")
    os.remove("response.mp3")

def get_voice_input():
    """Capture voice input and convert it to text."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print(f"You: {text}")
        return text
    except sr.UnknownValueError:
        print("Sorry, I did not understand that.")
        speak("Sorry, I did not understand that.")
        return ""
    except sr.RequestError:
        print("Sorry, my speech service is down.")
        speak("Sorry, my speech service is down.")
        return ""

def main():
    # Load the trained model and tokenizer
    model_name = 'custom_seller_bot_model'  # Directory where your trained model is saved
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)
    model.eval()

    # Fetch data from Google Sheets
    csv_data = fetch_data_from_sheet(URL)
    if not csv_data:
        print("Failed to fetch data from the Google Sheet.")
        return
    
    # Convert CSV data to DataFrame
    df = pd.read_csv(StringIO(csv_data))
    
    # Preprocess the data
    df = preprocess_data(df)
    
    # Extract keywords from the DataFrame
    keywords = df['Product'].dropna().unique().tolist()
    keywords = [keyword.strip() for keyword in keywords]  # Remove extra spaces

    # Debug: Print keywords and DataFrame contents
    print(f"Keywords from DataFrame: {keywords}")
    print(f"DataFrame contents:\n{df}")

    while True:
        # Get user input
        user_input = get_voice_input()
        if user_input.lower() == 'exit':
            print("Exiting...")
            speak("Exiting.")
            break

        if user_input.strip() == "":
            continue

        # Generate response from the model
        prompt = user_input
        model_response = generate_response(prompt, model, tokenizer)

        # Clean model response
        cleaned_response = clean_model_response(prompt, model_response)
        
        # Extract the second part of the model response
        second_part = extract_second_part(cleaned_response)
        second_part = format_response_for_speaking(second_part)

        # Extract keywords from the full model response
        matched_keywords = extract_keywords_from_text(cleaned_response, keywords)

        # Debug: Print extracted keywords from response
        print(f"Model response: {cleaned_response}")
        print(f"Keywords extracted from response: {matched_keywords}")

        # Speak the second part of the cleaned model response
        if second_part:
            speak(second_part)

        # Check if keywords matched and provide details
        if matched_keywords:
            for keyword in matched_keywords:
                # Check for matches in DataFrame
                matched_rows = df[df['Product'].str.contains(keyword, case=False, na=False)]
                if not matched_rows.empty:
                    for _, row in matched_rows.iterrows():
                        product_name = row['Product']
                        brand_type = row['BrandType']
                        price = row['Price']
                        details = f"We have {product_name}, {brand_type} with the price {price}"
                        print(details)
                        speak(details)
                else:
                    print(f"Sorry, we don't have information on {keyword}.")
                    speak(f"Sorry, we don't have information on {keyword}.")
        else:
            print("Sorry, I couldn't find any matching items.")
            speak("Sorry, I couldn't find any matching items.")

        speak("Is there any other information you want to know?")

if __name__ == '__main__':
    main()


Keywords from DataFrame: ['TV', 'gas cooker', 'Fridge', 'rice cooker', 'offers']
DataFrame contents:
        Product         BrandType     Price
0            TV      samsung 32''  Rs.38000
1            TV      samsung 48''  Rs.48000
2            TV            LG 72"  Rs.60000
3   gas cooker              usha   Rs.15000
4   gas cooker            samsung   Rs24000
5        Fridge             sisil  Rs.78000
6        Fridge           samsung  Rs.90000
7  rice cooker            samsung   Rs.6700
8        offers    samsung tv 32"   10% off
9        offers  usha rice cooker    5% off
Listening...
You: I won't buy a rice cooker
Model response: . Let me check the available rice cookers for you.
Keywords extracted from response: ['rice cooker']
We have rice cooker , samsung with the price Rs.6700
Listening...
You: I want buy a fridge
Model response: . Let me check the available fridges for you.
Keywords extracted from response: ['fridge']
We have Fridge, sisil with the price Rs.78000
We have Fr