### ChatBot Project - Jason Pereira

In [1]:
#Load and Explore the datasets

import json
import pandas as pd

# Load the JSON dataset
file_path = '../data/Ecommerce_FAQ_Chatbot_dataset.json'  # Update with your file path

# Load JSON file into Python
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Check if the JSON is a list or dictionary
if isinstance(data, list):
    # Convert list of dictionaries to DataFrame
    df = pd.DataFrame(data)
elif isinstance(data, dict):
    # If it's a dictionary, check its structure and convert to DataFrame
    df = pd.DataFrame.from_dict(data, orient='index').reset_index()
else:
    raise ValueError("Unexpected JSON structure. Please check the dataset.")

# Display basic information about the DataFrame
print(df.info())
print("\nFirst few rows:")
print(df.head())



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 80 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   index   1 non-null      object
 1   0       1 non-null      object
 2   1       1 non-null      object
 3   2       1 non-null      object
 4   3       1 non-null      object
 5   4       1 non-null      object
 6   5       1 non-null      object
 7   6       1 non-null      object
 8   7       1 non-null      object
 9   8       1 non-null      object
 10  9       1 non-null      object
 11  10      1 non-null      object
 12  11      1 non-null      object
 13  12      1 non-null      object
 14  13      1 non-null      object
 15  14      1 non-null      object
 16  15      1 non-null      object
 17  16      1 non-null      object
 18  17      1 non-null      object
 19  18      1 non-null      object
 20  19      1 non-null      object
 21  20      1 non-null      object
 22  21      1 non-null      object

In [2]:
import json

# Load the JSON dataset
file_path = '../data/Ecommerce_FAQ_Chatbot_dataset.json'  # Update with your file path

try:
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Inspect type and structure of data
    print("Type of data:", type(data))

    if isinstance(data, dict):
        print("\nKeys in the dataset:")
        print(list(data.keys())[:10])  # Print first 10 keys
        print("\nSample of first dictionary:")
        print(json.dumps(list(data.values())[:1], indent=2))
    elif isinstance(data, list):
        print("\nNumber of items:", len(data))
        print("\nFirst item in the dataset:")
        print(json.dumps(data[0], indent=2))
    else:
        print("Unexpected JSON structure.")
except FileNotFoundError:
    print(f"Error: File not found at {file_path}. Please ensure the file exists.")


Type of data: <class 'dict'>

Keys in the dataset:
['questions']

Sample of first dictionary:
[
  [
    {
      "question": "How can I create an account?",
      "answer": "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process."
    },
    {
      "question": "What payment methods do you accept?",
      "answer": "We accept major credit cards, debit cards, and PayPal as payment methods for online orders."
    },
    {
      "question": "How can I track my order?",
      "answer": "You can track your order by logging into your account and navigating to the 'Order History' section. There, you will find the tracking information for your shipment."
    },
    {
      "question": "What is your return policy?",
      "answer": "Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refe

In [3]:
#JSON dataset is a dictionary with a key named 'questions', which contains a list of Q&A pairs

try:
    # Load JSON file
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Extract the list of Q&A pairs from the 'questions' key
    qa_pairs = data.get('questions', [])

    # Convert the Q&A pairs to a DataFrame
    df = pd.DataFrame(qa_pairs)

    # Rename columns to match the expected format
    df.rename(columns={'question': 'Question', 'answer': 'Answer'}, inplace=True)

    # Display basic information about the DataFrame
    print("DataFrame Columns:")
    print(df.columns)
    print("\nFirst few rows:")
    print(df.head())

except FileNotFoundError:
    print(f"Error: File not found at {file_path}. Please ensure the file exists.")
except Exception as e:
    print(f"Error: {str(e)}")


DataFrame Columns:
Index(['Question', 'Answer'], dtype='object')

First few rows:
                              Question  \
0         How can I create an account?   
1  What payment methods do you accept?   
2            How can I track my order?   
3          What is your return policy?   
4               Can I cancel my order?   

                                              Answer  
0  To create an account, click on the 'Sign Up' b...  
1  We accept major credit cards, debit cards, and...  
2  You can track your order by logging into your ...  
3  Our return policy allows you to return product...  
4  You can cancel your order if it has not been s...  


In [4]:
#Set Up OpenAI GPT API
#Sign up for an OpenAI account and get an API key from OpenAI. Then, add this code to initialize the API:

import openai
import requests


# Set your OpenAI API key
API_URL = "https://api.openai.com/v1/chat/completions"
API_KEY = "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"  # Replace with your actual OpenAI API key

# Function to interact with GPT model
def ask_gpt(prompt):
    """
    Function to interact with OpenAI GPT API using REST requests.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}"
    }

    data = {
        "model": "gpt-4",  # Use "gpt-3.5-turbo" if GPT-4 is unavailable
        "messages": [
            {"role": "system", "content": "You are a helpful assistant for answering customer queries."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": 150,
        "temperature": 0.7
    }

    try:
        response = requests.post(API_URL, headers=headers, json=data)
        response.raise_for_status()  # Raise an error for bad status codes (e.g., 4xx or 5xx)
        return response.json()["choices"][0]["message"]["content"].strip()
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"

In [5]:
#Build Chatbot Logic
#Create a function that uses the dataset to answer FAQs or forwards queries to GPT for more complex responses:

def chatbot_response(user_query):
    """
    Function to handle chatbot responses.
    Searches for a matching question in the dataset.
    If no match is found, uses GPT for fallback.
    """
    try:
        # Search dataset for similar questions
        for index, row in df.iterrows():
            if user_query.lower() in row["Question"].lower():
                return f"Answer: {row['Answer']}"
        
        # If no match found, use GPT model (fallback)
        gpt_prompt = f"Answer this customer query: {user_query}"
        return ask_gpt(gpt_prompt)
    
    except Exception as e:
        return f"Error: {str(e)}"


In [6]:
#Create a User Interface with Gradio
#Use Gradio to create a simple web interface for interacting with your chatbot:

import gradio as gr

def chatbot_interface(user_input):
    """
    Gradio interface function for user interaction.
    """
    response = chatbot_response(user_input)
    return response

interface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="E-commerce FAQ Chatbot with GPT Integration",
    description="Ask me anything about products, orders, or support! If I can't find an answer, I'll use AI to help you."
)

# Launch Gradio interface
interface.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




## Encountered an error incorporating GhatGPT

## Approach: Use Hugging Face Transformers

In [None]:
%pip install torch

import json
import pandas as pd
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch

# Step 1: Load and Process JSON Dataset
file_path = '..data/Ecommerce_FAQ_Chatbot_dataset.json'  # Update with your file path

try:
    # Load JSON file
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Extract the list of Q&A pairs from the 'questions' key
    qa_pairs = data.get('questions', [])

    # Convert Q&A pairs to DataFrame
    df = pd.DataFrame(qa_pairs)

    # Rename columns to match expected format
    df.rename(columns={'question': 'Question', 'answer': 'Answer'}, inplace=True)

except FileNotFoundError:
    print(f"Error: File not found at {file_path}. Please ensure the file exists.")
except Exception as e:
    print(f"Error: {str(e)}")

# Step 2: Set Up Hugging Face Transformers
# Load a pre-trained conversational model (e.g., GPT-2)
model_name = "gpt2"  # You can also use "EleutherAI/gpt-neo-1.3B" or other models from Hugging Face
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a text generation pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate_response(prompt):
    """
    Generate a response using the Hugging Face Transformers model.
    """
    response = generator(prompt, max_length=150, num_return_sequences=1, temperature=0.7)
    return response[0]["generated_text"]

# Step 3: Build Chatbot Logic
def chatbot_response(user_query):
    """
    Function to handle chatbot responses.
    Searches for a matching question in the dataset.
    If no match is found, uses the Hugging Face model for fallback.
    """
    try:
        # Search dataset for similar questions
        for index, row in df.iterrows():
            if user_query.lower() in row["Question"].lower():
                return f"Answer: {row['Answer']}"
        
        # If no match found, use Hugging Face model (fallback)
        prompt = f"User: {user_query}\nAssistant:"
        return generate_response(prompt)
    
    except Exception as e:
        return f"Error: {str(e)}"

# Step 4: Create Gradio Interface
def chatbot_interface(user_input):
    """
    Gradio interface function for user interaction.
    """
    response = chatbot_response(user_input)
    return response

interface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="E-commerce FAQ Chatbot with Hugging Face Integration",
    description="Ask me anything about products, orders, or support! If I can't find an answer, I'll use AI to help you."
)

# Step 5: Launch Gradio Interface
interface.launch()


Note: you may need to restart the kernel to use updated packages.
Error: File not found at ..data/Ecommerce_FAQ_Chatbot_dataset.json. Please ensure the file exists.


Device set to use cpu


* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


## Testing
Input (Dataset Query):
How can I track my order?
Output (From Dataset):
Answer: You can track your order by logging into your account and navigating to the 'Order History' section.
Input (Fallback Query):
What is your shipping policy?
Output (Generated by Hugging Face Model):
Our shipping policy depends on your location and selected shipping method. (or another AI-generated response)
Advantages of This Approach
No Dependency on OpenAI API:
You don’t need an OpenAI API key or internet access if running locally.
Open Source:
Models like GPT-2 and GPT-Neo are open source and freely available.
Customizable:
You can fine-tune these models on your dataset for better performance.
Scalable:
Run these models locally or deploy them on cloud infrastructure as needed.
Next Steps
Fine-Tune the Model:
Fine-tune GPT-2 or GPT-Neo on your dataset for more accurate responses.
Switch to Larger Models:
Use larger models like EleutherAI/gpt-neo-2.7B or GPT-J for better quality responses.
Deploy Online:
Deploy your chatbot using platforms like Streamlit Cloud or Hugging Face Spaces.

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