In [62]:
import pandas as pd

data_set = pd.read_csv('../data/amazon_alexa.tsv', sep='\t')
data_set.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [63]:
data=data_set[['verified_reviews','feedback']]
data.columns = ['review','label']

data.head()

Unnamed: 0,review,label
0,Love my Echo!,1
1,Loved it!,1
2,"Sometimes while playing a game, you can answer...",1
3,I have had a lot of fun with this thing. My 4 ...,1
4,Music,1


In [64]:
# count occs of each label
label_counts = data.value_counts('label')


# get the nb of rows to drop from the majority class
rows_to_drop = label_counts.max()- label_counts.min()

#drop rows from the majority class
if rows_to_drop>0:
    data_majority = data[data['label']==1]
    data_balanced = data.drop(data_majority.sample(rows_to_drop).index)
else:
    data_balanced = data.copy()

#check the new class balance
print(data_balanced['label'].value_counts())


label
1    257
0    257
Name: count, dtype: int64


### data processing

In [65]:
import re

def clean_text(text):
    # Check if the input is a string
    if not isinstance(text, str):
        return ""  # or return text if you want to leave non-string values unchanged
    
    # Remove special characters and punctuation
    text = re.sub(r'[^\w\s]', ' ', text)

    # Remove single chars
    text = re.sub(r'\b[a-zA-Z]\b', ' ', text)

    # Remove HTML tags
    text = re.sub(r'<[^>]*>', '', text)

    # Lowercase text
    text = text.lower()

    # Remove extra whitespaces
    text = re.sub(r'\s+', ' ', text)

    # Trim leading and trailing space
    text = text.strip()

    return text


In [66]:
data_balanced.head()


Unnamed: 0,review,label
1,Loved it!,1
21,"We love Alexa! We use her to play music, play ...",1
28,Fun item to play with and get used to using. ...,1
33,The speakers sound pretty good for being so sm...,1
38,This thing is way cool! You should get one. ...,1


In [67]:
# extract review column as a list
reviews = data_balanced['review'].tolist()

#clean the text in the list
cleaned_reviews = [clean_text(review) for review in reviews]

# add cleaned reviews as a new column to the dataframe
data_balanced['clean_review'] = cleaned_reviews



### data split

In [68]:
total_rows = len(data_balanced)
test_size = int(total_rows*0.95)

# randomly sample train_size rows for the training set
test_set = data_balanced.sample(test_size)

#get the remaining rows for the test set
train_set = data_balanced.drop(test_set.index)

### sentinent w/ LLM


In [69]:
import pathlib
import textwrap

import google.generativeai as genai 


In [70]:
from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
    text = text.replace('•', '  *')
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))




In [71]:
from dotenv import load_dotenv
import os

load_dotenv()

GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

if GOOGLE_API_KEY is None:
    print("GOOGLE_API_KEY environment variable not set.")
else:
    genai.configure(api_key=GOOGLE_API_KEY)


Python-dotenv could not parse statement starting at line 2


In [72]:
for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/learnlm-1.5-pro-experimental
models/gemini-exp-1114
models/gemini-exp-1121


In [73]:
model = genai.GenerativeModel('gemini-pro')

In [74]:
response= model.generate_content('What is the meaning of life?')
to_markdown(response.text)


> **The meaning of life is a profound and subjective question that has been pondered by philosophers, theologians, scientists, and individuals throughout history.**
> 
> **There is no single, universally accepted answer, as the meaning of life is highly personal and can vary depending on one's beliefs, values, and experiences.**
> 
> **Nevertheless, here are some common perspectives on the meaning of life:**
> 
> * **Purpose-Driven:** Many believe that life has an inherent purpose, such as achieving personal fulfillment, making a positive impact on the world, or fulfilling one's spiritual destiny.
> 
> * **Experiential:** Others view life as an ongoing journey of experiences and sensations, and the meaning is found in the sum total of those experiences.
> 
> * **Relationship-Oriented:** For some, life's meaning is found in the relationships they have with others, including family, friends, and loved ones.
> 
> * **Contribution-Based:** There are those who believe that the meaning of life lies in making a meaningful contribution to society, such as through volunteerism, art, or scientific research.
> 
> * **Personal Growth:** Some view life as an opportunity for personal growth and development, seeking to improve oneself and reach one's full potential.
> 
> * **Spiritual Connection:** For many, the meaning of life is found in connecting with a higher power, such as God or a spiritual force.
> 
> * **Hedonistic:** Some people believe that the meaning of life is simply to pursue pleasure and avoid pain.
> 
> * **Existential:** Others view life as inherently meaningless and that it is up to each individual to create their own meaning.
> 
> **Ultimately, the meaning of life is a question that cannot be definitively answered.**
> 
> **However, by engaging in deep reflection, exploring different perspectives, and seeking out meaningful experiences, individuals can come to a deeper understanding of their own purpose and the significance of their existence.**

### single api call

In [75]:
test_set_sample = test_set.sample(20)

test_set_sample['pred_label']=''

test_set_sample

Unnamed: 0,review,label,clean_review,pred_label
945,"I’m having trouble connecting my tv to it, but...",1,having trouble connecting my tv to it but not ...,
1492,I have been through 4 Shows now. And ALL OF T...,0,have been through 4 shows now and all of them ...,
1061,I love all that you can do with the Echo Spot!,1,love all that you can do with the echo spot,
214,"Very cool product. Easy to setup, fun to use a...",1,very cool product easy to setup fun to use and...,
2631,I love this its super convenient and makes my ...,1,love this its super convenient and makes my li...,
2957,this is my first Alexa enabled device and so f...,1,this is my first alexa enabled device and so f...,
1082,Couldn't be more pleased. Amazon hit this one ...,1,couldn be more pleased amazon hit this one out...,
2643,Good speaker for bedroom or office,1,good speaker for bedroom or office,
2689,It does what it's suppose to do great product.,1,it does what it suppose to do great product,
418,I wanted to use these as a radio and intercom ...,0,wanted to use these as radio and intercom as w...,


### convert df to json using to_json() method 

In [76]:
json_data = test_set_sample[['clean_review','pred_label']].to_json(orient='records')
print(json_data)

[{"clean_review":"having trouble connecting my tv to it but not exactly tech smart sure one of my grandsons will figure it out for me so far ve been using it mostly for music","pred_label":""},{"clean_review":"have been through 4 shows now and all of them have been broken it has been anything from screens that lose lines of resolution to timers and reminders going off an hour late or reminders that trigger when trying to set other reminders don have these issues on my dot or my echo just on the show it would be nice if amazon could get there act together and actually fix this thing","pred_label":""},{"clean_review":"love all that you can do with the echo spot","pred_label":""},{"clean_review":"very cool product easy to setup fun to use and sound quality is much better than anticipated","pred_label":""},{"clean_review":"love this its super convenient and makes my life little easier","pred_label":""},{"clean_review":"this is my first alexa enabled device and so far its lived up to its re

In [77]:
prompt = f"""
You are an expert linguier, who is good at classifying customer review sentinence into Positive/Negative. 
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks. 
Your task is to update predicted labels under 'pred_label' in the Json code. 
Don't make any changes to Json code format, please. 


```
{json_data}
""" 
print(prompt)


You are an expert linguier, who is good at classifying customer review sentinence into Positive/Negative. 
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks. 
Your task is to update predicted labels under 'pred_label' in the Json code. 
Don't make any changes to Json code format, please. 


```
[{"clean_review":"having trouble connecting my tv to it but not exactly tech smart sure one of my grandsons will figure it out for me so far ve been using it mostly for music","pred_label":""},{"clean_review":"have been through 4 shows now and all of them have been broken it has been anything from screens that lose lines of resolution to timers and reminders going off an hour late or reminders that trigger when trying to set other reminders don have these issues on my dot or my echo just on the show it would 

In [78]:
response = model.generate_content(prompt)

print(response.text)

```
[{"clean_review":"having trouble connecting my tv to it but not exactly tech smart sure one of my grandsons will figure it out for me so far ve been using it mostly for music","pred_label":1},{"clean_review":"have been through 4 shows now and all of them have been broken it has been anything from screens that lose lines of resolution to timers and reminders going off an hour late or reminders that trigger when trying to set other reminders don have these issues on my dot or my echo just on the show it would be nice if amazon could get there act together and actually fix this thing","pred_label":0},{"clean_review":"love all that you can do with the echo spot","pred_label":1},{"clean_review":"very cool product easy to setup fun to use and sound quality is much better than anticipated","pred_label":1},{"clean_review":"love this its super convenient and makes my life little easier","pred_label":1},{"clean_review":"this is my first alexa enabled device and so far its lived up to its rep

In [79]:
import json

#clean the data by stripping the backticks
json_data = response.text.strip("`")

#load the cleaned data and convert to Df
data_ = json.loads(json_data)
df_sample = pd.DataFrame(data_)

df_sample

Unnamed: 0,clean_review,pred_label
0,having trouble connecting my tv to it but not ...,1
1,have been through 4 shows now and all of them ...,0
2,love all that you can do with the echo spot,1
3,very cool product easy to setup fun to use and...,1
4,love this its super convenient and makes my li...,1
5,this is my first alexa enabled device and so f...,1
6,couldn be more pleased amazon hit this one out...,1
7,good speaker for bedroom or office,1
8,it does what it suppose to do great product,1
9,wanted to use these as radio and intercom as w...,0


In [80]:
test_set_sample['pred_label'] = df_sample['pred_label'].values
test_set_sample

Unnamed: 0,review,label,clean_review,pred_label
945,"I’m having trouble connecting my tv to it, but...",1,having trouble connecting my tv to it but not ...,1
1492,I have been through 4 Shows now. And ALL OF T...,0,have been through 4 shows now and all of them ...,0
1061,I love all that you can do with the Echo Spot!,1,love all that you can do with the echo spot,1
214,"Very cool product. Easy to setup, fun to use a...",1,very cool product easy to setup fun to use and...,1
2631,I love this its super convenient and makes my ...,1,love this its super convenient and makes my li...,1
2957,this is my first Alexa enabled device and so f...,1,this is my first alexa enabled device and so f...,1
1082,Couldn't be more pleased. Amazon hit this one ...,1,couldn be more pleased amazon hit this one out...,1
2643,Good speaker for bedroom or office,1,good speaker for bedroom or office,1
2689,It does what it's suppose to do great product.,1,it does what it suppose to do great product,1
418,I wanted to use these as a radio and intercom ...,0,wanted to use these as radio and intercom as w...,0


In [81]:
#plotting confusion matrix for prediction
from sklearn.metrics import confusion_matrix

y_true = test_set_sample["label"]
y_pred = test_set_sample['pred_label']

confusion_matrix(y_true, y_pred)

array([[ 5,  0],
       [ 0, 15]])

### openai config

In [88]:
import openai

openai.api_key = os.getenv("OPEN_AI")


In [91]:
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, world!"}]
)
print(response.choices[0].message["content"])


RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

In [86]:
import time
def get_completion(prompt, model="gpt-3.5"):
    import openai
    openai.api_key = "your_openai_api_key"

    messages = [{"role": "user", "content": prompt}]
    try:
        response = openai.ChatCompletion.create(
            model=model, messages=messages, temperature=0
        )
        return response.choices[0].message["content"]
    except openai.error.RateLimitError:
        print("Rate limit exceeded. Retrying after 10 seconds...")
        time.sleep(10)
        return get_completion(prompt, model)


In [87]:
prompt = "Why is the sky blue?"

chatgpt_response = get_completion(prompt)

AuthenticationError: Incorrect API key provided: your_ope*******_key. You can find your API key at https://platform.openai.com/account/api-keys.