In [20]:
import pandas as pd

data_set = pd.read_csv('../data/amazon_alexa.tsv', sep='\t')
data_set.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [21]:
data=data_set[['verified_reviews','feedback']]
data.columns = ['review','label']

data.head()

Unnamed: 0,review,label
0,Love my Echo!,1
1,Loved it!,1
2,"Sometimes while playing a game, you can answer...",1
3,I have had a lot of fun with this thing. My 4 ...,1
4,Music,1


In [22]:
# count occs of each label
label_counts = data.value_counts('label')


# get the nb of rows to drop from the majority class
rows_to_drop = label_counts.max()- label_counts.min()

#drop rows from the majority class
if rows_to_drop>0:
    data_majority = data[data['label']==1]
    data_balanced = data.drop(data_majority.sample(rows_to_drop).index)
else:
    data_balanced = data.copy()

#check the new class balance
print(data_balanced['label'].value_counts())


label
1    257
0    257
Name: count, dtype: int64


### data processing

In [23]:
import re

def clean_text(text):
    # Check if the input is a string
    if not isinstance(text, str):
        return ""  # or return text if you want to leave non-string values unchanged
    
    # Remove special characters and punctuation
    text = re.sub(r'[^\w\s]', ' ', text)

    # Remove single chars
    text = re.sub(r'\b[a-zA-Z]\b', ' ', text)

    # Remove HTML tags
    text = re.sub(r'<[^>]*>', '', text)

    # Lowercase text
    text = text.lower()

    # Remove extra whitespaces
    text = re.sub(r'\s+', ' ', text)

    # Trim leading and trailing space
    text = text.strip()

    return text


In [24]:
data_balanced.head()


Unnamed: 0,review,label
2,"Sometimes while playing a game, you can answer...",1
4,Music,1
17,We have only been using Alexa for a couple of ...,1
26,"I love my Echo. It's easy to operate, loads of...",1
41,My Android would not allow Alexa to connect. F...,1


In [25]:
# extract review column as a list
reviews = data_balanced['review'].tolist()

#clean the text in the list
cleaned_reviews = [clean_text(review) for review in reviews]

# add cleaned reviews as a new column to the dataframe
data_balanced['clean_review'] = cleaned_reviews



### data split

In [26]:
total_rows = len(data_balanced)
test_size = int(total_rows*0.95)

# randomly sample train_size rows for the training set
test_set = data_balanced.sample(test_size)

#get the remaining rows for the test set
train_set = data_balanced.drop(test_set.index)

### sentinent w/ LLM


In [27]:
import pathlib
import textwrap

import google.generativeai as genai 


In [28]:
from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
    text = text.replace('•', '  *')
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))




In [29]:
from dotenv import load_dotenv
import os

load_dotenv()

GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

if GOOGLE_API_KEY is None:
    print("GOOGLE_API_KEY environment variable not set.")
else:
    genai.configure(api_key=GOOGLE_API_KEY)


In [30]:
for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/learnlm-1.5-pro-experimental
models/gemini-exp-1114
models/gemini-exp-1121


In [31]:
model = genai.GenerativeModel('gemini-pro')

In [32]:
response= model.generate_content('What is the meaning of life?')
to_markdown(response.text)


> The meaning of life is a profound and multifaceted question that has been pondered by philosophers, theologians, and scientists for centuries. There is no single, definitive answer that is universally accepted, as the meaning of life can vary depending on individual beliefs, experiences, and perspectives. However, some common themes that have emerged throughout history include:
> 
> 1. **Purpose and Fulfillment:** Many people believe that the meaning of life lies in finding a sense of purpose and fulfillment. This can involve pursuing a career or hobby that brings joy and satisfaction, making meaningful contributions to society, or engaging in activities that align with one's values and goals.
> 
> 2. **Relationships and Connections:** Human relationships are often considered a core component of the meaning of life. Building strong bonds with family, friends, and loved ones can provide a sense of belonging, support, and purpose.
> 
> 3. **Personal Growth and Development:** The journey of personal growth and development can also be a meaningful pursuit. Learning new skills, acquiring knowledge, and challenging oneself can lead to a sense of accomplishment and an expanded perspective on life.
> 
> 4. **Contribution and Legacy:** Some people find meaning in leaving a positive impact on the world. This can involve contributing to the well-being of others, making a difference in society, or creating a legacy that will be remembered long after they are gone.
> 
> 5. **Experiences and Appreciation:** Appreciating the beauty and wonder of life's experiences can be a profound source of meaning. This includes embracing both the joys and challenges that life presents, and finding gratitude in the present moment.
> 
> It is important to note that the meaning of life is not static or fixed. It can evolve and change over time as individuals mature, gain new experiences, and reassess their priorities. Ultimately, the meaning of life is a personal and subjective journey that each individual must explore and define for themselves.

### single api call

In [33]:
test_set_sample = test_set.sample(20)

test_set_sample['pred_label']=''

test_set_sample

Unnamed: 0,review,label,clean_review,pred_label
660,I set up the Echo Dot and it worked for an hou...,0,set up the echo dot and it worked for an hour ...,
504,There was something wrong with the first one t...,0,there was something wrong with the first one t...,
45,Very satisfied with the Echo. Alexa now contro...,1,very satisfied with the echo alexa now control...,
2117,I love how I can just push the microphone butt...,1,love how can just push the microphone button o...,
1841,Awesome! Easy set up. Linked right up with my ...,1,awesome easy set up linked right up with my ne...,
3126,,1,,
2589,Not all that happy. The speaker isn’t great an...,0,not all that happy the speaker isn great and f...,
2249,I bought and love them. Alexa is very convenient.,1,bought and love them alexa is very convenient,
2297,Great product. Love the larger remote! Great d...,1,great product love the larger remote great deal,
1094,,0,,


### convert df to json using to_json() method 

In [34]:
json_data = test_set_sample[['clean_review','pred_label']].to_json(orient='records')
print(json_data)

[{"clean_review":"set up the echo dot and it worked for an hour and then died completely do not buy refurbished sent back for return the day after received","pred_label":""},{"clean_review":"there was something wrong with the first one that no even the rep spoke with could fix the replacement was registered to someone else when the price of the dot went down to 39 99 same price as the refurbished ones bought brand spanking new one no problem with the new one","pred_label":""},{"clean_review":"very satisfied with the echo alexa now controls the lights in my family room and my wife can listen to jimmy buffet by just asking alexa","pred_label":""},{"clean_review":"love how can just push the microphone button on the remote and ask it to open any show and it ll find whether it on hulu or netflix and sign me in and start the show pretty amazing but each new technology sure is making me lazy or lol","pred_label":""},{"clean_review":"awesome easy set up linked right up with my nexia home autom

In [35]:
prompt = f"""
You are an expert linguier, who is good at classifying customer review sentinence into Positive/Negative. 
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks. 
Your task is to update predicted labels under 'pred_label' in the Json code. 
Don't make any changes to Json code format, please. 


```
{json_data}
""" 
print(prompt)


You are an expert linguier, who is good at classifying customer review sentinence into Positive/Negative. 
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks. 
Your task is to update predicted labels under 'pred_label' in the Json code. 
Don't make any changes to Json code format, please. 


```
[{"clean_review":"set up the echo dot and it worked for an hour and then died completely do not buy refurbished sent back for return the day after received","pred_label":""},{"clean_review":"there was something wrong with the first one that no even the rep spoke with could fix the replacement was registered to someone else when the price of the dot went down to 39 99 same price as the refurbished ones bought brand spanking new one no problem with the new one","pred_label":""},{"clean_review":"very satisfied w

In [36]:
response = model.generate_content(prompt)

print(response.text)

```
[{"clean_review":"set up the echo dot and it worked for an hour and then died completely do not buy refurbished sent back for return the day after received","pred_label":0},{"clean_review":"there was something wrong with the first one that no even the rep spoke with could fix the replacement was registered to someone else when the price of the dot went down to 39 99 same price as the refurbished ones bought brand spanking new one no problem with the new one","pred_label":0},{"clean_review":"very satisfied with the echo alexa now controls the lights in my family room and my wife can listen to jimmy buffet by just asking alexa","pred_label":1},{"clean_review":"love how can just push the microphone button on the remote and ask it to open any show and it ll find whether it on hulu or netflix and sign me in and start the show pretty amazing but each new technology sure is making me lazy or lol","pred_label":1},{"clean_review":"awesome easy set up linked right up with my nexia home autom

In [37]:
import json

#clean the data by stripping the backticks
json_data = response.text.strip("`")

#load the cleaned data and convert to Df
data_ = json.loads(json_data)
df_sample = pd.DataFrame(data_)

df_sample

Unnamed: 0,clean_review,pred_label
0,set up the echo dot and it worked for an hour ...,0
1,there was something wrong with the first one t...,0
2,very satisfied with the echo alexa now control...,1
3,love how can just push the microphone button o...,1
4,awesome easy set up linked right up with my ne...,1
5,,0
6,not all that happy the speaker isn great and f...,0
7,bought and love them alexa is very convenient,1
8,great product love the larger remote great deal,1
9,,0


In [38]:
test_set_sample['pred_label'] = df_sample['pred_label'].values
test_set_sample

Unnamed: 0,review,label,clean_review,pred_label
660,I set up the Echo Dot and it worked for an hou...,0,set up the echo dot and it worked for an hour ...,0
504,There was something wrong with the first one t...,0,there was something wrong with the first one t...,0
45,Very satisfied with the Echo. Alexa now contro...,1,very satisfied with the echo alexa now control...,1
2117,I love how I can just push the microphone butt...,1,love how can just push the microphone button o...,1
1841,Awesome! Easy set up. Linked right up with my ...,1,awesome easy set up linked right up with my ne...,1
3126,,1,,0
2589,Not all that happy. The speaker isn’t great an...,0,not all that happy the speaker isn great and f...,0
2249,I bought and love them. Alexa is very convenient.,1,bought and love them alexa is very convenient,1
2297,Great product. Love the larger remote! Great d...,1,great product love the larger remote great deal,1
1094,,0,,0


In [40]:
#plotting confusion matrix for prediction
from sklearn.metrics import confusion_matrix

y_true = test_set_sample["label"]
y_pred = test_set_sample['pred_label']

confusion_matrix(y_true, y_pred)

array([[ 9,  0],
       [ 1, 10]])

### openai config

In [41]:
import openai
