### Import Dataset

In [None]:
import pandas as pd

# Load the tab-separated Alexa review file and preview the first ten rows
data = pd.read_csv("amazon_alexa.tsv", delimiter='\t')
data.head(10)

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1
5,5,31-Jul-18,Heather Gray Fabric,I received the echo as a gift. I needed anothe...,1
6,3,31-Jul-18,Sandstone Fabric,"Without having a cellphone, I cannot use many ...",1
7,5,31-Jul-18,Charcoal Fabric,I think this is the 5th one I've purchased. I'...,1
8,5,30-Jul-18,Heather Gray Fabric,looks great,1
9,5,30-Jul-18,Heather Gray Fabric,Love it! I’ve listened to songs I haven’t hear...,1


In [None]:
# Keep only the review text and its sentiment flag, under shorter column names
mydata = data[['verified_reviews', 'feedback']].rename(
    columns={'verified_reviews': 'review', 'feedback': 'label'}
)

mydata.head()

Unnamed: 0,review,label
0,Love my Echo!,1
1,Loved it!,1
2,"Sometimes while playing a game, you can answer...",1
3,I have had a lot of fun with this thing. My 4 ...,1
4,Music,1


In [None]:
# Count how many rows carry each distinct value of the 'label' column
mydata.value_counts('label')

label
1    2893
0     257
dtype: int64

In [None]:
# Count the occurrences of each label
label_counts = mydata["label"].value_counts()

# Identify the majority class from the data instead of assuming it is label 1
majority_label = label_counts.idxmax()

# Get the number of rows to drop from the majority class
rows_to_drop = label_counts.max() - label_counts.min()

# Drop rows from the majority class randomly; the fixed seed makes the
# balanced subset (and everything derived from it) reproducible on re-run
if rows_to_drop > 0:
   data_majority = mydata[mydata["label"] == majority_label]
   data_balanced = mydata.drop(
      data_majority.sample(rows_to_drop, random_state=42).index
   )
else:
   data_balanced = mydata.copy()

# Check the new class balance
print(data_balanced["label"].value_counts())

1    257
0    257
Name: label, dtype: int64


## Data Preprocessing

In [None]:
import re

def clean_text(text):
  """Normalize a raw review string.

  Steps, in order: strip HTML tags, replace punctuation/special characters
  with spaces, drop stand-alone single ASCII letters, lowercase, collapse
  runs of whitespace, and trim.

  Args:
    text: The raw review text. Non-string values (e.g. ``None`` or the NaN
      pandas uses for a missing cell) are treated as an empty review.

  Returns:
    The cleaned, lowercased text with single spaces between tokens
    (``""`` for empty or non-string input).
  """
  if not isinstance(text, str):
    return ""

  # Remove HTML tags FIRST: once punctuation is stripped the angle brackets
  # are gone, the tag pattern can never match, and tag names leak into the
  # text. Requiring a letter after '<' (or '</') keeps things like "<3"
  # from being treated as a tag.
  text = re.sub(r"</?[a-zA-Z][^<>]*>", " ", text)

  # Remove special characters and punctuation
  text = re.sub(r"[^\w\s]", " ", text)

  # Remove single characters (stand-alone ASCII letters such as "I", "a",
  # or the "t" left over from "didn't")
  text = re.sub(r"\b[a-zA-Z]\b", " ", text)

  # Lowercase the text
  text = text.lower()

  # Remove extra whitespace
  text = re.sub(r"\s+", " ", text)

  # Trim leading and trailing spaces
  text = text.strip()

  return text

In [None]:
import pandas as pd

# Pull the raw review strings out of the DataFrame
reviews = list(data_balanced['review'])

# Run every review through the text cleaner
cleaned_reviews = list(map(clean_text, reviews))

# Store the cleaned text next to the original in a new column
data_balanced['clean_reviews'] = cleaned_reviews

In [None]:
# Inspect the balanced DataFrame (Jupyter renders the cell's last expression)
data_balanced

Unnamed: 0,review,label,clean_reviews
3,I have had a lot of fun with this thing. My 4 ...,1,have had lot of fun with this thing my 4 yr ol...
4,Music,1,music
30,Still learning all the capabilities...but so f...,1,still learning all the capabilities but so far...
43,Tried to play certain broadway shows like Came...,1,tried to play certain broadway shows like came...
44,Great,1,great
...,...,...,...
3096,The product sounded the same as the emoji spea...,0,the product sounded the same as the emoji spea...
3106,neat tool we enjoy it with the family,1,neat tool we enjoy it with the family
3109,Easy to set up and connect with smart devices....,1,easy to set up and connect with smart devices ...
3115,It is just not as loud as I thought it was goi...,1,it is just not as loud as thought it was going...


## Data Split

In [None]:
import pandas as pd

# Fraction of the balanced data held out for evaluation; the remaining 5%
# becomes `train_set`.
# NOTE(review): a 95% test share is unusually large -- confirm it is intended.
TEST_FRACTION = 0.95

total_rows = len(data_balanced)
test_size = int(total_rows * TEST_FRACTION)

# Fixed seed so the split is identical on every Restart & Run All
test_set = data_balanced.sample(test_size, random_state=42)

# Get the remaining rows for the train set
train_set = data_balanced.drop(test_set.index)

## Sentiment Analysis with an LLM

### Setting up Gemini API

In [None]:
!pip install -q -U google-generativeai

In [None]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  """Wrap ``text`` in a Markdown block quote for notebook display.

  Bullet characters ('•') are rewritten as Markdown list markers, then every
  line -- including blank ones, because the predicate always returns True --
  is prefixed with '> '.
  """
  bulleted = text.replace('•', '  *')
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
  return Markdown(quoted)

# Used to securely store your API key
from google.colab import userdata

In [None]:
# Read the API key from Colab's user secrets so it is never hard-coded in the notebook.
# Or use `os.getenv('GOOGLE_API_KEY')` to fetch an environment variable.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

# Register the key with the google.generativeai client
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Print the name of every available model that supports `generateContent`
content_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)
for model_name in content_model_names:
  print(model_name)

models/gemini-pro
models/gemini-pro-vision


In [None]:
# Create a client handle for the 'gemini-pro' model
model = genai.GenerativeModel('gemini-pro')

In [None]:
%%time
# Smoke test: send one free-form prompt and time the round trip
response = model.generate_content("What is the meaning of life?")

# Render the reply as a Markdown block quote
to_markdown(response.text)

CPU times: user 120 ms, sys: 13 ms, total: 133 ms
Wall time: 9.62 s


> The meaning of life is a multifaceted concept that has been contemplated by philosophers, theologians, and individuals throughout history. There is no one definitive answer, as the meaning of life is a personal and subjective experience. However, some common themes that have emerged in discussions about the meaning of life include:
> 
> 1. **Purpose and Fulfillment:** Many people find meaning in their lives by pursuing goals and activities that bring them a sense of purpose and fulfillment. This could be through personal, professional, or creative endeavors, or by contributing to the well-being of others.
> 
> 2. **Relationships and Connection:** Human relationships and connections with others can be a significant source of meaning. Building strong and supportive relationships, whether with family, friends, or a community, can provide a sense of belonging, love, and shared experiences.
> 
> 3. **Values and Beliefs:** Living in accordance with one's values and beliefs can also contribute to a sense of meaning. This could involve striving for justice, equality, or other ethical ideals, or leading a life guided by spiritual or religious principles.
> 
> 4. **Growth and Learning:** The pursuit of knowledge, personal growth, and learning new skills can provide a sense of fulfillment and purpose. Continuously expanding one's horizons and challenging oneself intellectually can lead to a more meaningful life experience.
> 
> 5. **Contribution and Impact:** Making a positive impact on the world or leaving a legacy can be a source of meaning for many people. This could be through contributions to society, the environment, or future generations. It can involve volunteering, charitable work, or simply being a positive force in the lives of others.
> 
> 6. **Experiences and Moments:** Life experiences, both big and small, can contribute to a sense of meaning. Joyful moments, moments of awe and wonder, or overcoming challenges can all add depth and richness to life, making it more meaningful.
> 
> Ultimately, the meaning of life is a personal journey, and what brings meaning to one person may not be the same for another. It is a question that individuals may reflect on throughout their lives, and the answer may evolve over time.

#### Single API Call

In [None]:
# Draw 20 random rows from the test set and attach an empty prediction column
test_set_sample = test_set.sample(20).assign(pred_label='')

test_set_sample

Unnamed: 0,review,label,clean_reviews,pred_label
66,Fast response which was amazing. Clear concis...,1,fast response which was amazing clear concise ...,
1820,The speakers on these devices are surprisingly...,1,the speakers on these devices are surprisingly...,
162,"Stopped working after 2 weeks ,didn't follow c...",0,stopped working after 2 weeks didn follow comm...,
835,"I have had for only a week, so I am still lear...",1,have had for only week so am still learning al...,
910,"Love these, great sound... easy to connect an...",1,love these great sound easy to connect and use,
1138,Love my Amazon products,1,love my amazon products,
2312,i do wish the dot could connect to the fire st...,1,do wish the dot could connect to the fire stic...,
1678,No YouTube,0,no youtube,
1240,I haven't figured out how to make or receive c...,0,haven figured out how to make or receive calls...,
1858,"Loved all about it, all I can do with it and i...",1,loved all about it all can do with it and it l...,


In [None]:
# Serialize only the cleaned text and the (still empty) prediction field,
# one JSON object per review
payload_columns = ['clean_reviews', 'pred_label']
json_data = test_set_sample[payload_columns].to_json(orient='records')

# Show the serialized payload
print(json_data)

[{"clean_reviews":"fast response which was amazing clear concise answers and sound quality is fantastic am still getting used to alexia and have not usde echo to its full extent","pred_label":""},{"clean_reviews":"the speakers on these devices are surprisingly good the functionality of each echo device is fantastic","pred_label":""},{"clean_reviews":"stopped working after 2 weeks didn follow commands really fun when it was working","pred_label":""},{"clean_reviews":"have had for only week so am still learning all that alexa can do it is learning process love the music can call up any time listen to podcasts and radio stations have just started keeping shopping lists and grocery lists which love don have to search for paper and pencil getting time and temperature are great do not have to wait for weather reports the sound is good am not fussy about woofers tweeters base etc the echo is fine for me this afternoon am going to try dropping in on my son when he gets home going to surprise h

In [None]:
# Build the classification prompt: instructions followed by the JSON payload
# (`json_data`) wrapped in triple backticks. The model is asked to fill in
# 'pred_label' and return the JSON otherwise unchanged.
prompt = f"""
You are an expert linguist, who is good at classifying customer review sentiments into Positive/Negative labels.
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks.
Your task is to update predicted labels under 'pred_label' in the Json code.
Don't make any changes to Json code format, please.

```
{json_data}
```
"""

# Show the fully rendered prompt
print(prompt)


You are an expert linguist, who is good at classifying customer review sentiments into Positive/Negative labels.
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks.
Your task is to update predicted labels under 'pred_label' in the Json code.
Don't make any changes to Json code format, please.

```
[{"clean_reviews":"fast response which was amazing clear concise answers and sound quality is fantastic am still getting used to alexia and have not usde echo to its full extent","pred_label":""},{"clean_reviews":"the speakers on these devices are surprisingly good the functionality of each echo device is fantastic","pred_label":""},{"clean_reviews":"stopped working after 2 weeks didn follow commands really fun when it was working","pred_label":""},{"clean_reviews":"have had for only week so am still learni

In [None]:
# Send the classification prompt to the model
response = model.generate_content(prompt)

# Print the raw text of the reply
print(response.text)

```
[{"clean_reviews":"fast response which was amazing clear concise answers and sound quality is fantastic am still getting used to alexia and have not usde echo to its full extent","pred_label":1},{"clean_reviews":"the speakers on these devices are surprisingly good the functionality of each echo device is fantastic","pred_label":1},{"clean_reviews":"stopped working after 2 weeks didn follow commands really fun when it was working","pred_label":0},{"clean_reviews":"have had for only week so am still learning all that alexa can do it is learning process love the music can call up any time listen to podcasts and radio stations have just started keeping shopping lists and grocery lists which love don have to search for paper and pencil getting time and temperature are great do not have to wait for weather reports the sound is good am not fussy about woofers tweeters base etc the echo is fine for me this afternoon am going to try dropping in on my son when he gets home going to surprise 

In [None]:
import json

# Extract the JSON array from the model reply. Slicing from the first '[' to
# the last ']' tolerates whatever surrounds it (plain ``` fences, a
# ```json language tag, leading/trailing whitespace or newlines), which a
# bare strip("`") does not.
response_text = response.text
array_start = response_text.find("[")
array_end = response_text.rfind("]")
if array_start == -1 or array_end < array_start:
    raise ValueError("Model response does not contain a JSON array")

# Parse into a DataFrame. Fresh variable names are used on purpose so the raw
# `data` DataFrame and the request payload `json_data` are not overwritten,
# keeping earlier cells re-runnable.
predicted_records = json.loads(response_text[array_start:array_end + 1])
df_sample = pd.DataFrame(predicted_records)

df_sample

Unnamed: 0,clean_reviews,pred_label
0,fast response which was amazing clear concise ...,1
1,the speakers on these devices are surprisingly...,1
2,stopped working after 2 weeks didn follow comm...,0
3,have had for only week so am still learning al...,1
4,love these great sound easy to connect and use,1
5,love my amazon products,1
6,do wish the dot could connect to the fire stic...,1
7,no youtube,0
8,haven figured out how to make or receive calls...,0
9,loved all about it all can do with it and it l...,1


In [None]:
# Copy the model's predictions from `df_sample` into `test_set_sample`.
# The assignment is positional (`.values` ignores the index), so first make
# sure the model returned exactly one record per review.
if len(df_sample) != len(test_set_sample):
    raise ValueError(
        f"Model returned {len(df_sample)} predictions "
        f"for {len(test_set_sample)} reviews"
    )

# Cast to int so predictions compare cleanly with the integer `label` column
# even if the model returns "1"/"0" as strings
test_set_sample['pred_label'] = df_sample['pred_label'].astype(int).values
test_set_sample

Unnamed: 0,review,label,clean_reviews,pred_label
66,Fast response which was amazing. Clear concis...,1,fast response which was amazing clear concise ...,1
1820,The speakers on these devices are surprisingly...,1,the speakers on these devices are surprisingly...,1
162,"Stopped working after 2 weeks ,didn't follow c...",0,stopped working after 2 weeks didn follow comm...,0
835,"I have had for only a week, so I am still lear...",1,have had for only week so am still learning al...,1
910,"Love these, great sound... easy to connect an...",1,love these great sound easy to connect and use,1
1138,Love my Amazon products,1,love my amazon products,1
2312,i do wish the dot could connect to the fire st...,1,do wish the dot could connect to the fire stic...,1
1678,No YouTube,0,no youtube,0
1240,I haven't figured out how to make or receive c...,0,haven figured out how to make or receive calls...,0
1858,"Loved all about it, all I can do with it and i...",1,loved all about it all can do with it and it l...,1


In [None]:
# Compute the confusion matrix of true vs. predicted labels
# (rows = true label, columns = predicted label)

from sklearn.metrics import confusion_matrix

y_true = test_set_sample["label"]
y_pred = test_set_sample["pred_label"]

# Pin the label order so the result is always a 2x2 matrix ordered [0, 1],
# even if one class happens to be absent from the sample
confusion_matrix(y_true, y_pred, labels=[0, 1])

array([[ 9,  0],
       [ 0, 11]])