In [14]:
import pandas as pd

data_set = pd.read_csv('C:/Users/Morsi Store DZ/sentinent-analysis-using-gemini-api/data/amazon_alexa.tsv', sep='\t')
data_set.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [15]:
data=data_set[['verified_reviews','feedback']]
data.columns = ['review','label']

data.head()

Unnamed: 0,review,label
0,Love my Echo!,1
1,Loved it!,1
2,"Sometimes while playing a game, you can answer...",1
3,I have had a lot of fun with this thing. My 4 ...,1
4,Music,1


In [16]:
# count occs of each label
label_counts = data.value_counts('label')


# get the nb of rows to drop from the majority class
rows_to_drop = label_counts.max()- label_counts.min()

#drop rows from the majority class
if rows_to_drop>0:
    data_majority = data[data['label']==1]
    data_balanced = data.drop(data_majority.sample(rows_to_drop).index)
else:
    data_balanced = data.copy()

#check the new class balance
print(data_balanced['label'].value_counts())


label
1    257
0    257
Name: count, dtype: int64


### data processing

In [17]:
import re

def clean_text(text):
    # Check if the input is a string
    if not isinstance(text, str):
        return ""  # or return text if you want to leave non-string values unchanged
    
    # Remove special characters and punctuation
    text = re.sub(r'[^\w\s]', ' ', text)

    # Remove single chars
    text = re.sub(r'\b[a-zA-Z]\b', ' ', text)

    # Remove HTML tags
    text = re.sub(r'<[^>]*>', '', text)

    # Lowercase text
    text = text.lower()

    # Remove extra whitespaces
    text = re.sub(r'\s+', ' ', text)

    # Trim leading and trailing space
    text = text.strip()

    return text


In [18]:
data_balanced.head()


Unnamed: 0,review,label
9,Love it! I’ve listened to songs I haven’t hear...,1
16,Really happy with this purchase. Great speake...,1
21,"We love Alexa! We use her to play music, play ...",1
28,Fun item to play with and get used to using. ...,1
35,I bought this to compare the speaker quality t...,1


In [19]:
# extract review column as a list
reviews = data_balanced['review'].tolist()

#clean the text in the list
cleaned_reviews = [clean_text(review) for review in reviews]

# add cleaned reviews as a new column to the dataframe
data_balanced['clean_review'] = cleaned_reviews



### data split

In [20]:
total_rows = len(data_balanced)
test_size = int(total_rows*0.95)

# randomly sample train_size rows for the training set
test_set = data_balanced.sample(test_size)

#get the remaining rows for the test set
train_set = data_balanced.drop(test_set.index)

### sentinent w/ LLM


In [21]:
import pathlib
import textwrap

import google.generativeai as genai 


In [22]:
from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
    text = text.replace('•', '  *')
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))




In [23]:
from dotenv import load_dotenv
import os

load_dotenv()

GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

if GOOGLE_API_KEY is None:
    print("GOOGLE_API_KEY environment variable not set.")
else:
    genai.configure(api_key=GOOGLE_API_KEY)


In [24]:
for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/learnlm-1.5-pro-experimental
models/gemini-exp-1114
models/gemini-exp-1121


In [25]:
model = genai.GenerativeModel('gemini-pro')

In [26]:
response= model.generate_content('What is the meaning of life?')
to_markdown(response.text)


> **Philosophical Perspectives:**
> 
> * **Existentialism:** There is no inherent meaning to life, but individuals can create their own through their choices and actions.
> * **Absurdism:** Life is meaningless and absurd, and humans must embrace the absurdity and create their own meaning.
> * **Nihilism:** Life has no intrinsic value or purpose, and everything is ultimately meaningless.
> * **Stoicism:** Life's meaning lies in living in accordance with nature and accepting what cannot be changed.
> * **Utilitarianism:** The meaning of life is to maximize happiness and minimize suffering for all.
> 
> **Religious Perspectives:**
> 
> * **Theism:** Life's purpose is to worship and serve a higher power.
> * **Atheism:** Life has no supernatural meaning and is limited to the present existence.
> * **Buddhism:** The purpose of life is to achieve enlightenment and end suffering.
> * **Hinduism:** Life is a cycle of birth, death, and rebirth, with the ultimate goal of moksha (liberation from the cycle).
> 
> **Scientific and Evolutionary Perspectives:**
> 
> * **Naturalism:** Life has no inherent purpose beyond biological processes and the continuation of the species.
> * **Evolution:** Life has evolved over time, and its purpose is to survive and reproduce.
> * **Sociobiology:** Human behavior is influenced by genetic predispositions that evolved to promote survival and reproduction.
> 
> **Personal Perspectives:**
> 
> * **Self-Actualization:** Life's purpose is to develop one's potential and live a fulfilling existence.
> * **Purpose-Driven:** Life has a specific goal or mission that provides meaning and direction.
> * **Experiential:** Life's purpose is to experience the world and enjoy its wonders.
> * **Contribution:** Life's meaning lies in contributing to society and making a positive impact on others.
> 
> **Additional Considerations:**
> 
> * **Subjective Nature:** The meaning of life is a subjective and personal experience.
> * **Contextual:** Life's meaning can change depending on individual, cultural, and historical factors.
> * **Ongoing Journey:** The search for meaning is an ongoing process that can evolve over time.
> * **Non-Binary:** Life's meaning can encompass multiple perspectives and does not have to be limited to one definition.

### single api call

In [27]:
test_set_sample = test_set.sample(20)

test_set_sample['pred_label']=''

test_set_sample

Unnamed: 0,review,label,clean_review,pred_label
2559,I love this product so far only because of the...,1,love this product so far only because of the f...,
1015,The sound quality is good just wish alexa coul...,1,the sound quality is good just wish alexa coul...,
2338,"Works fine, I wanted this for my bedroom telev...",1,works fine wanted this for my bedroom television,
939,Good sound. Like it for music and scheduling. ...,1,good sound like it for music and scheduling li...,
2442,"Easy set up, nice looking interface and I like...",1,easy set up nice looking interface and like th...,
2842,"I reached out to Amazon, because the device wa...",0,reached out to amazon because the device wante...,
1059,Honestly I like it but at the same time I don’...,0,honestly like it but at the same time don coul...,
2329,Amazing for all your entertainment needs!,1,amazing for all your entertainment needs,
2005,Why do we need to buy a $100 hub to get it to ...,0,why do we need to buy 100 hub to get it to wor...,
2328,I don't like how I cannot load my Hulu account...,0,don like how cannot load my hulu account it is...,


### convert df to json using to_json() method 

In [30]:
json_data = test_set_sample[['clean_review','pred_label']].to_json(orient='records')
print(json_data)

[{"clean_review":"love this product so far only because of the few things can do with it such as set timer reminder play music and games don like that to get the full enjoyment from it that you have to purchase so many things it was also very difficult to set up the directions made it sound as simple as one two three but they forgot about step four five and six was almost ready to send to back the app is extremely slow and it took hours to get this thing set up however depending on what you are going to be using it for think it an ok item","pred_label":""},{"clean_review":"the sound quality is good just wish alexa could answer more questions","pred_label":""},{"clean_review":"works fine wanted this for my bedroom television","pred_label":""},{"clean_review":"good sound like it for music and scheduling like that can get random thoughts recorded while think about them the jokes are cute","pred_label":""},{"clean_review":"easy set up nice looking interface and like the new slightly larger

In [32]:
prompt = f"""
You are an expert linguier, who is good at classifying customer review sentinence into Positive/Negative. 
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks. 
Your task is to update predicted labels under 'pred_label' in the Json code. 
Don't make any changes to Json code format, please. 


```
{json_data}
""" 
print(prompt)


You are an expert linguier, who is good at classifying customer review sentinence into Positive/Negative. 
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks. 
Your task is to update predicted labels under 'pred_label' in the Json code. 
Don't make any changes to Json code format, please. 


```
[{"clean_review":"love this product so far only because of the few things can do with it such as set timer reminder play music and games don like that to get the full enjoyment from it that you have to purchase so many things it was also very difficult to set up the directions made it sound as simple as one two three but they forgot about step four five and six was almost ready to send to back the app is extremely slow and it took hours to get this thing set up however depending on what you are going to be us

In [33]:
response = model.generate_content(prompt)

print(response.text)

```
[{"clean_review":"love this product so far only because of the few things can do with it such as set timer reminder play music and games don like that to get the full enjoyment from it that you have to purchase so many things it was also very difficult to set up the directions made it sound as simple as one two three but they forgot about step four five and six was almost ready to send to back the app is extremely slow and it took hours to get this thing set up however depending on what you are going to be using it for think it an ok item","pred_label":0},{"clean_review":"the sound quality is good just wish alexa could answer more questions","pred_label":1},{"clean_review":"works fine wanted this for my bedroom television","pred_label":1},{"clean_review":"good sound like it for music and scheduling like that can get random thoughts recorded while think about them the jokes are cute","pred_label":1},{"clean_review":"easy set up nice looking interface and like the new slightly larger

In [40]:
import json

#clean the data by stripping the backticks
json_data = response.text.strip("`")

#load the cleaned data and convert to Df
data_ = json.loads(json_data)
df_sample = pd.DataFrame(data_)

df_sample

Unnamed: 0,clean_review,pred_label
0,love this product so far only because of the f...,0
1,the sound quality is good just wish alexa coul...,1
2,works fine wanted this for my bedroom television,1
3,good sound like it for music and scheduling li...,1
4,easy set up nice looking interface and like th...,1
5,reached out to amazon because the device wante...,0
6,honestly like it but at the same time don coul...,0
7,amazing for all your entertainment needs,1
8,why do we need to buy 100 hub to get it to wor...,0
9,don like how cannot load my hulu account it is...,0


In [41]:
test_set_sample['pred_label'] = df_sample['pred_label'].values
test_set_sample

Unnamed: 0,review,label,clean_review,pred_label
2559,I love this product so far only because of the...,1,love this product so far only because of the f...,0
1015,The sound quality is good just wish alexa coul...,1,the sound quality is good just wish alexa coul...,1
2338,"Works fine, I wanted this for my bedroom telev...",1,works fine wanted this for my bedroom television,1
939,Good sound. Like it for music and scheduling. ...,1,good sound like it for music and scheduling li...,1
2442,"Easy set up, nice looking interface and I like...",1,easy set up nice looking interface and like th...,1
2842,"I reached out to Amazon, because the device wa...",0,reached out to amazon because the device wante...,0
1059,Honestly I like it but at the same time I don’...,0,honestly like it but at the same time don coul...,0
2329,Amazing for all your entertainment needs!,1,amazing for all your entertainment needs,1
2005,Why do we need to buy a $100 hub to get it to ...,0,why do we need to buy 100 hub to get it to wor...,0
2328,I don't like how I cannot load my Hulu account...,0,don like how cannot load my hulu account it is...,0
