In [2]:
import pandas as pd

# Load your dataset
df = pd.read_excel("NLP test data for assignment 0324.xlsb", usecols=["OriginalTweet", "Sentiment"])
df.head(10)

Unnamed: 0,OriginalTweet,Sentiment
0,TRENDING: New Yorkers encounter empty supermar...,Extremely Negative
1,When I couldn't find hand sanitizer at Fred Me...,Positive
2,Find out how you can protect yourself and love...,Extremely Positive
3,#Panic buying hits #NewYork City as anxious sh...,Negative
4,#toiletpaper #dunnypaper #coronavirus #coronav...,Neutral
5,Do you remember the last time you paid $2.99 a...,Neutral
6,Voting in the age of #coronavirus = hand sanit...,Positive
7,"@DrTedros ""We canÂ’t stop #COVID19 without pro...",Neutral
8,HI TWITTER! I am a pharmacist. I sell hand san...,Extremely Negative
9,Anyone been in a supermarket over the last few...,Extremely Positive


In [3]:
# Map labels to numerical values
sentiment_mapping = {
    'Extremely Negative': 0,
    'Negative': 1,
    'Neutral': 2,
    'Positive': 3,
    'Extremely Positive': 4
}
df['label'] = df['Sentiment'].map(sentiment_mapping)

In [4]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def preprocess_text(text):
    # Remove links
    text = re.sub(r'https?://\S+', '', text)

    # Tokenization
    tokens = word_tokenize(text)

    # Lowercasing
    tokens = [token.lower() for token in tokens]

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]

    # Remove punctuation
    tokens = [re.sub(r'[^\w\s]', '', token) for token in tokens]

    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]

    # Join tokens back into a string
    preprocessed_text = ' '.join(tokens)

    return preprocessed_text

# Load dataset
#data = pd.read_csv("your_dataset.csv")  # Replace "your_dataset.csv" with the path to your dataset

# Apply preprocessing to a column in the dataset
df['Original_tweet'] = df['OriginalTweet'].apply(preprocess_text)

# Display the preprocessed text column
print(df['Original_tweet'])


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\rupesh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rupesh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rupesh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


0       trending  new yorkers encounter empty supermar...
1       could nt find hand sanitizer fred meyer  turne...
2                   find protect loved one  coronavirus  
3        panic buying hit  newyork city anxious shoppe...
4        toiletpaper  dunnypaper  coronavirus  coronav...
                              ...                        
1464    supermarket  woman buy load toilet paper men p...
1465    light pandemic  moved online shopping  free sh...
1466    buy gift certificate others may selfquarantine...
1467    re going selfquarantine  highly recommend supp...
1468    grocery store  customer bought cart full groce...
Name: Original_tweet, Length: 1469, dtype: object


In [5]:
df.drop(['Sentiment'], axis=1, inplace=True)

In [6]:
df

Unnamed: 0,OriginalTweet,label,Original_tweet
0,TRENDING: New Yorkers encounter empty supermar...,0,trending new yorkers encounter empty supermar...
1,When I couldn't find hand sanitizer at Fred Me...,3,could nt find hand sanitizer fred meyer turne...
2,Find out how you can protect yourself and love...,4,find protect loved one coronavirus
3,#Panic buying hits #NewYork City as anxious sh...,1,panic buying hit newyork city anxious shoppe...
4,#toiletpaper #dunnypaper #coronavirus #coronav...,2,toiletpaper dunnypaper coronavirus coronav...
...,...,...,...
1464,Just been to the supermarket. Why do all women...,2,supermarket woman buy load toilet paper men p...
1465,"In light of the pandemic, i have moved to onli...",4,light pandemic moved online shopping free sh...
1466,Or buy gift certificates for others who may be...,4,buy gift certificate others may selfquarantine...
1467,"If you're going to self-quarantine, I highly r...",4,re going selfquarantine highly recommend supp...


In [7]:
df.value_counts('label')

label
1    425
3    352
4    241
2    238
0    213
Name: count, dtype: int64

In [8]:
!pip install -q -U google-generativeai

In [9]:
# Necessary packages
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))



In [10]:

GOOGLE_API_KEY="AIzaSyDh793MNQDNs2h7yAZs0VsytZdMvRczaf8"

genai.configure(api_key=GOOGLE_API_KEY)

In [11]:
model = genai.GenerativeModel('gemini-pro')

In [12]:
%%time
response = model.generate_content("What is Sentiment analysis?")

to_markdown(response.text)

CPU times: total: 31.2 ms
Wall time: 6.98 s


> **Sentiment Analysis**
> 
> Sentiment analysis is a technique used in natural language processing (NLP) to identify and extract the sentiment expressed in text data. It involves determining whether a given piece of text conveys positive, negative, or neutral feelings or opinions.
> 
> **Key Features:**
> 
> * **Polarity Detection:** Identifies whether the sentiment is positive, negative, or neutral.
> * **Intensity Estimation:** Determines the strength or magnitude of the sentiment expressed.
> * **Contextual Understanding:** Considers the context and subjectivity of the text to accurately interpret sentiment.
> 
> **Process:**
> 
> Sentiment analysis algorithms typically follow these steps:
> 
> 1. **Preprocessing:** Cleaning and preparing the text by removing stop words, punctuation, and other noise.
> 2. **Feature Extraction:** Identifying specific words, phrases, or entities that convey sentiment.
> 3. **Sentiment Scoring:** Assigning weights to features based on their perceived sentiment.
> 4. **Aggregation:** Combining the scores to determine the overall sentiment of the text.
> 
> **Types of Sentiment Analysis:**
> 
> * **Rule-Based:** Uses predefined rules and dictionaries to identify sentiment-carrying words and phrases.
> * **Machine Learning:** Trains algorithms on labeled data to learn patterns and predict sentiment.
> * **Hybrid:** Combines rule-based and machine learning approaches for improved accuracy.
> 
> **Applications:**
> 
> Sentiment analysis is widely used in various industries, including:
> 
> * **Customer Reviews:** Analyzing customer feedback to understand product or service sentiment.
> * **Social Media Monitoring:** Tracking brand sentiment and identifying trends or crises.
> * **Political Analysis:** Monitoring sentiment towards candidates or policies.
> * **Financial Analysis:** Identifying market sentiment and predicting stock prices.
> * **Healthcare:** Analyzing patient feedback to improve treatment and medication outcomes.

In [13]:
test_set_sample = df.sample(20)

test_set_sample['pred_label'] = ''

test_set_sample

Unnamed: 0,OriginalTweet,label,Original_tweet,pred_label
105,We (@hamiltonpoverty) are calling on Canadian ...,4,hamiltonpoverty calling canadian federal go...,
478,Have been talking to lots of Trump supporters ...,0,talking lot trump supporter across country las...,
1379,Shopping carts at our local grocery store. Yes...,3,shopping cart local grocery store yes theyâ ...,
1228,People at the grocery store buying all the bot...,3,people grocery store buying bottled water like...,
968,None of these fuckers have been in a superstor...,3,none fucker superstore grocery store covid_19,
38,1/25 Part 1 - Why does soap work so well on th...,1,125 part 1 soap work well sarscov2 coronavir...,
1321,@BrentJSteele1 The toilet paper was entirely g...,1,brentjsteele1 toilet paper entirely gone groc...,
1409,I'm going out today to stock up on food not TP...,3,m going today stock food tp hear s gone store ...,
107,Local media in Naples reported that Seria A fo...,1,local medium naples reported seria football pl...,
307,The fact that I probably will not have my work...,0,fact probably work close cancel covid19 kinda ...,


In [14]:
# Convert the DataFrame to JSON using the to_json() method

json_data = test_set_sample[['Original_tweet','pred_label']].to_json(orient='records')

# Print the JSON data
print(json_data)

[{"Original_tweet":"  hamiltonpoverty  calling canadian federal government   provide  basicincome wellbeing stimulus payment  current covid19 situation  basic income  even interim  could help keep people healthy  revive consumer confidence spending   cdnpoli","pred_label":""},{"Original_tweet":"talking lot trump supporter across country last couple day  disdain distrust medium right worse ve seen point throughout presidency ","pred_label":""},{"Original_tweet":"shopping cart local grocery store  yes  they\u00e2  inside line looked long  atm cash well  wanted buy gift card give away stream tonight  worse  virus panic   covid_19  coronaviruspandemic","pred_label":""},{"Original_tweet":"people grocery store buying bottled water like tap water oozing  covid_19 faucet  lemming ","pred_label":""},{"Original_tweet":"none fucker superstore grocery store  covid_19","pred_label":""},{"Original_tweet":"125 part 1  soap work well sarscov2  coronavirus indeed virus  selfassembled nanoparticle weake

In [15]:
prompt = f"""
You are an expert in tweet discussing the impact of the coronavirus pandemic on daily life . 
Consider aspects such as empty supermarkets, scarcity of hand sanitizer, panic buying, price changes, preventive measures, and experiences in supermarkets during the pandemic. 
Your tweet should reflect on how these factors have affected you or your community. 
Aim to convey a balanced perspective while capturing the essence of the challenges faced and potential adaptations made during these uncertain times., 
who is good at classifying the tweets you help me to clsaaify the tweets Positive(label=3),Negative(label=1),Extremely Negative:(label=0),Extremely Positive:(label=4), Neutral: (label=2)

In your output, only return the Json code back as output - which is provided between three backticks.
Your task is to update predicted labels under 'pred_label' in the Json code.
Don't make any changes to Json code format, please.

```
{json_data}
```
"""

print(prompt)


You are an expert in tweet discussing the impact of the coronavirus pandemic on daily life . 
Consider aspects such as empty supermarkets, scarcity of hand sanitizer, panic buying, price changes, preventive measures, and experiences in supermarkets during the pandemic. 
Your tweet should reflect on how these factors have affected you or your community. 
Aim to convey a balanced perspective while capturing the essence of the challenges faced and potential adaptations made during these uncertain times., 
who is good at classifying the tweets you help me to clsaaify the tweets Positive(label=3),Negative(label=1),Extremely Negative:(label=0),Extremely Positive:(label=4), Neutral: (label=2)

In your output, only return the Json code back as output - which is provided between three backticks.
Your task is to update predicted labels under 'pred_label' in the Json code.
Don't make any changes to Json code format, please.

```
[{"Original_tweet":"  hamiltonpoverty  calling canadian federal gov

In [16]:
response = model.generate_content(prompt)

print(response.text)

```
[{"Original_tweet":"  hamiltonpoverty  calling canadian federal government   provide  basicincome wellbeing stimulus payment  current covid19 situation  basic income  even interim  could help keep people healthy  revive consumer confidence spending   cdnpoli","pred_label":3},{"Original_tweet":"talking lot trump supporter across country last couple day  disdain distrust medium right worse ve seen point throughout presidency ","pred_label":1},{"Original_tweet":"shopping cart local grocery store  yes  they\u00e2  inside line looked long  atm cash well  wanted buy gift card give away stream tonight  worse  virus panic   covid_19  coronaviruspandemic","pred_label":1},{"Original_tweet":"people grocery store buying bottled water like tap water oozing  covid_19 faucet  lemming ","pred_label":0},{"Original_tweet":"none fucker superstore grocery store  covid_19","pred_label":0},{"Original_tweet":"125 part 1  soap work well sarscov2  coronavirus indeed virus  selfassembled nanoparticle weakes

In [17]:
import json

# Clean the data by stripping the backticks
json_data = response.text.strip("`")

# Load the cleaned data and convert to DataFrame
data = json.loads(json_data)
df_sample = pd.DataFrame(data)

df_sample

Unnamed: 0,Original_tweet,pred_label
0,hamiltonpoverty calling canadian federal go...,3
1,talking lot trump supporter across country las...,1
2,shopping cart local grocery store yes theyâ ...,1
3,people grocery store buying bottled water like...,0
4,none fucker superstore grocery store covid_19,0
5,125 part 1 soap work well sarscov2 coronavir...,4
6,brentjsteele1 toilet paper entirely gone groc...,1
7,m going today stock food tp hear s gone store ...,1
8,local medium naples reported seria football pl...,1
9,fact probably work close cancel covid19 kinda ...,0


In [21]:
# Map labels to numerical values
sentiment_mapping = {
     0:'Extremely Negative',
     1:'Negative',
     2:'Neutral',
     3:'Positive',
    4:'Extremely Positive'
}
df_sample['label'] = df_sample['pred_label'].map(sentiment_mapping)

In [22]:
df_sample

Unnamed: 0,Original_tweet,pred_label,label
0,hamiltonpoverty calling canadian federal go...,3,Positive
1,talking lot trump supporter across country las...,1,Negative
2,shopping cart local grocery store yes theyâ ...,1,Negative
3,people grocery store buying bottled water like...,0,Extremely Negative
4,none fucker superstore grocery store covid_19,0,Extremely Negative
5,125 part 1 soap work well sarscov2 coronavir...,4,Extremely Positive
6,brentjsteele1 toilet paper entirely gone groc...,1,Negative
7,m going today stock food tp hear s gone store ...,1,Negative
8,local medium naples reported seria football pl...,1,Negative
9,fact probably work close cancel covid19 kinda ...,0,Extremely Negative
