# Installing Dependencies

In [5]:
!pip install torch torchvision torchaudio

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [2]:
!pip install transformers pandas numpy



In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [6]:
# Mount Google Drive at /content/drive; the CSV files read later in this
# notebook live under /content/drive/MyDrive.
# Colab-specific: relies on the `google.colab` package.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px

# Loading the Model

In [4]:
# Tokenizer and classifier must come from the same checkpoint, so the
# checkpoint id is written once and shared by both loaders.
checkpoint_id = 'nlptown/bert-base-multilingual-uncased-sentiment'

tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

## Testing the Model

In [10]:
# Drop every row that contains a missing value and renumber the index from 0.
# NOTE(review): `df` is only created by the read_csv cell in the "Dataset 1"
# section further down (In [9]); this cell was executed after it (In [10]) but
# sits above it, so a top-to-bottom Restart & Run All raises NameError here.
df = df.dropna().reset_index(drop=True)

In [14]:
# Encode one example sentence into a PyTorch tensor ('pt') for a smoke test.
sample_review = 'The dinner was amazing'
tokens = tokenizer.encode(sample_review, return_tensors='pt')

In [15]:
# Forward pass on the encoded example; the returned object exposes `.logits`.
result = model(tokens)

In [16]:
# Display the raw class scores for the example (five values, one per class).
result.logits

tensor([[-1.8359, -1.8749, -0.3719,  1.2871,  2.2127]],
       grad_fn=<AddmmBackward0>)

In [17]:
# Index of the highest-scoring class is 0-based; add 1 to report it on a 1-5 scale.
result.logits.argmax().item() + 1

5

The predicted class is 5 on a 1–5 scale, which means the sentiment is very positive.

# Dataset 1

## Dataset loading and preprocessing

In [9]:
# Load the Lok Sabha 2024 tweet export and remove incomplete rows right here,
# at load time. Doing the clean-up in the same cell keeps it idempotent and
# guarantees the sentiment step never receives a missing (NaN) `text` value,
# regardless of the order in which other cells were run.
df = (
    pd.read_csv("/content/drive/MyDrive/DV/DV_Mini_Project/LokSabha_Election_2024_Tweets.csv")
    .dropna()                # drop rows with a missing value in any column
    .reset_index(drop=True)  # renumber rows 0..n-1 after the drop
)
df.head()

Unnamed: 0,link,text,date,No_of_likes,No_of_comments
0,https://twitter.com/Politics_2022_/status/1739...,South India 2024 Loksabha Opinion Poll 132 sea...,"Dec 25, 2023 · 12:05 PM UTC",324,27
1,https://twitter.com/LokmatTimes_ngp/status/173...,"Veteran actor Nana Patekar says, ""There is no ...","Dec 25, 2023 · 1:00 PM UTC",0,0
2,https://twitter.com/Politics_2022_/status/1739...,South India BJP 2019 Loksabha Karnataka 25 Tel...,"Dec 25, 2023 · 12:21 PM UTC",142,18
3,https://twitter.com/rohanayak/status/173926994...,Issh baar 357 seats in Loksabha election.,"Dec 25, 2023 · 1:00 PM UTC",0,0
4,https://twitter.com/BellamSwathi/status/173919...,Total number of times MMS won his own lok sabh...,"Dec 25, 2023 · 8:19 AM UTC",589,80


## Sentiment Analysis using BERT

In [11]:
def analyze_sentiment(text):
    """Classify one piece of text with the notebook's global tokenizer and model.

    Args:
        text: The string to classify. Input that exceeds the tokenizer's
            length limit is truncated (``truncation=True``).

    Returns:
        int: Zero-based index of the highest-scoring class for ``text``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: disable autograd so no computation graph is built or
    # kept alive for every row this function is applied to.
    with torch.no_grad():
        logits = model(**inputs)[0][0]  # first output = logits, first (only) batch row
    # Softmax is monotonic, so the argmax of the raw logits selects the same
    # class as the argmax of the probabilities; no numpy round-trip is needed.
    return int(logits.argmax())

In [12]:
df['sentiment'] = df['text'].apply(analyze_sentiment)
df.head()

Unnamed: 0,link,text,date,No_of_likes,No_of_comments,sentiment
0,https://twitter.com/Politics_2022_/status/1739...,South India 2024 Loksabha Opinion Poll 132 sea...,"Dec 25, 2023 · 12:05 PM UTC",324,27,1
1,https://twitter.com/LokmatTimes_ngp/status/173...,"Veteran actor Nana Patekar says, ""There is no ...","Dec 25, 2023 · 1:00 PM UTC",0,0,3
2,https://twitter.com/Politics_2022_/status/1739...,South India BJP 2019 Loksabha Karnataka 25 Tel...,"Dec 25, 2023 · 12:21 PM UTC",142,18,2
3,https://twitter.com/rohanayak/status/173926994...,Issh baar 357 seats in Loksabha election.,"Dec 25, 2023 · 1:00 PM UTC",0,0,0
4,https://twitter.com/BellamSwathi/status/173919...,Total number of times MMS won his own lok sabh...,"Dec 25, 2023 · 8:19 AM UTC",589,80,0


## Plotting using Plotly

In [13]:
sentiment_labels = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']

# Count tweets per sentiment class. `value_counts` omits classes that never
# occur, which would leave fewer counts than labels and pair the bars with the
# wrong names. Reindexing over every class index (0..4) keeps one count per
# label, in label order, with 0 for absent classes.
sentiment_counts = (
    df['sentiment']
    .value_counts()
    .reindex(range(len(sentiment_labels)), fill_value=0)
)

fig = px.bar(x=sentiment_labels, y=sentiment_counts, labels={'x': 'Sentiment', 'y': 'Count'}, title='Sentiment Analysis of Tweets')
fig.show()

# Dataset 2

In [18]:
# Tweets mentioning Narendra Modi; preview the first two rows.
modi_csv = "/content/drive/MyDrive/DV/DV_Mini_Project/Narendra Modi_data.csv"
df_Modi = pd.read_csv(modi_csv)
df_Modi.head(2)

Unnamed: 0,Date,User,Tweet,Time
0,2022:10:19,QuestionsBotYT,Is Narendra Modi a toaster?,23:57:08
1,2022:10:19,PaperDabba,"5G About To Bring Major Change, Will Revolutio...",23:56:38


In [19]:
# Tweets mentioning Rahul Gandhi; preview the first two rows.
rahul_csv = "/content/drive/MyDrive/DV/DV_Mini_Project/Rahul Gandhi_data.csv"
df_Rahul = pd.read_csv(rahul_csv)
df_Rahul.head(2)

Unnamed: 0,Date,User,Tweet,Time
0,2022:10:19,MdIjran,@JaikyYadav16 इन विकल्पों में से और अभी के समय...,23:55:49
1,2022:10:19,28bde43dae3c430,@ndtv Rahul Gandhi left congress in the mid ro...,23:53:30


In [20]:
# Tweets mentioning Arvind Kejriwal; preview the first two rows.
kejriwal_csv = "/content/drive/MyDrive/DV/DV_Mini_Project/Arvind Kejriwal_data.csv"
df_ak = pd.read_csv(kejriwal_csv)
df_ak.head(2)

Unnamed: 0,Date,User,Tweet,Time
0,2022:10:19,bhoo_sene,@TajinderBagga Aap leaders are speaking agains...,23:47:01
1,2022:10:19,Madhusu88858324,Bjp Aap se sikh rhi h\nNarendra Modi Manish Si...,23:06:35


In [21]:
# Draw the same-sized, identically seeded random sample (500 rows, seed 42)
# from each politician's tweets.
df_Modi_sample, df_Rahul_sample, df_ak_sample = (
    tweets.sample(n=500, random_state=42)
    for tweets in (df_Modi, df_Rahul, df_ak)
)

In [23]:
# Score every sampled tweet; the class index is stored in a new 'sentiment' column.
for sampled in (df_Modi_sample, df_Rahul_sample, df_ak_sample):
    sampled['sentiment'] = sampled['Tweet'].apply(analyze_sentiment)

In [24]:
# Tag each sample with the politician it belongs to, so the rows stay
# distinguishable once the frames are combined.
for sampled, who in (
    (df_Modi_sample, 'Modi'),
    (df_Rahul_sample, 'Rahul'),
    (df_ak_sample, 'Kejriwal'),
):
    sampled['person'] = who

In [25]:
# Stack the three tagged samples into one frame (original row indices are kept).
tagged_samples = [df_Modi_sample, df_Rahul_sample, df_ak_sample]
df_combined = pd.concat(tagged_samples)

In [26]:
# Grouped, percent-normalised histogram comparing the sentiment classes of the
# three politicians' samples.
sentiment_codes = [0, 1, 2, 3, 4]
sentiment_names = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']

fig = px.histogram(
    df_combined,
    x='sentiment',
    color='person',
    barmode='group',
    histnorm='percent',
    category_orders={"sentiment": sentiment_codes},
    labels={'sentiment': 'Sentiment', 'count': 'Count'},
    title='Comparative Sentiment Analysis of Tweets for Modi, Rahul, and Kejriwal',
)

# Show readable class names on the x axis instead of the numeric codes.
fig.update_xaxes(tickvals=sentiment_codes, ticktext=sentiment_names)

fig.show()