In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# files stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os

# Import functions for data preprocessing & data preparation
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from sklearn.feature_extraction.text import CountVectorizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer, LancasterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
from nltk.corpus import wordnet
import string
from string import punctuation
import nltk
import re

In [None]:
# Switch the working directory to the project folder so that relative paths
# (e.g. 'comments.csv') resolve inside it.
cd /content/drive/MyDrive/project sentiment analysis

/content/drive/MyDrive/project sentiment analysis


In [None]:
# Load the scraped comments from the current working directory and preview
# the first rows.
data=pd.read_csv('comments.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Comment,Likes,Time,user,UserLink
0,0,Love how Dr. Fate's design looks and how cool ...,12K,9 days ago,,
1,1,I can’t get over how good everything looks. Dr...,2.6K,7 days ago,,
2,2,Really hoping that this can save DC's movie un...,814,7 days ago,,
3,3,U cant deny how good this looks.Now if they ca...,91,6 days ago,,
4,4,"From this trailer, I have a feeling that this ...",367,7 days ago (edited),,


In [None]:
# Discard the metadata columns; only the comment text (and the leftover
# 'Unnamed: 0.1' column) is carried forward.
data1 = data.drop(columns=['Unnamed: 0', 'Likes', 'Time', 'user', 'UserLink'])
data1.head()

Unnamed: 0,Comment
0,Love how Dr. Fate's design looks and how cool ...
1,I can’t get over how good everything looks. Dr...
2,Really hoping that this can save DC's movie un...
3,U cant deny how good this looks.Now if they ca...
4,"From this trailer, I have a feeling that this ..."


In [None]:
nltk.download('vader_lexicon')
sentiments = SentimentIntensityAnalyzer()

# Score each comment exactly once and reuse the resulting dict for all four
# columns (previously polarity_scores ran four times per comment).
polarity = [sentiments.polarity_scores(comment) for comment in data1["Comment"]]
data1["Positive"] = [scores["pos"] for scores in polarity]
data1["Negative"] = [scores["neg"] for scores in polarity]
data1["Neutral"] = [scores["neu"] for scores in polarity]
data1['Compound'] = [scores["compound"] for scores in polarity]

# Map the compound score to a class label:
#   compound >= 0.05  -> 'Positive'
#   compound <= -0.05 -> 'Negative'
#   otherwise         -> 'Neutral'
score = data1["Compound"].values
sentiment = [
    'Positive' if value >= 0.05 else 'Negative' if value <= -0.05 else 'Neutral'
    for value in score
]
data1["Sentiment"] = sentiment
data1.head()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Unnamed: 0,Comment,Positive,Negative,Neutral,Compound,Sentiment
0,Love how Dr. Fate's design looks and how cool ...,0.384,0.0,0.616,0.891,Positive
1,I can’t get over how good everything looks. Dr...,0.153,0.0,0.847,0.6801,Positive
2,Really hoping that this can save DC's movie un...,0.375,0.0,0.625,0.9216,Positive
3,U cant deny how good this looks.Now if they ca...,0.302,0.049,0.649,0.9262,Positive
4,"From this trailer, I have a feeling that this ...",0.131,0.0,0.869,0.4416,Positive


In [None]:
# Keep only the comment text and its derived label; the raw VADER scores are
# no longer needed once the label has been assigned.
data2 = data1.drop(columns=['Positive', 'Negative', 'Neutral', 'Compound'])
data2.head()

Unnamed: 0,Comment,Sentiment
0,Love how Dr. Fate's design looks and how cool ...,Positive
1,I can’t get over how good everything looks. Dr...,Positive
2,Really hoping that this can save DC's movie un...,Positive
3,U cant deny how good this looks.Now if they ca...,Positive
4,"From this trailer, I have a feeling that this ...",Positive


In [None]:
import nltk
# Fetch the NLTK stop-word corpus and load the English list; text_processing
# filters tokens against `stop_words`.
nltk.download('stopwords')
stop_words = stopwords.words('english')
# NOTE(review): the three stemmers below are not referenced by any other
# visible cell; only the lemmatizer `lzr` is used (in text_processing).
porter_stemmer = PorterStemmer()
lancaster_stemmer = LancasterStemmer()
snowball_stemer = SnowballStemmer(language="english")
lzr = WordNetLemmatizer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
def text_processing(text):
    """Normalize a raw comment into a space-separated string of lemmatized tokens.

    Steps, in order: lowercase, drop @mentions and #hashtags, replace
    punctuation and any other non-word character with a space, collapse
    whitespace, remove English stop words, lemmatize each remaining token.

    Parameters
    ----------
    text : str
        Raw comment text. ``None`` and NaN are treated as an empty comment;
        any other non-string value is converted with ``str()``.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces (may be an empty string).
    """
    # Missing values (None, or the float NaN pandas uses for empty cells)
    # have no .lower(); treat them as an empty comment instead of crashing.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    # convert text into lowercase
    text = str(text).lower()

    # remove references (@user) and hashtags (#tag); this has to run before
    # punctuation is stripped, otherwise the '@' / '#' markers are already gone.
    # (The previous pattern "^a-zA-Z0-9$,." lacked brackets and never matched.)
    text = re.sub(r'[@#]\w+', ' ', text)

    # replace punctuation with a space rather than deleting it, so that
    # "looks.Now" becomes "looks now" instead of the glued token "looksnow"
    text = re.sub('[%s]' % re.escape(punctuation), ' ', text)

    # replace remaining special characters (newlines, emoji, curly quotes, ...)
    text = re.sub(r'\W', ' ', text)

    # collapse runs of whitespace left behind by the substitutions above
    text = re.sub(r'\s+', ' ', text).strip()

    # tokenize once, drop stop words, then lemmatize the surviving tokens
    tokens = [word for word in word_tokenize(text) if word not in stop_words]
    return ' '.join(lzr.lemmatize(word) for word in tokens)

In [None]:
# Tokenizer models and WordNet data required by text_processing.
for resource in ('punkt', 'wordnet'):
    nltk.download(resource)

# Work on a copy so data2 keeps the raw comments and string labels.
data_copy = data2.copy()
data_copy['Comment'] = data_copy['Comment'].apply(text_processing)

# Encode the string labels as integers; `le` is kept so predictions can be
# decoded back to label names later.
le = LabelEncoder()
data_copy['Sentiment'] = le.fit_transform(data_copy['Sentiment'])

processed_data = pd.DataFrame({
    'Sentence': data_copy['Comment'],
    'Sentiment': data_copy['Sentiment'],
})
processed_data.head()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Unnamed: 0,Sentence,Sentiment
0,love dr fate design look cool scene look power...,2
1,get good everything look dr fate magic cyclone...,2
2,really hoping save dc movie universe looking n...,2
3,u cant deny good looksnow follow rest movie go...,2
4,trailer feeling movie going one movie would ne...,2


In [None]:
# Class distribution before balancing (integer codes produced by `le`).
processed_data['Sentiment'].value_counts()

2    205
1     39
0     36
Name: Sentiment, dtype: int64

In [None]:
# LabelEncoder assigns integer codes in sorted class order, so here
# 0 = Negative, 1 = Neutral, 2 = Positive.
df_neutral = processed_data[processed_data['Sentiment'] == 1]
df_negative = processed_data[processed_data['Sentiment'] == 0]
df_positive = processed_data[processed_data['Sentiment'] == 2]

# Upsample both minority classes to the size of the positive class. Deriving
# the target from the data (instead of a hard-coded 205) keeps the classes
# balanced if comments.csv changes.
target_size = len(df_positive)

df_negative_upsampled = resample(df_negative,
                                 replace=True,
                                 n_samples=target_size,
                                 random_state=42)

df_neutral_upsampled = resample(df_neutral,
                                replace=True,
                                n_samples=target_size,
                                random_state=42)

# Concatenate the upsampled minority classes with the untouched positive class.
# NOTE(review): sampling with replacement duplicates rows, and the train/test
# split later in the notebook is taken from this balanced frame, so copies of
# the same sentence can end up on both sides of the split. Consider splitting
# first and upsampling only the training portion.
final_data = pd.concat([df_negative_upsampled, df_neutral_upsampled, df_positive])

In [None]:
# Class distribution after upsampling; all three classes should be equal.
final_data['Sentiment'].value_counts()

0    205
1    205
2    205
Name: Sentiment, dtype: int64

In [None]:
# Materialize the cleaned sentences as a plain Python list for the vectorizer
# and preview the first five.
corpus = list(final_data['Sentence'])
corpus[:5]


['trailer look sick im definitely watching movie',
 'actually look like villain trailer',
 'movie going push dc top comic book movie disaster early dceu new msheu mess two awesome projekts behind suicide squad peacemaker yeah going rock pun kinda intended',
 'damn sure im gon na watchdc seems going right track',
 'okay look absolutely incredible dc making look foolish ever even skeptical film definitely seeing opening weekend']

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words features capped at 1500 vocabulary entries; `cv` is kept so
# new text can be transformed with the same vocabulary at prediction time.
cv = CountVectorizer(max_features=1500)
X = cv.fit_transform(corpus).toarray()

# Target vector: the integer-encoded sentiment (last column of final_data).
y = final_data['Sentiment'].to_numpy()

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation; random_state fixes the shuffle.
# NOTE(review): X / y are built from final_data, which contains rows duplicated
# by upsampling, so identical sentences can appear in both the train and test
# portions. The held-out accuracy is therefore likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Predict on the held-out rows.
y_pred = classifier.predict(X_test)
# The confusion matrix is stored in `cm` but not displayed by this cell.
cm = confusion_matrix(y_test, y_pred)
# Accuracy expressed as a percentage.
nb_score = accuracy_score(y_test, y_pred)*100
print('accuracy',nb_score)

accuracy 93.51351351351352


In [None]:
# `Doc` is only available in newer typing_extensions releases; the installed
# version raised ImportError here and aborted "Run All". Guard the import so
# the notebook keeps running, leaving `Doc` as None when it is unavailable.
try:
    from typing_extensions import Doc
except ImportError:
    Doc = None


ImportError: cannot import name 'Doc' from 'typing_extensions' (/usr/local/lib/python3.10/dist-packages/typing_extensions.py)

In [None]:
from typing import List, Any

def example_function(param1: str, param2: int) -> List[str]:
    """Placeholder showing a typed function signature.

    Parameters
    ----------
    param1 : str
        Currently unused.
    param2 : int
        Currently unused.

    Returns
    -------
    List[str]
        An empty list until real logic is added. (The function previously
        returned an undefined name and raised NameError when called.)
    """
    # Function logic here
    result_list: List[str] = []
    return result_list


In [None]:
!pip install gradio numpy scikit-learn nltk


In [None]:
import gradio as gr

# Uses `text_processing`, `cv`, `classifier`, and `le` defined in the earlier cells; run those cells first.

def predict_sentiment(text):
    """Return the predicted sentiment label for a raw text string.

    The text is cleaned with ``text_processing``, vectorized with the fitted
    ``cv``, classified with ``classifier``, and the integer prediction is
    decoded back to its label through ``le``.
    """
    cleaned = text_processing(text)
    features = cv.transform([cleaned]).toarray()
    label_code = classifier.predict(features)[0]
    return le.inverse_transform([label_code])[0]

# Gradio UI
# Text input and label output components for the demo.
input_text = gr.Textbox(placeholder="Enter your text here...", label="Input Text")
output_label = gr.Label(label="Predicted Sentiment")

# Build the interface around predict_sentiment and start it. live=True makes
# Gradio re-run the function whenever the input changes instead of waiting for
# a submit click.
gr.Interface(fn=predict_sentiment, inputs=input_text, outputs=output_label, live=True).launch()
