# SENTIMENT ANALYSIS

# Importing Libraries

In [27]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Loading the Dataset

In [None]:
# Read the IMDB reviews CSV into a DataFrame; later cells use its
# "review" and "sentiment" columns.
df = pd.read_csv(filepath_or_buffer='/content/IMDB Dataset.csv')

# Display the loaded frame as a quick sanity check.
df

# Counting Sentiments

In [None]:
# Class-balance check: number of reviews per sentiment label.
df["sentiment"].value_counts()

# Encoding Sentiment Labels

In [4]:
# Encode the labels as integers: "positive" -> 1, "negative" -> 0.
# The cast to int64 is explicit because `replace` alone only yields an integer
# column when pandas silently downcasts the object-dtype result, and that
# downcasting is deprecated. An object-dtype label column cannot be fed to
# `model.fit`. Using `replace` (rather than `map`) keeps this cell safe to
# re-run: already-encoded 0/1 values are left untouched.
df["sentiment"] = (
    df["sentiment"]
    .replace({"positive": 1, "negative": 0})
    .astype("int64")
)

In [None]:
# Display the frame again to confirm the sentiment column now holds 0/1 codes.
df

In [None]:
# Peek at the first rows of the encoded data.
df.head()

In [None]:
# Peek at the last rows of the encoded data.
df.tail()

In [None]:
# Re-count the labels after encoding; the counts are now keyed by 1 and 0.
df["sentiment"].value_counts()

# Importing Key Libraries for Modeling

In [9]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Splitting Data

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Inspect the training portion of the split.
train_data

In [None]:
# Inspect the held-out test portion of the split.
test_data

# Tokenizing Text Data

In [13]:
# Build the word index from the training reviews only, so no vocabulary
# information leaks in from the test split. num_words=5000 caps the
# vocabulary used when texts are later converted to integer sequences.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(texts=train_data["review"])

# Padding Sequences

In [14]:
def _pad_reviews(reviews):
    """Convert raw review texts to integer sequences of fixed length 200."""
    token_ids = tokenizer.texts_to_sequences(reviews)
    return pad_sequences(token_ids, maxlen=200)


# Apply the same encoding to both splits so train and test inputs match.
x_train = _pad_reviews(train_data["review"])
x_test = _pad_reviews(test_data["review"])

In [None]:
# Inspect the padded training sequences (one row of length 200 per review).
x_train

In [None]:
# Inspect the padded test sequences (one row of length 200 per review).
x_test

In [17]:
# Targets for each split: the encoded sentiment column.
y_train, y_test = train_data["sentiment"], test_data["sentiment"]

In [None]:
# Inspect the training labels.
y_train

# Model Definition

In [19]:
# Binary sentiment classifier: Embedding -> LSTM -> single sigmoid unit.
#   - input_dim=5000 matches the tokenizer's num_words cap.
#   - The sequence length (200, matching the pad_sequences maxlen) is supplied
#     through model.build() below. Embedding's `input_length` argument is
#     deprecated in Keras 3 and redundant with that call, so it is not passed.
model = Sequential([
    Embedding(input_dim=5000, output_dim=128),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation="sigmoid"),
])

# Build eagerly so the weights exist and model.summary() can report shapes.
model.build(input_shape=(None, 200))

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

# Compiling and Training

In [None]:
import joblib

# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train for 5 epochs; 20% of the training rows are held back for validation.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

# Score the model on the held-out test split, which is otherwise never used.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test loss: {test_loss:.4f} - test accuracy: {test_accuracy:.4f}")

# Persist the trained model and the fitted tokenizer. The prediction cell
# reloads both from exactly these paths and fails with a missing-file error
# if they were never written.
# NOTE: this overwrites any existing files at these paths.
model.save("/content/model.h5")
joblib.dump(tokenizer, "/content/tokenizer.pkl")

# Prediction System

In [None]:
# Import load_model from tensorflow.keras, like every other Keras import in
# this notebook, so the model is loaded by the same Keras implementation that
# built and trained it.
from tensorflow.keras.models import load_model
import joblib
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Reload the trained model and the fitted tokenizer from disk. Both files
# must already exist at these paths.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load a tokenizer file that you created yourself.
model = load_model("/content/model.h5")
tokenizer = joblib.load("/content/tokenizer.pkl")

from collections.abc import Sequence
def predictive_system(review):
    """Classify a single review text as "positive" or "negative".

    The review is encoded with the module-level ``tokenizer``, fitted to
    length 200 with ``pad_sequences``, and scored by the module-level
    ``model``.

    Args:
        review: The review text to classify.

    Returns:
        "positive" if the model's score is above 0.5, otherwise "negative".
    """
    # The tokenizer expects a list of texts, so wrap the single review.
    encoded = tokenizer.texts_to_sequences([review])
    model_input = pad_sequences(encoded, maxlen=200)

    # predict() returns one row per input; take the single score of row 0.
    score = model.predict(model_input)[0][0]

    if score > 0.5:
        return "positive"
    return "negative"

# Testing the Prediction System

In [None]:
# Smoke-test the prediction helper on a short sample phrase.
review_sentiment = predictive_system("it is good")

In [None]:
# Show the predicted label ("positive" or "negative").
review_sentiment

# Deploying with Gradio

In [None]:
!pip install gradio

In [None]:
import gradio as gr

title = "SENTIMENT ANALYSIS APPLICATION"

# Wrap predictive_system in a minimal web UI: one text box in, one text box out.
app = gr.Interface(
    fn=predictive_system,
    inputs="textbox",
    outputs="textbox",
    title=title,
)

# share=True asks Gradio for a shareable link in addition to the local server.
app.launch(share=True)
