In [1]:
'''
Description: Build a sentiment analysis model to classify social media posts as positive, negative, or neutral.
Steps:
1.Data Collection: Gather a dataset of social media posts with labeled sentiments.
2. Text Preprocessing: Clean and preprocess the text data by removing special characters, stopwords, and performing
tokenization.
3. Feature Extraction: Convert the text data into numerical features using techniques like TF-IDF or word embeddings.
4. Model Selection: Choose a suitable classification algorithm such as Naive Bayes, Support Vector Machines, or a neural
network.
5. Model Training: Train the selected model using the preprocessed data.
6. Model Evaluation: Evaluate the model's performance using metrics like accuracy, precision, recall, and F1-score.
7. Deployment: Create a simple web interface where users can input their own text for sentiment analysis.
Tech Stack:
. Python
. Natural Language Processing libraries
. Machine Learning frameworks
'''
#1)
import pandas as pd
data=pd.read_csv('IMDB Dataset.csv')
#2)
import nltk
import re
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
#Remove Special Characters and numbers
data['review']=data['review'].apply(lambda x:re.sub(r'\d+','',x))
data['review']=data['review'].apply(lambda x:re.sub(r'[^\w\s]','',x))
#Perform Tokenization
data['Tokens']=data['review'].apply(word_tokenize)
#Remove Stopwords
stop_words=set(stopwords.words('english'))
data['Tokens']=data['Tokens'].apply(lambda x: [word for word in x if word not in stop_words])
data['Preprocessed_data']=data['Tokens'].apply(lambda x:' '.join(x))
print(data['Preprocessed_data'])


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Muthu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Muthu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


0        One reviewers mentioned watching Oz episode yo...
1        A wonderful little production br br The filmin...
2        I thought wonderful way spend time hot summer ...
3        Basically theres family little boy Jake thinks...
4        Petter Matteis Love Time Money visually stunni...
                               ...                        
49995    I thought movie right good job It wasnt creati...
49996    Bad plot bad dialogue bad acting idiotic direc...
49997    I Catholic taught parochial elementary schools...
49998    Im going disagree previous comment side Maltin...
49999    No one expects Star Trek movies high art fans ...
Name: Preprocessed_data, Length: 50000, dtype: object


In [2]:
#3)
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer=TfidfVectorizer(max_features=1000)
tfidf_features=tfidf_vectorizer.fit_transform(data['Preprocessed_data'])
data['tfidf_features']=list(tfidf_features.toarray())
print(data['tfidf_features'])

0        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
1        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.103...
2        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
3        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
4        [0.0, 0.0, 0.0, 0.0, 0.0, 0.06511788398271987,...
                               ...                        
49995    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
49996    [0.0, 0.0, 0.0, 0.0, 0.0, 0.1188902570665852, ...
49997    [0.0, 0.0, 0.0, 0.1419299357157053, 0.0, 0.087...
49998    [0.0, 0.0, 0.1665917791363702, 0.0, 0.0, 0.0, ...
49999    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
Name: tfidf_features, Length: 50000, dtype: object


In [3]:
#4)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
X=np.array(data['tfidf_features'].tolist())
Y=data['sentiment']
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=42)
from sklearn.naive_bayes import MultinomialNB

#5)
nb_classifier=MultinomialNB()
nb_classifier.fit(X_train,y_train)
y_pred=nb_classifier.predict(X_test)
print("Classification Report:")
print(classification_report(y_test,y_pred))

Classification Report:
              precision    recall  f1-score   support

    negative       0.84      0.83      0.83      7411
    positive       0.83      0.84      0.84      7589

    accuracy                           0.83     15000
   macro avg       0.83      0.83      0.83     15000
weighted avg       0.83      0.83      0.83     15000



In [4]:
#6)
from sklearn.metrics import precision_score,recall_score,f1_score,accuracy_score
print("Accuracy:",accuracy_score(y_test,y_pred))
print("Precison:",precision_score(y_test,y_pred,average='weighted'))
print("F1-Score:",f1_score(y_test,y_pred,average='weighted'))
print("Recall:",recall_score(y_test,y_pred,average='weighted'))

Accuracy: 0.8339333333333333
Precison: 0.8339524319516478
F1-Score: 0.8339173395924018
Recall: 0.8339333333333333


In [5]:
#7)
#Save the Model and vectorizer using Pickle
import pickle
with open('model.pkl','wb') as f:
    pickle.dump(nb_classifier,f)
with open('vectorizer.pkl','wb') as f:
    pickle.dump(tfidf_vectorizer,f)

In [6]:
!pip install Flask flask-ngrok



In [10]:
#Loading the Model
with open('model.pkl','rb') as f:
    model=pickle.load(f)
with open('vectorizer.pkl','rb') as f:
    vectorizer=pickle.load(f)
#Creating a UI
from flask import Flask,request,render_template_string
from flask_ngrok import run_with_ngrok
app=Flask(__name__)
run_with_ngrok(app)
#HTML Template as String
template='''
    <html>
        <head>
            <title>ML Prediction</title>
            <style>
    
            </style>
        </head>
        <body>
            <h1>Machine Learning Model Prediction</h1>
            <form action='/predict' method='post'>
                <label for="input_data">Enter the Movie Review</label>
             <textarea name="input_text"></textarea>
                <input type="submit"/>
            </form>
            {% if prediction %}
            <p id='ans_predict'>Predicted Result: {{ prediction }}</p>
            {% endif %}
        </body>
    </html> '''
@app.route('/')
def home():
    return render_template_string(template,prediction=None)
@app.route('/predict',methods=['POST'])
def predict():
    if(request.method=='POST'):
        data=request.form['input_text']
        num_features=vectorizer.transform([data])
        prediction=model.predict(num_features)[0]
        return render_template_string(template,prediction=prediction)
#Run the App
from threading import Thread
def run_app():
    app.run()
flask_thread=Thread(target=run_app)
flask_thread.start()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [05/Jun/2024 15:03:20] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:03:27] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:03:30] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:03:38] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:03:44] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:03:47] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:03:56] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:04:01] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:04:06] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:04:08] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [05/Jun/2024 15:04:17] "POST /predict HTTP/1.1" 200 -
