# ✅Importing Libraries:

In [1]:
import streamlit as st
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import nltk
from datetime import datetime
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import string
import joblib
# Download NLTK resources
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\منه\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\منه\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\منه\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\منه\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# ✅Reading Dataset,Modifications:

In [2]:
data = pd.read_csv("sentimentdataset.csv")

data.rename(columns={'Sentiment (Label)': 'Target'}, inplace=True)
#removing any spaces and making all of them lower case
data['Target'] = data['Target'].str.strip()
data['Target'] = data['Target'].str.lower()
data['Source'] = data['Source'].str.strip()
data['Source'] = data['Source'].str.lower()
data['Country'] = data['Country'].str.strip()
data['Country'] = data['Country'].str.lower()


In [3]:
data.head()

Unnamed: 0,ID,Text,Target,Timestamp,User,Source,Topic,Retweets,Likes,Country,Year,Month,Day,Hour
0,0,Enjoying a beautiful day at the park! ...,positive,1/15/2023 12:30,User123,twitter,#Nature #Park,15,30,usa,2023,1,15,12
1,1,Traffic was terrible this morning. ...,negative,1/15/2023 8:45,CommuterX,twitter,#Traffic #Morning,5,10,canada,2023,1,15,8
2,2,Just finished an amazing workout! 💪 ...,positive,1/15/2023 15:45,FitnessFan,instagram,#Fitness #Workout,20,40,usa,2023,1,15,15
3,3,Excited about the upcoming weekend getaway! ...,positive,1/15/2023 18:20,AdventureX,facebook,#Travel #Adventure,8,15,uk,2023,1,15,18
4,4,Trying out a new recipe for dinner tonight. ...,neutral,1/15/2023 19:55,ChefCook,instagram,#Cooking #Food,12,25,australia,2023,1,15,19


# ✅Changing Target column to only have positive,negative,neutral:

In [4]:
Positive_sentiments = [
    'positive', 'happiness', 'joy', 'love', 'amusement', 'enjoyment', 'admiration', 'affection', 'awe',
    'acceptance', 'adoration', 'anticipation', 'calmness', 'excitement', 'kind', 'pride', 'elation',
    'euphoria', 'contentment', 'serenity', 'gratitude', 'hope', 'empowerment', 'compassion', 'tenderness',
    'arousal', 'enthusiasm', 'fulfillment', 'reverence', 'curiosity', 'determination', 'zest', 'hopeful',
    'proud', 'grateful', 'empathetic', 'compassionate', 'playful', 'free-spirited', 'inspired', 'confident',
    'thrill', 'overjoyed', 'inspiration', 'motivation', 'satisfaction', 'blessed', 'appreciation', 'confidence',
    'accomplishment', 'wonderment', 'optimism', 'enchantment', 'intrigue', 'playfuljoy', 'mindfulness', 'dreamchaser',
    'elegance', 'whimsy', 'harmony', 'creativity', 'radiance', 'wonder', 'rejuvenation', 'coziness', 'adventure',
    'melodic', 'festivejoy', 'innerjourney', 'freedom', 'dazzle', 'artisticburst', 'culinaryodyssey', 'resilience',
    'immersion', 'spark', 'marvel', 'positivity', 'kindness', 'friendship', 'success', 'exploration', 'amazement',
    'romance', 'captivation', 'tranquility', 'grandeur', 'emotion', 'energy', 'celebration', 'charm', 'ecstasy',
    'colorful', 'hypnotic', 'connection', 'iconic', 'journey', 'engagement', 'touched', 'triumph', 'heartwarming',
    'solace', 'breakthrough', 'joy in baking', 'envisioning history', 'imagination', 'vibrancy', 'mesmerizing',
    'culinary adventure', 'winter magic', 'thrilling journey', "nature's beauty", 'celestial wonder', 'creative inspiration',
    'runway creativity', "ocean's freedom", 'whispers of the past', 'relief','happy','joyfulreunion','adrenaline'
]

Negative_sentiments = [
    'negative', 'anger', 'fear', 'sadness', 'disgust', 'disappointed', 'bitter', 'confusion', 'shame',
    'despair', 'grief', 'loneliness', 'jealousy', 'resentment', 'frustration', 'boredom', 'anxiety', 'intimidation',
    'helplessness', 'envy', 'regret', 'numbness', 'melancholy', 'ambivalence', 'bitterness', 'yearning', 'fearful',
    'apprehensive', 'overwhelmed', 'jealous', 'devastated', 'frustrated', 'envious', 'dismissive', 'heartbreak',
    'betrayal', 'suffering', 'emotionalstorm', 'isolation', 'disappointment', 'lostlove', 'exhaustion', 'sorrow',
    'darkness', 'desperation', 'ruins', 'desolation', 'loss', 'heartache', 'solitude', 'obstacle', 'sympathy',
    'pressure', 'renewed effort', 'miscalculation', 'challenge', 'sad', 'hate', 'bad','bittersweet', 'embarrassed'
]

Neutral_sentiments = [
    'neutral', 'surprise', 'indifference', 'pensive', 'reflection', 'contemplation','mischievous','suspense','nostalgia'
]

In [5]:
for word in data['Target']:
  if word in Positive_sentiments:
    data['Target'].replace(word,'Positive',inplace=True)
  elif word in Negative_sentiments:
    data['Target'].replace(word,'Negative',inplace=True)
  elif word in Neutral_sentiments:
    data['Target'].replace(word,'Neutral',inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Target'].replace(word,'Positive',inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Target'].replace(word,'Negative',inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

In [6]:
data.drop(columns = ['Timestamp', 'Hour', 'ID', 'User','Source', 'Retweets','Likes','Country','Year', 'Month', 'Day'], inplace = True)

data[['Topic 1', 'Topic 2']] = data['Topic'].str.split(expand=True)
data = data.drop('Topic', axis=1)

#making both columns lower_case
data['Topic 1'] = data['Topic 1'].str.lower()
data['Topic 2'] = data['Topic 2'].str.lower()

data['Topic 1'] = data['Topic 1'].str.lstrip('#')
data['Topic 2'] = data['Topic 2'].str.lstrip('#')

In [7]:
data.head()

Unnamed: 0,Text,Target,Topic 1,Topic 2
0,Enjoying a beautiful day at the park! ...,Positive,nature,park
1,Traffic was terrible this morning. ...,Negative,traffic,morning
2,Just finished an amazing workout! 💪 ...,Positive,fitness,workout
3,Excited about the upcoming weekend getaway! ...,Positive,travel,adventure
4,Trying out a new recipe for dinner tonight. ...,Neutral,cooking,food


# ✅Encoding The Target column to Unique Values:


In [8]:
encoder = LabelEncoder()
cols = ["Target","Topic 1","Topic 2"]
for i in cols:
  data[i] = encoder.fit_transform(data[i])
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 732 entries, 0 to 731
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Text     732 non-null    object
 1   Target   732 non-null    int32 
 2   Topic 1  732 non-null    int32 
 3   Topic 2  732 non-null    int32 
dtypes: int32(3), object(1)
memory usage: 14.4+ KB


# ✅Preprocessing:

1. Handling Text Data

In [9]:
data.head()

Unnamed: 0,Text,Target,Topic 1,Topic 2
0,Enjoying a beautiful day at the park! ...,2,242,401
1,Traffic was terrible this morning. ...,0,354,361
2,Just finished an amazing workout! 💪 ...,2,151,628
3,Excited about the upcoming weekend getaway! ...,2,356,7
4,Trying out a new recipe for dinner tonight. ...,1,87,168


In [10]:
# Tokenization, removing punctuation, lowercasing, removing stopwords, and stemming or lemmatization
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    # Tokenization
    tokens = word_tokenize(text)

    # Removing punctuation and lowercasing
    tokens = [word.lower() for word in tokens if word.isalnum()]

    # Removing stopwords
    tokens = [word for word in tokens if word not in stop_words]

    return tokens

In [11]:
# Apply preprocessing function to the 'Text' column
data['Text'] = data['Text'].apply(preprocess_text)

In [12]:
lemmatizer = WordNetLemmatizer()
from nltk.corpus import wordnet

def get_wordnet_pos(tag):
    tag = tag[0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}
    return tag_dict.get(tag, wordnet.NOUN)

def lemmatize_column(x):

    lemmatizer = WordNetLemmatizer()
    lemmatized_entry = [lemmatizer.lemmatize(token, pos=get_wordnet_pos(tag)) for token, tag in nltk.pos_tag(x)]
    return lemmatized_entry

data["Text"] = data["Text"].apply(lemmatize_column)

In [13]:
def join_tokens(tokenized_column):
    return tokenized_column.apply(lambda x: ' '.join(x))

data['Text'] = join_tokens(data['Text'])

In [14]:
# Print the DataFrame to check the result
data.head()

Unnamed: 0,Text,Target,Topic 1,Topic 2
0,enjoy beautiful day park,2,242,401
1,traffic terrible morning,0,354,361
2,finish amaze workout,2,151,628
3,excited upcoming weekend getaway,2,356,7
4,try new recipe dinner tonight,1,87,168


# ✅RNN

In [15]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# # train_test_split function is used to split data into training and testing sets
# from sklearn.model_selection import train_test_split
# # Sequential is a model that represents a linear stack of layers to form RNN model
# # from keras.models import Sequential
# # This line imports Dense layer type from Keras. this layer is building block used to construct neural network architecture.
# # from keras.layers import Dense
# # to_categorical function is used to convert class vector (integers) to binary class matrix for categorical classification.
# from tensorflow.keras.utils import to_categorical

# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(data['Text'], data['Target'], test_size=0.25, random_state=42)

# # Initialize the TF-IDF vectorizer
# vectorizer = TfidfVectorizer()

# # Fit the vectorizer to the text data and transform it into TF-IDF vectors
# X_train_tfidf = vectorizer.fit_transform(X_train)
# X_test_tfidf = vectorizer.transform(X_test)

# # Convert TF-IDF vectors to arrays
# X_train_array = X_train_tfidf.toarray()
# X_test_array = X_test_tfidf.toarray()

# # Convert target labels to categorical

# # This line calculates the number of unique classes (targets) in the dataset.
# num_classes = len(data['Target'].unique())
# # these lines convert the target labels to categorical format
# y_train_category = to_categorical(y_train, num_classes=num_classes)
# y_test_category = to_categorical(y_test, num_classes=num_classes)

# # Define the RNN model architecture
# model = Sequential()
# # This line adds a fully connected Dense layer with 128 units and ReLU activation function to the model. It also specifies the input shape based on the number of features in the training data.
# model.add(Dense(128, input_dim=X_train_array.shape[1], activation='relu'))
# # This line adds a fully connected Dense layer with softmax activation function to the model. The number of units in this layer is equal to the number of classes, and softmax activation is used for multi-class classification.
# model.add(Dense(num_classes, activation='softmax'))

# # This line compiles the model, specifying the loss function, optimizer, and evaluation metric.
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# # Train the model
# epochs = 15
# batch_size = 64
# # This line trains the model on the training data (X_train_array, y_train_category) for a specified number of epochs and batch size. It also uses the validation data (X_test_array, y_test_category) for validation during training
# history = model.fit(X_train_array, y_train_category, epochs=epochs, batch_size=batch_size, validation_data=(X_test_array, y_test_category), verbose=2)

# # Evaluate the model
# _, accuracy = model.evaluate(X_test_array, y_test_category)
# print('Accuracy:', accuracy)


In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data['Text'], data['Target'], test_size=0.25, random_state=42)

# Initialize the TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Fit the vectorizer to the text data and transform it into TF-IDF vectors
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Convert TF-IDF vectors to arrays
X_train_array = X_train_tfidf.toarray()
X_test_array = X_test_tfidf.toarray()

# Convert target labels to categorical
num_classes = len(data['Target'].unique())
y_train_category = to_categorical(y_train, num_classes=num_classes)
y_test_category = to_categorical(y_test, num_classes=num_classes)

# Apply SMOTE for oversampling
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_array, y_train_category)

# Define the RNN model architecture
model = Sequential()
model.add(Dense(128, input_dim=X_train_resampled.shape[1], activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
epochs = 10
batch_size = 64
history = model.fit(X_train_resampled, y_train_resampled, epochs=epochs, batch_size=batch_size, validation_data=(X_test_array, y_test_category), verbose=2)

# Evaluate the model
_, accuracy = model.evaluate(X_test_array, y_test_category)
print('Accuracy:', accuracy)

model.save('RNN_Model.h5')
joblib.dump(vectorizer, 'vectorizer.pkl')


found 0 physical cores < 1
  File "d:\Python\envs\test\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
17/17 - 2s - 88ms/step - accuracy: 0.6388 - loss: 1.0594 - val_accuracy: 0.7486 - val_loss: 1.0265
Epoch 2/10
17/17 - 0s - 11ms/step - accuracy: 0.9842 - loss: 0.9160 - val_accuracy: 0.8579 - val_loss: 0.9290
Epoch 3/10
17/17 - 0s - 12ms/step - accuracy: 0.9926 - loss: 0.7072 - val_accuracy: 0.8743 - val_loss: 0.7849
Epoch 4/10
17/17 - 0s - 11ms/step - accuracy: 0.9954 - loss: 0.4778 - val_accuracy: 0.8798 - val_loss: 0.6258
Epoch 5/10
17/17 - 0s - 9ms/step - accuracy: 0.9972 - loss: 0.2963 - val_accuracy: 0.8962 - val_loss: 0.4981
Epoch 6/10
17/17 - 0s - 10ms/step - accuracy: 0.9972 - loss: 0.1818 - val_accuracy: 0.9016 - val_loss: 0.4160
Epoch 7/10
17/17 - 0s - 10ms/step - accuracy: 0.9972 - loss: 0.1171 - val_accuracy: 0.9016 - val_loss: 0.3706
Epoch 8/10
17/17 - 0s - 11ms/step - accuracy: 0.9981 - loss: 0.0806 - val_accuracy: 0.9016 - val_loss: 0.3399
Epoch 9/10
17/17 - 0s - 9ms/step - accuracy: 0.9981 - loss: 0.0587 - val_accuracy: 0.9016 - val_loss: 0.3214
Epoch 10/10




Accuracy: 0.9016393423080444


['vectorizer.pkl']

In [17]:
# Function to make predictions
def predict_sentiment(input_text, vectorizer):
    # Transform input text into TF-IDF vectors using the provided vectorizer
    text_tfidf = vectorizer.transform([input_text])

    # Convert TF-IDF vectors to arrays
    text_array = text_tfidf.toarray()

    # Make predictions
    predictions = model.predict(text_array)

    # Get the predicted class (assuming single-label classification)
    predicted_class = np.argmax(predictions[0])

    # Map the predicted class back to its original sentiment label
    sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    predicted_sentiment = sentiment_labels[predicted_class]

    return predicted_sentiment

# Example usage
input_text = "I cry"
# Assuming you have access to the fitted vectorizer object (let's call it 'vectorizer')
predicted_sentiment = predict_sentiment(input_text, vectorizer)
print("Predicted sentiment:", predicted_sentiment)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
Predicted sentiment: Negative


# Deployment

In [18]:
file='RNN'
joblib.dump(model,"RNN")
model=joblib.load(open("RNN",'rb'))

  saveable.load_own_variables(weights_store.get(inner_path))
