In [1]:
import pandas as pd
import numpy as np
import re
import nltk
import joblib
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras import Sequential
from keras.layers import Dense

In [2]:
df =pd.read_csv("twitter_training.csv")

In [3]:
df

Unnamed: 0,2401,Borderlands,Positive,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...
...,...,...,...,...
74676,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74677,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74678,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74679,9200,Nvidia,Positive,Just realized between the windows partition of...


In [4]:
df = df.drop(['2401','Borderlands'],axis=1)

In [5]:
df.columns

Index(['Positive', 'im getting on borderlands and i will murder you all ,'], dtype='object')

In [6]:
new_coloumns = ['sentiment','text']

In [7]:
df.columns=new_coloumns

In [8]:
df

Unnamed: 0,sentiment,text
0,Positive,I am coming to the borders and I will kill you...
1,Positive,im getting on borderlands and i will kill you ...
2,Positive,im coming on borderlands and i will murder you...
3,Positive,im getting on borderlands 2 and i will murder ...
4,Positive,im getting into borderlands and i can murder y...
...,...,...
74676,Positive,Just realized that the Windows partition of my...
74677,Positive,Just realized that my Mac window partition is ...
74678,Positive,Just realized the windows partition of my Mac ...
74679,Positive,Just realized between the windows partition of...


In [9]:
df['sentiment'].unique()

array(['Positive', 'Neutral', 'Negative', 'Irrelevant'], dtype=object)

In [10]:
#le = LabelEncoder()

In [11]:
#df['sentiment']=le.fit_transform(df['sentiment'])
#df['text']=le.fit_transform(df['text'])

In [12]:
df['text'] = df['text'].astype(str)

In [13]:
df['clean_text'] = df['text'].apply(lambda x: re.sub("<.*?>", "", x))
df['clean_text'] = df['clean_text'].apply(lambda x: re.sub(r'[^\w\s]', "", x))
df['clean_text'] = df['clean_text'].str.lower()

In [14]:
df['tokenize_text'] = df['clean_text'].apply(lambda x: word_tokenize(x))

In [15]:
stop_words = set(stopwords.words('english'))
df['filtered_text'] = df['tokenize_text'].apply(lambda x: [word for word in x if word not in stop_words])

In [16]:
stem = PorterStemmer()
df['stem_text'] = df['filtered_text'].apply(lambda x: [stem.stem(word) for word in x])

In [17]:
lemma = WordNetLemmatizer()
df['lemma_text'] = df['filtered_text'].apply(lambda x: [lemma.lemmatize(word) for word in x])

In [18]:
X = df['stem_text'].apply(lambda x: ' '.join(x))
y = df['sentiment']

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
tfidf = TfidfVectorizer()
X_train = tfidf.fit_transform(X_train)
X_test = tfidf.transform(X_test)

In [21]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [22]:
y_train = to_categorical(y_train, num_classes=4)
y_test = to_categorical(y_test, num_classes=4)

In [23]:
model = Sequential([
    Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
    Dense(64, activation="relu"),
    Dense(32, activation="relu"),
    Dense(4, activation="softmax")  # Output layer for 4 classes
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 19ms/step - accuracy: 0.5984 - loss: 0.9491
Epoch 2/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - accuracy: 0.9066 - loss: 0.2496
Epoch 3/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - accuracy: 0.9523 - loss: 0.1208
Epoch 4/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - accuracy: 0.9614 - loss: 0.0880
Epoch 5/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - accuracy: 0.9634 - loss: 0.0808
Epoch 6/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 19ms/step - accuracy: 0.9655 - loss: 0.0728
Epoch 7/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - accuracy: 0.9666 - loss: 0.0734
Epoch 8/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 19ms/step - accuracy: 0.9665 - loss: 0.0697
Epoch 9/

<keras.src.callbacks.history.History at 0x29101921d00>

In [25]:
model.save('model.h5')
joblib.dump(tfidf, 'tfidf.pkl')



['tfidf.pkl']

In [26]:
import streamlit as st
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import joblib
import nltk
from keras.models import load_model
import numpy as np

# Load model and vectorizer
model = load_model('model.h5')  # Use Keras to load the model
tf_idf_vector = joblib.load('tfidf.pkl')

# Initialize NLP components
stem = PorterStemmer()
stop_words = set(stopwords.words('english'))

def predict_sentiment(review):
    # Preprocess the review
    cleaned_review = re.sub('<.*?>', '', review)
    cleaned_review = re.sub(r'[^\w\s]', '', cleaned_review)
    cleaned_review = cleaned_review.lower()
    tokenized_review = word_tokenize(cleaned_review)
    filtered_review = [word for word in tokenized_review if word not in stop_words]
    stemmed_review = [stem.stem(word) for word in filtered_review]
    
    # Transform review to TF-IDF features
    tfidf_review = tf_idf_vector.transform([' '.join(stemmed_review)])
    
    # Predict sentiment
    sentiment_prediction = model.predict(tfidf_review)[0]
    
    # Determine the class with the highest probability
    sentiment_class = np.argmax(sentiment_prediction)
    
    # Define sentiment labels
    sentiment_labels = ["Negative", "Neutral", "Positive", "Irrelevant"]  # Update with actual labels
    return sentiment_labels[sentiment_class]

# Streamlit UI
st.title('Sentiment Analysis')
review_to_predict = st.text_area('Enter your review here:')

if st.button('Predict Sentiment'):
    predicted_sentiment = predict_sentiment(review_to_predict)
    st.write("Predicted Sentiment:", predicted_sentiment)


2024-08-09 18:36:10.070 
  command:

    streamlit run C:\Users\bhuva\AppData\Local\Programs\Python\Python312\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-08-09 18:36:10.071 Session state does not function when running a script without `streamlit run`


In [27]:
!ipynb-py-convert Untitled.ipynb Untitled.py