In [2]:
pip install pandas scikit-learn transformers nltk

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import re
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pickle


In [3]:
# Fetch the NLTK 'punkt' tokenizer data; the call's return value is rendered as the cell output.
# NOTE(review): no tokenizer call is visible in this notebook — confirm this download is still needed.
nltk.download('punkt')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nikib\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [6]:
# Load the train/test splits; later cells read the `text` and `label` columns.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a saved
# row index — consider `index_col=0` if nothing downstream needs that column.
train_df = pd.read_csv('training.csv')
test_df = pd.read_csv('test.csv')

# Only a cell's last expression is rendered automatically, so show the training
# preview explicitly; a bare `train_df.head()` here would be evaluated and discarded.
display(train_df.head())
test_df.head()

Unnamed: 0,text,label
0,im feeling rather rotten so im not very ambiti...,0
1,im updating my blog because i feel shitty,0
2,i never make her separate from me because i do...,0
3,i left with my bouquet of red and yellow tulip...,1
4,i was feeling a little vain when i did this one,0


In [7]:
# apply cleaning
def clean_text(text):
    """Normalize a piece of text before vectorization.

    Lower-cases the input and removes every character that is neither a word
    character (letter, digit, underscore) nor whitespace, so "I'm happy!"
    becomes "im happy".

    Args:
        text: The raw text. ``None`` and float NaN (what pandas yields for an
            empty CSV cell) are treated as empty text; any other non-string
            value is converted with ``str()`` first.

    Returns:
        The cleaned, lower-cased string.
    """
    # Missing values would otherwise crash on `.lower()`. NaN is the only
    # float that is not equal to itself, so `text != text` detects it.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

# Add a `clean_text` column with the normalized text to both splits.
for frame in (train_df, test_df):
    frame['clean_text'] = frame['text'].apply(clean_text)

In [8]:
# Split each frame into model inputs (the cleaned text) and targets (the label).
X_train, y_train = train_df['clean_text'], train_df['label']
X_test, y_test = test_df['clean_text'], test_df['label']

In [9]:
# vectorize the text
# The vectorizer is fitted on the training text only; the test text is then
# transformed with that same fitted vectorizer, so nothing is learned from
# the test split.
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(X_train)
X_test_vect = vectorizer.transform(X_test)


In [10]:
# Fit a logistic-regression classifier on the TF-IDF training matrix, with the
# solver's iteration cap set to 1000.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train_vect, y=y_train)

In [13]:
# evaluate the model
y_pred = model.predict(X_test_vect)

# Class names listed in label-id order: position i is the name of label i.
label_names = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']
label_ids = list(range(len(label_names)))

print("Accuracy:", accuracy_score(y_test, y_pred))
# Passing `labels` pins every name to its id, so the report neither raises nor
# mislabels rows if some class is absent from y_test / y_pred.
print("\nClassification Report:\n", classification_report(y_test, y_pred, labels=label_ids, target_names=label_names))

Accuracy: 0.8615

Classification Report:
               precision    recall  f1-score   support

     sadness       0.89      0.93      0.91       581
         joy       0.83      0.96      0.89       695
        love       0.82      0.58      0.68       159
       anger       0.90      0.81      0.85       275
        fear       0.87      0.78      0.82       224
    surprise       0.84      0.47      0.60        66

    accuracy                           0.86      2000
   macro avg       0.86      0.75      0.79      2000
weighted avg       0.86      0.86      0.86      2000



In [14]:
# make prediction
def predict_mood(text):
    """Predict the mood expressed by a piece of text.

    The text is normalized with ``clean_text``, vectorized with the fitted
    ``vectorizer`` and classified by the trained ``model``; all three are
    notebook-level names that must already be defined when this is called.

    Args:
        text: The raw input text.

    Returns:
        The mood name for the predicted class: one of 'sadness', 'joy',
        'love', 'anger', 'fear' or 'surprise'.
    """
    text = clean_text(text)
    # The vectorizer expects an iterable of documents, hence the one-item list.
    vectorized_text = vectorizer.transform([text])
    prediction = model.predict(vectorized_text)[0]

    # Must cover every class id the model can predict (0-5); a missing id
    # makes the lookup below raise KeyError for that prediction.
    mood_labels = {
        0: 'sadness',
        1: 'joy',
        2: 'love',
        3: 'anger',
        4: 'fear',
        5: 'surprise',
    }

    return mood_labels[prediction]

# Quick smoke test of the prediction helper on a single sentence.
sample_text = "I'm feeling so happy and energetic today!"
print("Predicted Mood:", predict_mood(text=sample_text))

Predicted Mood: joy


In [15]:
# Persist the trained classifier and the fitted vectorizer, one pickle file each.
for path, artifact in (('mood_model.pkl', model), ('vectorizer.pkl', vectorizer)):
    with open(path, 'wb') as file:
        pickle.dump(artifact, file)
