In [1]:
!pip install neattext


Collecting neattext
  Downloading neattext-0.1.3-py3-none-any.whl.metadata (12 kB)
Downloading neattext-0.1.3-py3-none-any.whl (114 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 114.7/114.7 kB 2.8 MB/s eta 0:00:00
Installing collected packages: neattext
Successfully installed neattext-0.1.3


In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import neattext.functions as nfx
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
import joblib


In [3]:
# Toy labelled corpus: each entry pairs an utterance with its emotion tag.
labeled_samples = [
    ("I am feeling great today!", "happy"),
    ("This is the best moment ever!", "happy"),
    ("I'm so sad and lonely.", "sad"),
    ("Why does everything go wrong with me?", "sad"),
    ("I’m so angry right now!", "angry"),
    ("That makes me furious and mad!", "angry"),
    ("I love how this turned out!", "happy"),
    ("I hate everything today!", "angry"),
    ("Feeling awesome and cheerful!", "happy"),
    ("This is really frustrating and upsetting.", "sad"),
]

# Column-oriented view of the same pairs, keyed the way the later cells expect.
data = {
    'text': [utterance for utterance, _ in labeled_samples],
    'emotion': [label for _, label in labeled_samples],
}

df = pd.DataFrame(data)
df


Unnamed: 0,text,emotion
0,I am feeling great today!,happy
1,This is the best moment ever!,happy
2,I'm so sad and lonely.,sad
3,Why does everything go wrong with me?,sad
4,I’m so angry right now!,angry
5,That makes me furious and mad!,angry
6,I love how this turned out!,happy
7,I hate everything today!,angry
8,Feeling awesome and cheerful!,happy
9,This is really frustrating and upsetting.,sad


In [4]:
# Normalise the raw text with neattext: the stop-word filter runs first and
# the punctuation stripper second, chained into a single assignment.
# NOTE(review): words such as "me?" and "now!" come through as "me" / "now" in
# the displayed output — presumably the stop-word pass does not match words
# that still carry punctuation; confirm whether that is intended.
df['clean_text'] = (
    df['text']
    .apply(nfx.remove_stopwords)
    .apply(nfx.remove_punctuations)
)
df.head()


Unnamed: 0,text,emotion,clean_text
0,I am feeling great today!,happy,feeling great today
1,This is the best moment ever!,happy,best moment ever
2,I'm so sad and lonely.,sad,Im sad lonely
3,Why does everything go wrong with me?,sad,wrong me
4,I’m so angry right now!,angry,I’m angry right now


In [5]:
# Target labels: one emotion tag per row.
y = df['emotion']

# Bag-of-words features: learn the vocabulary from the cleaned text and
# encode every row as token counts in a single pass.
# NOTE(review): the vocabulary is fitted on all rows, i.e. before the
# train/test split performed in the next cell.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['clean_text'])


In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [7]:
# Fit a multinomial Naive Bayes classifier on the training split. `fit`
# returns the estimator itself, so construction and training are chained;
# the bare name on the last line keeps the estimator as the cell's output.
model = MultinomialNB().fit(X_train, y_train)
model


In [8]:
# Score the classifier on the held-out rows.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# With so few test rows a class can be predicted without occurring in y_test
# (or the reverse), which leaves precision/recall undefined for that class.
# zero_division=0 reports those scores as 0 — the same value the default
# setting shows — without raising an UndefinedMetricWarning for each one.
print(classification_report(y_test, y_pred, zero_division=0))


Accuracy: 0.5
              precision    recall  f1-score   support

       happy       1.00      0.50      0.67         2
         sad       0.00      0.00      0.00         0

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# Persist both fitted artifacts: inference needs the vectorizer's learned
# vocabulary as well as the classifier.
joblib.dump(value=model, filename="emotion_model.pkl")
joblib.dump(value=vectorizer, filename="vectorizer.pkl")


['vectorizer.pkl']

In [10]:
# Restore the persisted artifacts.
# NOTE: joblib files are pickles — only load files you produced yourself.
loaded_vectorizer = joblib.load("vectorizer.pkl")
loaded_model = joblib.load("emotion_model.pkl")

# Classify one unseen sentence, cleaning it in the same order used for the
# training text (stop words first, then punctuation).
sample_text = ["I'm feeling so amazing and energetic today!"]
without_stopwords = nfx.remove_stopwords(sample_text[0])
cleaned_sample = [nfx.remove_punctuations(without_stopwords)]

# Encode with the restored vocabulary and report the predicted label.
vec_sample = loaded_vectorizer.transform(cleaned_sample)
prediction = loaded_model.predict(vec_sample)
print("Predicted Emotion:", prediction[0])


Predicted Emotion: happy
