In [1]:
!pip install kagglehub



In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("praveengovi/emotions-dataset-for-nlp")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\rohit\.cache\kagglehub\datasets\praveengovi\emotions-dataset-for-nlp\versions\1


In [3]:
pip install pandas seaborn neattext scikit-learn joblib


Collecting neattext
  Using cached neattext-0.1.3-py3-none-any.whl.metadata (12 kB)
Using cached neattext-0.1.3-py3-none-any.whl (114 kB)
Installing collected packages: neattext
Successfully installed neattext-0.1.3
Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd

# Define file paths
base_path = r"C:\Users\rohit\.cache\kagglehub\datasets\praveengovi\emotions-dataset-for-nlp\versions\1"
train_file = f"{base_path}\\train.txt"
test_file = f"{base_path}\\test.txt"
val_file = f"{base_path}\\val.txt"

# Load datasets
train_df = pd.read_csv(train_file, names=["Text", "Emotion"], sep=";")
test_df = pd.read_csv(test_file, names=["Text", "Emotion"], sep=";")
val_df = pd.read_csv(val_file, names=["Text", "Emotion"], sep=";")

# Combine all for better training
df = pd.concat([train_df, test_df, val_df], ignore_index=True)

print(df.head())


                                                Text  Emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger


In [5]:
import neattext.functions as nfx

# Clean text
df['Clean_Text'] = df['Text'].apply(nfx.remove_stopwords).apply(nfx.remove_punctuations)


In [6]:
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

# Encode labels
le = LabelEncoder()
df['Emotion_Label'] = le.fit_transform(df['Emotion'])

# Split
X = df['Clean_Text']
y = df['Emotion_Label']


# TF-IDF with bigrams
tfidf = TfidfVectorizer(max_features=10000, ngram_range=(1, 2))

# Fit and transform the clean text
Xfeatures = tfidf.fit_transform(df['Clean_Text'])



In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  # or your existing model
from sklearn.metrics import classification_report, accuracy_score

X_train, X_test, y_train, y_test = train_test_split(Xfeatures, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.88525
              precision    recall  f1-score   support

           0       0.94      0.84      0.89       536
           1       0.88      0.81      0.84       458
           2       0.85      0.96      0.90      1339
           3       0.87      0.68      0.76       335
           4       0.91      0.95      0.93      1173
           5       0.88      0.54      0.67       159

    accuracy                           0.89      4000
   macro avg       0.89      0.80      0.83      4000
weighted avg       0.89      0.89      0.88      4000



In [8]:
import joblib

joblib.dump(model, "emotion_model.pkl")
joblib.dump(tfidf, "vectorizer.pkl")
joblib.dump(le, "label_encoder.pkl")


['label_encoder.pkl']

In [3]:
import joblib

model = joblib.load("emotion_model.pkl")
tfidf = joblib.load("vectorizer.pkl")
le = joblib.load("label_encoder.pkl")

print("Emotion Detection Chatbot (type 'quit' to exit)")
while True:
    text = input("You: ")
    if text.lower() == "quit":
        print("Chatbot: Goodbye!")
        break
    vec = tfidf.transform([text])
    pred = model.predict(vec)
    emotion = le.inverse_transform(pred)[0]
    print(f"Chatbot: You seem to be feeling **{emotion}**.")


Emotion Detection Chatbot (type 'quit' to exit)


You:  LOST


Chatbot: You seem to be feeling **sadness**.


You:  QUIT


Chatbot: Goodbye!
