Unzip the file

In [5]:
# Extract the dataset into ./archive.
# -q keeps the output quiet; -o overwrites files that already exist without
# prompting, so this cell can be re-run (e.g. Restart & Run All) when
# ./archive is already populated.
!unzip -o -q sample_data/archive.zip -d archive


Check the Unzipped Files

In [6]:
# List the contents of the extraction directory to confirm the split files are there
!ls archive


test.txt  train.txt  val.txt


Load with pandas

In [7]:
import pandas as pd

# The three splits share one layout: semicolon-separated lines read into the
# columns "text" and "emotion" (column names are supplied here, not taken
# from the files).
read_options = dict(sep=';', names=["text", "emotion"])

train_df = pd.read_csv("archive/train.txt", **read_options)
val_df = pd.read_csv("archive/val.txt", **read_options)
test_df = pd.read_csv("archive/test.txt", **read_options)

# Show the first rows of the training split
train_df.head()


Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


Preprocess & Vectorize the Text

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Combine train + val sets into a single training frame.
# ignore_index=True assigns a fresh 0..n-1 index; without it the row labels of
# train_df and val_df (both numbered from 0) repeat in the combined frame,
# which makes label-based lookups on full_train_df ambiguous.
# NOTE: because val is folded into training, test_df is the only held-out data.
full_train_df = pd.concat([train_df, val_df], ignore_index=True)

X_train = full_train_df['text']
y_train = full_train_df['emotion']

X_test = test_df['text']
y_test = test_df['emotion']

# Convert text to TF-IDF vectors.
# The vocabulary (English stop words removed, capped at 5000 terms) is learned
# from the training text only; the test text is transformed with that same
# fitted vocabulary so no information from the test split leaks into it.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


Train a Classifier (Logistic Regression)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Build the classifier (solver iteration cap set to 1000) and fit it on the
# TF-IDF training features in one step; fit() returns the fitted estimator.
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# Predicted labels for the test split
y_pred = model.predict(X_test_tfidf)

# Overall accuracy first, then the per-class precision / recall / F1 table
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))


Accuracy: 0.877
              precision    recall  f1-score   support

       anger       0.88      0.83      0.85       275
        fear       0.88      0.81      0.84       224
         joy       0.86      0.95      0.91       695
        love       0.83      0.69      0.75       159
     sadness       0.91      0.92      0.91       581
    surprise       0.88      0.53      0.66        66

    accuracy                           0.88      2000
   macro avg       0.87      0.79      0.82      2000
weighted avg       0.88      0.88      0.87      2000



Predict Emotion from Custom Input

In [10]:
def predict_emotion(text):
    """Predict the emotion label for a single piece of text.

    Uses the module-level ``vectorizer`` and ``model`` objects, so both must
    already be fitted when this is called.

    Args:
        text: One document. It is wrapped in a one-element list before being
            handed to ``vectorizer.transform``.

    Returns:
        The first (and only) element of ``model.predict`` for that document.
    """
    # Vectorize a batch of one, classify it, and unwrap the single result.
    return model.predict(vectorizer.transform([text]))[0]

# Example usage: print the predicted label for each sample sentence, in order
for sample_sentence in (
    "I'm really proud of myself today",
    "I can't believe this happened",
):
    print(predict_emotion(sample_sentence))


joy
sadness


Test on a Sentence from the Training Data

In [11]:
# NOTE(review): this exact sentence is row 4 of the train_df preview shown
# earlier (labelled "anger"), so the model saw it during fitting. A correct
# answer here is a sanity check on seen data, not evidence of generalization.
print(predict_emotion("i am feeling grouchy"))


anger
