In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that files
# stored there can be addressed by ordinary filesystem paths.
# NOTE(review): this import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Google Drive folder that holds the three dataset splits. Keeping it in one
# constant means a relocated dataset only needs a single edit.
# NOTE(review): train_path / test_path / val_path are reassigned to local
# filenames by a later cell, so these Drive paths only matter if that cell is skipped.
DRIVE_DATA_DIR = "/content/drive/MyDrive/SomeFolder"

train_path = f"{DRIVE_DATA_DIR}/train.txt"
test_path  = f"{DRIVE_DATA_DIR}/test.txt"
val_path   = f"{DRIVE_DATA_DIR}/val.txt"


**Import libraries**

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score
import joblib


In [None]:
# Ask Colab's upload helper for the dataset files; the recorded output shows
# test.txt, train.txt and val.txt being saved under those same names.
# NOTE(review): the return value is kept in `uploaded`, but no later cell
# visible here reads it.
from google.colab import files
uploaded = files.upload()


Saving test.txt to test.txt
Saving train.txt to train.txt
Saving val.txt to val.txt


In [None]:
# Point the three split paths at the files sitting in the working directory
train_path, test_path, val_path = "train.txt", "test.txt", "val.txt"

In [None]:
def load_txt_data(path, seps=("\t", ",", ";")):
    """Read a two-column ``text`` / ``emotion`` file, auto-detecting the delimiter.

    Because ``names`` always forces exactly two columns, the column count can
    never reveal a wrong delimiter: a wrong guess simply leaves the whole line
    in ``text`` and fills ``emotion`` with NaN. Each candidate separator is
    therefore tried, and the parse with the most non-missing ``emotion``
    values wins (ties go to the earlier separator).

    Parameters
    ----------
    path : str or path-like
        File to read; it is expected to have no header row.
    seps : sequence of str, optional
        Candidate delimiters, tried in order.

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns ``text`` and ``emotion``. ``None`` is returned,
        after printing the error, when the file cannot be read at all.
    """
    columns = ["text", "emotion"]
    best_df = None
    best_labeled = -1
    last_error = None
    try:
        for sep in seps:
            try:
                df = pd.read_csv(path, sep=sep, names=columns)
            except pd.errors.ParserError as e:
                # This delimiter does not fit the file's shape; try the next one.
                last_error = e
                continue
            labeled = int(df["emotion"].notna().sum())
            if labeled > best_labeled:
                best_df, best_labeled = df, labeled
    except Exception as e:
        print(f"❌ Error reading {path}:\n{e}")
        return None

    if best_df is None:
        reason = last_error if last_error is not None else "no separator produced a parse"
        print(f"❌ Error reading {path}:\n{reason}")
    return best_df

# Read the three splits in the order train, validation, test
train_df, val_df, test_df = [
    load_txt_data(split_path)
    for split_path in (train_path, val_path, test_path)
]

# Show the first rows of the training split as a quick sanity check
print(train_df.head())


                                                text  emotion
0                    i didnt feel humiliated;sadness      NaN
1  i can go from feeling so hopeless to so damned...      NaN
2  im grabbing a minute to post i feel greedy wro...      NaN
3  i am ever feeling nostalgic about the fireplac...      NaN
4                         i am feeling grouchy;anger      NaN


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build a TF-IDF vectorizer capped at 5000 features, fit it on the training
# texts, and reuse that fitted vectorizer to transform the test texts.
# NOTE(review): X_train / X_test are first assigned by a later cell in this
# view, so this cell appears to depend on out-of-order execution — confirm it
# survives Restart & Run All. `vectorizer` is also rebuilt further down
# without max_features, which replaces the object created here.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [None]:
# Remove rows where text or emotion is missing; both frames are modified in place.
# NOTE(review): combined_df is first created by pd.concat in a later cell of
# this view, so on a fresh kernel that cell would have to run before this one —
# confirm the intended cell order.
combined_df.dropna(subset=["text", "emotion"], inplace=True)
test_df.dropna(subset=["text", "emotion"], inplace=True)

# Redefine X and y after cleaning: "text" is the input column, "emotion" the label column
X_train = combined_df['text']
y_train = combined_df['emotion']

X_test = test_df['text']
y_test = test_df['emotion']


In [None]:
# Report the per-column count of missing values for each frame
train_val_nulls = combined_df.isnull().sum()
print("NaNs in train+val:", train_val_nulls)
test_nulls = test_df.isnull().sum()
print("NaNs in test:", test_nulls)


NaNs in train+val: text       0
emotion    0
dtype: int64
NaNs in test: text       0
emotion    0
dtype: int64


In [None]:
# Print the feature-matrix shape next to the label count so that a row
# mismatch between the two is immediately visible
vec_shape = X_train_vec.shape
print("X_train_vec shape:", vec_shape)
n_labels = len(y_train)
print("Length of y_train:", n_labels)
print("\nSample y_train values:")
print(y_train.head())


X_train_vec shape: (18000, 5000)
Length of y_train: 0

Sample y_train values:
Series([], Name: emotion, dtype: float64)


In [None]:
# Inspect the label series: its size, its Python type, and its first values
label_count = len(y_train)
print("y_train length:", label_count)
label_type = type(y_train)
print("y_train type:", label_type)
label_preview = y_train.head()
print("y_train sample values:\n", label_preview)


y_train length: 0
y_train type: <class 'pandas.core.series.Series'>
y_train sample values:
 Series([], Name: emotion, dtype: float64)


In [None]:
# Stack the training and validation splits into one frame with a fresh index
splits_to_merge = [train_df, val_df]
combined_df = pd.concat(splits_to_merge, ignore_index=True)

# Drop incomplete rows from both frames, modifying each in place
required_columns = ["text", "emotion"]
for frame in (combined_df, test_df):
    frame.dropna(subset=required_columns, inplace=True)

# Split each frame into its input column and its label column
X_train, y_train = combined_df["text"], combined_df["emotion"]
X_test, y_test = test_df["text"], test_df["emotion"]


In [None]:
print("Columns in combined_df:", combined_df.columns)


Columns in combined_df: Index(['text', 'emotion'], dtype='object')


In [None]:
# Print the first five raw lines of the training file to reveal its delimiter.
# Iterating the file object stops cleanly at end-of-file (readline() would keep
# yielding empty strings), and stripping the trailing newline avoids the blank
# line that print() would otherwise add after every row.
with open("train.txt", "r", encoding="utf-8") as f:
    for _, line in zip(range(5), f):
        print(line.rstrip("\n"))


i didnt feel humiliated;sadness

i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake;sadness

im grabbing a minute to post i feel greedy wrong;anger

i am ever feeling nostalgic about the fireplace i will know that it is still on the property;love

i am feeling grouchy;anger



In [None]:
import pandas as pd

# Load every split with ';' as the delimiter — the raw-file preview shows
# "text;label" lines — sharing one set of parser options across the three reads
read_options = {"sep": ';', "names": ["text", "emotion"], "engine": 'python'}
train_df = pd.read_csv("train.txt", **read_options)
val_df = pd.read_csv("val.txt", **read_options)
test_df = pd.read_csv("test.txt", **read_options)


In [None]:
# Preview the parsed rows, then list the distinct emotion labels
preview = train_df.head()
print(preview)
distinct_emotions = train_df['emotion'].unique()
print("Emotions:", distinct_emotions)


                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger
Emotions: ['sadness' 'anger' 'love' 'surprise' 'fear' 'joy']


In [None]:
def _drop_unlabeled(df):
    """Return a new frame without unusable rows; the input frame is left untouched.

    A row is dropped when ``text`` or ``emotion`` is missing, or when
    ``emotion`` is blank after stripping whitespace.
    """
    complete = df.dropna(subset=["text", "emotion"])
    has_label = complete["emotion"].astype(str).str.strip() != ""
    return complete[has_label]


# Combine train + validation, then clean. The same rule is applied to both
# frames through one helper, and nothing is mutated in place, so the frames
# produced by the loading cell stay intact and this cell can be re-run safely.
combined_df = _drop_unlabeled(pd.concat([train_df, val_df], ignore_index=True))

# Test set clean
test_df = _drop_unlabeled(test_df)

# Assign X and y
X_train = combined_df["text"]
y_train = combined_df["emotion"]
X_test = test_df["text"]
y_test = test_df["emotion"]


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit the TF-IDF vectorizer on the training texts only, then reuse that same
# fitted vectorizer for the test texts (evaluated left to right)
vectorizer = TfidfVectorizer()
X_train_vec, X_test_vec = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)


In [None]:
from sklearn.svm import LinearSVC

# Fix the seed so the solver's internal data shuffling — and therefore the
# fitted weights and the metrics reported below — are repeatable across runs.
SEED = 42

model = LinearSVC(random_state=SEED)
model.fit(X_train_vec, y_train)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out test vectors
y_pred = model.predict(X_test_vec)

# Overall accuracy first, then the per-class precision / recall / F1 table
test_accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", test_accuracy)
print("\n📊 Classification Report:\n")
report_text = classification_report(y_test, y_pred)
print(report_text)


✅ Accuracy: 0.8895

📊 Classification Report:

              precision    recall  f1-score   support

       anger       0.88      0.88      0.88       275
        fear       0.87      0.83      0.85       224
         joy       0.90      0.93      0.92       695
        love       0.81      0.77      0.79       159
     sadness       0.92      0.93      0.92       581
    surprise       0.72      0.64      0.68        66

    accuracy                           0.89      2000
   macro avg       0.85      0.83      0.84      2000
weighted avg       0.89      0.89      0.89      2000



In [None]:
def predict_emotion(text):
    """Return the predicted emotion label for a single sentence.

    The sentence is wrapped in a one-element list, vectorized with the
    module-level ``vectorizer``, and classified by the module-level ``model``;
    the first (only) prediction is returned.
    """
    features = vectorizer.transform([text])
    predictions = model.predict(features)
    return predictions[0]


sample_sentence = "I feel so overwhelmed and tired"
print("🧠 Prediction:", predict_emotion(sample_sentence))


🧠 Prediction: surprise


In [None]:
# NOTE(review): this repeats the predict_emotion definition from the previous
# cell verbatim; the later definition silently replaces the earlier one.
def predict_emotion(text):
    """Vectorize one sentence with the global ``vectorizer`` and return the global ``model``'s first prediction."""
    vec = vectorizer.transform([text])
    return model.predict(vec)[0]

# Interactive demo: keep classifying typed sentences until the user asks to stop.
while True:
    try:
        user_input = input("📝 Enter a sentence (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted — leave without a traceback
        print("👋 Exiting emotion predictor.")
        break

    # Ignore surrounding whitespace so that " exit " still quits
    sentence = user_input.strip()
    if sentence.lower() == 'exit':
        print("👋 Exiting emotion predictor.")
        break
    if not sentence:
        # A blank sentence carries no words to classify, so ask again
        continue

    prediction = predict_emotion(sentence)
    print(f"💡 Detected Emotion: {prediction}\n")


💡 Detected Emotion: sadness



In [None]:
import joblib


# Persist both fitted objects. The vectorizer is saved alongside the model
# because predict_emotion passes text through it before the model sees it.
artifacts = ((model, "text_model.pkl"), (vectorizer, "vectorizer.pkl"))
for fitted_object, filename in artifacts:
    joblib.dump(fitted_object, filename)

print("✅ Text model & vectorizer saved!")


✅ Text model & vectorizer saved!
