In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re, regex
from gensim.models import Word2Vec
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import make_scorer, classification_report, precision_score, recall_score, f1_score, accuracy_score, jaccard_score
from sklearn.model_selection import GridSearchCV
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\kaifa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
train_df = pd.read_csv("../data/train.csv")
val_df   = pd.read_csv("../data/val.csv")
test_df  = pd.read_csv("../data/test.csv")

In [3]:
TARGET_EMOTIONS = ['joy','sadness','anger','fear','surprise','disgust','neutral','love']
X_train, y_train = train_df["text"], train_df[TARGET_EMOTIONS]
X_val, y_val     = val_df["text"], val_df[TARGET_EMOTIONS]
X_test, y_test   = test_df["text"], test_df[TARGET_EMOTIONS]


In [4]:
def preprocess(text):
    text = re.sub(r"[^a-zA-Z\s]", "", text.lower())
    return word_tokenize(text)

train_tokens = X_train.apply(preprocess)
val_tokens   = X_val.apply(preprocess)
test_tokens  = X_test.apply(preprocess)

w2v_model = Word2Vec(
    sentences=train_tokens,
    vector_size=500,  # embedding dimension
    window=5,
    min_count=2,
    workers=4,
    sg=1,             
    epochs=10
)

Exception ignored in: 'gensim.models.word2vec_inner.our_dot_float'


In [5]:
print("✅ Word2Vec trained — vocabulary size:", len(w2v_model.wv))

✅ Word2Vec trained — vocabulary size: 11334


In [7]:
def sentence_vector(tokens, model, dim=300):
    vec = np.zeros(dim)
    count = 0
    for word in tokens:
        if word in model.wv:
            vec += model.wv[word]
            count += 1
            
    if count != 0:
        vec /= count
    return vec

X_train_vec = np.vstack(train_tokens.apply(lambda x: sentence_vector(x, w2v_model, dim=500)))
X_val_vec   = np.vstack(val_tokens.apply(lambda x: sentence_vector(x, w2v_model, dim=500)))
X_test_vec  = np.vstack(test_tokens.apply(lambda x: sentence_vector(x, w2v_model, dim=500)))
X_train_vec.shape, X_val_vec.shape, X_test_vec.shape

((31749, 500), (6803, 500), (6804, 500))

In [8]:
def evaluate_model(clf, X, y, dataset_name="Dataset", get_classification_report=False):
    y_pred = clf.predict(X)

    print(f"{dataset_name} Report:")
    if get_classification_report:
        print("\nValidation Classification Report:")
        print(classification_report(y, y_pred, target_names=TARGET_EMOTIONS, digits=3))

    micro_p = precision_score(y, y_pred, average="micro")
    micro_r = recall_score(y, y_pred, average="micro")
    micro_f1 = f1_score(y, y_pred, average="micro")

    macro_p = precision_score(y, y_pred, average="macro")
    macro_r = recall_score(y, y_pred, average="macro")
    macro_f1 = f1_score(y, y_pred, average="macro")

    print(f"Micro Precision: {micro_p:.3f}, Micro Recall: {micro_r:.3f}, Micro F1: {micro_f1:.3f}")
    print(f"Macro Precision: {macro_p:.3f}, Macro Recall: {macro_r:.3f}, Macro F1: {macro_f1:.3f}")

    subset_acc = accuracy_score(y, y_pred)
    jaccard_acc = jaccard_score(y, y_pred, average="samples")

    print(f"Subset Accuracy (Exact Match): {subset_acc:.3f}")
    print(f"Jaccard Accuracy (Sample-based): {jaccard_acc:.3f}")
    return {
        "micro_precision": micro_p,
        "micro_recall": micro_r,
        "micro_f1": micro_f1,
        "macro_precision": macro_p,
        "macro_recall": macro_r,
        "macro_f1": macro_f1,
        "subset_accuracy": subset_acc,
        "jaccard_accuracy": jaccard_acc
    }

### Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)


In [17]:
base_clf = LogisticRegression()
clf = OneVsRestClassifier(base_clf, n_jobs=-1)

In [None]:
param_grid = {
    "estimator__solver": ['liblinear', 'saga'],
    "estimator__C": [0.5, 1.0, 2.0],
    "estimator__penalty": ['l1', 'l2'],
    "estimator__class_weight": [None, 'balanced'],
    "estimator__max_iter": [500, 1000, 1500]
}
f1_micro = make_scorer(f1_score, average='micro')

grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=f1_micro,
    cv=3,
    n_jobs=2
)

grid.fit(X_train_vec, y_train)
print("\nGrid Search Complete.")
print("Best Params:", grid.best_params_)
print("Best Cross-Validated Micro-F1:", grid.best_score_)
best_model = grid.best_estimator_
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)

best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'Logistic Regression (One-vs-Rest)'}

result.update(best_result)
result_df = pd.DataFrame([result])
result_df.to_csv('../results/best_model_test_results_w2v.csv', index=False)

results = pd.DataFrame(grid.cv_results_)
os.makedirs("../results", exist_ok=True)
results.to_csv("../results/gridsearch_LR_results_w2v.csv", index=False)

### Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
clf = OneVsRestClassifier(GaussianNB())

In [None]:
param_grid = {
    # "estimator": [GaussianNB()],  
    "estimator__alpha": [0.1, 0.5, 1.0, 2.0],        
    "estimator__fit_prior": [True, False]
}
f1_micro = make_scorer(f1_score, average='micro')
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=f1_micro,
    cv=3,
    n_jobs=-1,
    verbose=0
)
grid.fit(X_train_vec, y_train)
print("\nGrid Search Complete.")
print("Best Params:", grid.best_params_)
print("Best Cross-Validated Micro-F1:", grid.best_score_)

best_model = grid.best_estimator_
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)
best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'Gaussian Naive Bayes (One-vs-Rest)'}

result.update(best_result)
result_df = pd.read_csv('../results/best_model_test_results_w2v.csv') if os.path.exists('../results/best_model_test_results_w2v.csv') else pd.DataFrame()
result_df = pd.concat([result_df, pd.DataFrame([result])], ignore_index=True)
os.makedirs("../results", exist_ok=True)
result_df.to_csv("../results/best_model_test_results_w2v.csv", index=False)

results = pd.DataFrame(grid.cv_results_)
os.makedirs("../results", exist_ok=True)
results.to_csv("../results/gridsearch_GNB_results.csv", index=False)

ValueError: 
All the 24 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
24 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\multiclass.py", line 376, in fit
    self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\utils\parallel.py", line 77, in __call__
    return super().__call__(iterable_with_config)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\utils\parallel.py", line 139, in __call__
    return self.function(*args, **kwargs)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\multiclass.py", line 96, in _fit_binary
    estimator.fit(X, y, **fit_params)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\naive_bayes.py", line 762, in fit
    self._count(X, Y)
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\naive_bayes.py", line 889, in _count
    check_non_negative(X, "MultinomialNB (input X)")
  File "c:\Users\kaifa\anaconda3\envs\inpaint\lib\site-packages\sklearn\utils\validation.py", line 1827, in check_non_negative
    raise ValueError(f"Negative values in data passed to {whom}.")
ValueError: Negative values in data passed to MultinomialNB (input X).


### Linear SVM

In [None]:
from sklearn.svm import LinearSVC, SVC

In [None]:
clf = OneVsRestClassifier(LinearSVC())

In [None]:
param_grid = {
    "estimator__C": [0.1, 0.5, 1.0, 2.0, 5.0],
    "estimator__class_weight": [None, "balanced"],
    "estimator__loss": ["hinge", "squared_hinge"],
    "estimator__max_iter": [1000, 1500]
}
f1_micro = make_scorer(f1_score, average="micro")

grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=f1_micro,
    cv=3,
    n_jobs=-1,
    verbose=0
)
print("Starting Grid Search for Linear SVM...")
grid.fit(X_train_vec, y_train)
print("\nGrid Search Complete.")
print("Best Params:", grid.best_params_)
print("Best Cross-Validated Micro-F1:", grid.best_score_)

best_model = grid.best_estimator_
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)
best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'Linear SVM (One-vs-Rest)'}

result.update(best_result)
result_df = pd.read_csv('../results/best_model_test_results_w2v.csv')
result_df = pd.concat([result_df, pd.DataFrame([result])], ignore_index=True)
result_df.to_csv("../results/best_model_test_results_w2v.csv", index=False)

results = pd.DataFrame(grid.cv_results_)
os.makedirs("../results", exist_ok=True)
results.to_csv("../results/gridsearch_Linear_SVC_results.csv", index=False)

In [38]:
clf = OneVsRestClassifier(SVC())

In [None]:
param_grid = {
    "estimator__C": [0.1, 0.5, 1.0, 2.0, 5.0],
    "estimator__class_weight": [None, "balanced"],
    "estimator__loss": ["hinge", "squared_hinge"],
    "estimator__max_iter": [1000, 1500]
}
f1_micro = make_scorer(f1_score, average="micro")

grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=f1_micro,
    cv=3,
    n_jobs=-1,
    verbose=0
)
print("Starting Grid Search for Linear SVM...")
grid.fit(X_train_vec, y_train)
print("\nGrid Search Complete.")
print("Best Params:", grid.best_params_)
print("Best Cross-Validated Micro-F1:", grid.best_score_)

best_model = grid.best_estimator_
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)
best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'Linear SVM (One-vs-Rest)'}

result.update(best_result)
result_df = pd.read_csv('../results/best_model_test_results_w2v.csv')
result_df = result_df.append(result, ignore_index=True)
result_df.to_csv("../results/best_model_test_results_w2v.csv", index=False)

results = pd.DataFrame(grid.cv_results_)
os.makedirs("../results", exist_ok=True)
results.to_csv("../results/gridsearch_NB_results.csv", index=False)

### Ridge Classifier

In [None]:
from sklearn.linear_model import RidgeClassifier

In [None]:
clf = OneVsRestClassifier(RidgeClassifier())

In [None]:
param_grid = {
    "estimator__alpha": [0.1, 0.5, 1.0, 2.0, 5.0],
    "estimator__tol": [1e-3, 1e-4],
    "estimator__solver": ["auto", "sparse_cg", "lsqr"]
}
f1_micro = make_scorer(f1_score, average="micro")
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=f1_micro,
    cv=3,
    n_jobs=-1,
    verbose=0
)

print("Starting Grid Search for Ridge Classifier...")
grid.fit(X_train_vec, y_train)
print("\nGrid Search Complete.")
print("Best Params:", grid.best_params_)
print("Best Cross-Validated Micro-F1:", grid.best_score_)

best_model = grid.best_estimator_
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)
best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'Ridge Classifier (One-vs-Rest)'}

result.update(best_result)
result_df = pd.read_csv('../results/best_model_test_results_w2v.csv')
result_df = pd.concat([result_df, pd.DataFrame([result])], ignore_index=True)
result_df.to_csv("../results/best_model_test_results_w2v.csv", index=False)

results = pd.DataFrame(grid.cv_results_)
os.makedirs("../results", exist_ok=True)
results.to_csv("../results/gridsearch_Ridge_Classifier_results.csv", index=False)

Starting Grid Search for Ridge Classifier...

Grid Search Complete.
Best Params: {'estimator__alpha': 1.0, 'estimator__solver': 'lsqr', 'estimator__tol': 0.001}
Best Cross-Validated Micro-F1: 0.6747333570269541
Validation Set Report:

Validation Classification Report:
              precision    recall  f1-score   support

         joy      0.767     0.334     0.465       866
     sadness      0.760     0.325     0.455       662
       anger      0.679     0.289     0.405       866
        fear      0.790     0.319     0.455       307
    surprise      0.778     0.298     0.431       554
     disgust      0.673     0.223     0.335       600
     neutral      0.804     0.890     0.845      4752
        love      0.862     0.555     0.676       641

   micro avg      0.793     0.620     0.696      9248
   macro avg      0.764     0.404     0.508      9248
weighted avg      0.779     0.620     0.658      9248
 samples avg      0.766     0.688     0.703      9248

Micro Precision: 0.793, Mi

### SGD Classifier

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
clf = OneVsRestClassifier(SGDClassifier(random_state=42))

In [None]:
param_grid = {
    "estimator__loss": ["hinge", "log_loss", "modified_huber"],
    "estimator__penalty": ["l2", "l1", "elasticnet"],
    "estimator__alpha": [1e-5, 1e-4, 1e-3],
    "estimator__max_iter": [1000, 1500],
    "estimator__tol": [1e-3, 1e-4],
    "estimator__class_weight": [None, "balanced"]
}
f1_micro = make_scorer(f1_score, average="micro")

In [None]:
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=f1_micro,
    cv=3,
    n_jobs=-1,
    verbose=0
)
print("Starting Grid Search for SGD Classifier...")
grid.fit(X_train_vec, y_train)
print("\nGrid Search Complete.")
print("Best Params:", grid.best_params_)
print("Best Cross-Validated Micro-F1:", grid.best_score_)

best_model = grid.best_estimator_
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)
best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'SGD Classifier (One-vs-Rest)'}

result.update(best_result)
result_df = pd.read_csv('../results/best_model_test_results_w2v.csv')
result_df = pd.concat([result_df, pd.DataFrame([result])], ignore_index=True)
result_df.to_csv("../results/best_model_test_results_w2v.csv", index=False)

results = pd.DataFrame(grid.cv_results_)
os.makedirs("../results", exist_ok=True)
results.to_csv("../results/gridsearch_SGD_Classifier_results.csv", index=False)

Starting Grid Search for SGD Classifier...

Grid Search Complete.
Best Params: {'estimator__alpha': 0.0001, 'estimator__class_weight': None, 'estimator__loss': 'hinge', 'estimator__max_iter': 1000, 'estimator__penalty': 'l1', 'estimator__tol': 0.0001}
Best Cross-Validated Micro-F1: 0.7034871849957444
Validation Set Report:

Validation Classification Report:
              precision    recall  f1-score   support

         joy      0.825     0.311     0.451       866
     sadness      0.795     0.293     0.428       662
       anger      0.713     0.269     0.391       866
        fear      0.777     0.375     0.505       307
    surprise      0.811     0.318     0.457       554
     disgust      0.702     0.220     0.335       600
     neutral      0.805     0.916     0.857      4752
        love      0.870     0.658     0.750       641

   micro avg      0.803     0.637     0.711      9248
   macro avg      0.787     0.420     0.522      9248
weighted avg      0.795     0.637     0.668 

### Ensemble (Voting)

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn.svm import LinearSVC

In [None]:
lr = LogisticRegression(C=2.0, solver='liblinear', max_iter=1000)
nb = GaussianNB(alpha=0.5)
lr2 = LogisticRegression(C=1.0, solver='liblinear', max_iter=1000)

voting_clf = OneVsRestClassifier(
    VotingClassifier(
        estimators=[('lr', lr), ('nb', nb), ('lr2', lr2), ('svc', LinearSVC(C=0.5, max_iter=1500))],
        voting='soft'
    )
)


In [None]:
voting_clf.fit(X_train_vec, y_train)
best_model = voting_clf
evaluate_model(best_model, X_val_vec, y_val, dataset_name="Validation Set", get_classification_report=True)
best_result = evaluate_model(best_model, X_test_vec, y_test, dataset_name="Test Set")
result = {'Model' : 'Voting (lr, nb, svm) (One-vs-Rest)'}

result.update(best_result)
result_df = pd.read_csv('../results/best_model_test_results_w2v.csv')
result_df = pd.concat([result_df, pd.DataFrame([result])], ignore_index=True)
result_df.to_csv("../results/best_model_test_results_w2v.csv", index=False)

Validation Set Report:

Validation Classification Report:
              precision    recall  f1-score   support

         joy      0.833     0.237     0.369       866
     sadness      0.837     0.193     0.314       662
       anger      0.832     0.194     0.315       866
        fear      0.717     0.107     0.187       307
    surprise      0.811     0.155     0.261       554
     disgust      0.753     0.112     0.194       600
     neutral      0.785     0.936     0.854      4752
        love      0.906     0.538     0.675       641

   micro avg      0.796     0.593     0.679      9248
   macro avg      0.809     0.309     0.396      9248
weighted avg      0.803     0.593     0.607      9248
 samples avg      0.767     0.665     0.692      9248

Micro Precision: 0.796, Micro Recall: 0.593, Micro F1: 0.679
Macro Precision: 0.809, Macro Recall: 0.309, Macro F1: 0.396
Subset Accuracy (Exact Match): 0.525
Jaccard Accuracy (Sample-based): 0.648
Test Set Report:
Micro Precision: 0.790