In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder


In [5]:
data = pd.read_csv('emotions.csv')
X = data.drop('label', axis=1)
y = data['label']

In [6]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [8]:
selector = SelectKBest(score_func=f_classif, k=1500)
X_train_selected = selector.fit_transform(X_train, y_train)
X_test_selected = selector.transform(X_test)

In [9]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train_selected, y_train)

lr_model = LogisticRegression()
lr_model.fit(X_train_selected, y_train)

xgb_model = XGBClassifier()
xgb_model.fit(X_train_selected, y_train)  # Fit the XGBoost model here

svm_model = SVC()
svm_model.fit(X_train_selected, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    
    # Accuracy
    accuracy = model.score(X_test, y_test)
    print(f"Accuracy: {accuracy}")

    # Precision
    precision = precision_score(y_test, y_pred, average='weighted')
    print(f"Precision: {precision}")

    # Recall (Sensitivity)
    recall = recall_score(y_test, y_pred, average='weighted')
    print(f"Recall (Sensitivity): {recall}")

    # F1 Score
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"F1 Score: {f1}")

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(cm)

In [11]:
print("Random Forest:")
evaluate_model(rf_model, X_test_selected, y_test)

print("\nLogistic Regression:")
evaluate_model(lr_model, X_test_selected, y_test)

print("\nXGBoost:")
evaluate_model(xgb_model, X_test_selected, y_test)  # Call the evaluation function after fitting XGBoost

print("\nSVM:")
evaluate_model(svm_model, X_test_selected, y_test)

Random Forest:
Accuracy: 0.9836065573770492
Precision: 0.9838195354562383
Recall (Sensitivity): 0.9836065573770492
F1 Score: 0.9836080818216429
Confusion Matrix:
[[142   0   1]
 [  0 147   1]
 [  5   0 131]]

Logistic Regression:
Accuracy: 0.7236533957845434
Precision: 0.7080755802950767
Recall (Sensitivity): 0.7236533957845434
F1 Score: 0.698348397912938
Confusion Matrix:
[[130   0  13]
 [  2 131  15]
 [ 45  43  48]]

XGBoost:
Accuracy: 0.9976580796252927
Precision: 0.9976743429612283
Recall (Sensitivity): 0.9976580796252927
F1 Score: 0.9976578387396453
Confusion Matrix:
[[143   0   0]
 [  0 148   0]
 [  1   0 135]]

SVM:
Accuracy: 0.6791569086651054
Precision: 0.6846443680592279
Recall (Sensitivity): 0.6791569086651054
F1 Score: 0.5925408528450842
Confusion Matrix:
[[132   5   6]
 [  0 148   0]
 [ 23 103  10]]
