In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
from skmultilearn.problem_transform import ClassifierChain, BinaryRelevance


# Label columns read from the CSV, one per emotion.
# NOTE(review): presumably each column holds 0/1 indicators - confirm against the data.
emotions = ['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise']

# Load the labelled corpus; the code below reads a 'text' column plus the emotion columns.
labelled = pd.read_csv('eng_train.csv')

# Hold out 5% of the rows for validation; the fixed seed keeps the split repeatable.
train, val = train_test_split(labelled, test_size=0.05, random_state=42)

# Bag-of-words counts over unigrams and bigrams. The vocabulary is learned
# from the training split only and then re-used for the validation split.
vectorizer = CountVectorizer(ngram_range=(1, 2))
train_text = train['text'].str.lower()
val_text = val['text'].str.lower()
X_train = vectorizer.fit_transform(train_text).toarray()
X_val = vectorizer.transform(val_text).toarray()

# Multi-label targets: one column per entry in `emotions`.
y_train, y_val = train[emotions].values, val[emotions].values

def evaluate(y_val, y_pred):
    """Print micro- and macro-averaged recall, precision and F1.

    One line is printed per averaging mode. Each score is computed with
    ``zero_division=0`` and rounded to four decimals before printing.

    Args:
        y_val: Ground-truth label matrix.
        y_pred: Predicted label matrix.
    """
    scorers = (recall_score, precision_score, f1_score)

    for average in ('micro', 'macro'):
        # Same call order as the printed fields: recall, precision, f1.
        recall, precision, f1 = [
            scorer(y_val, y_pred, average=average, zero_division=0)
            for scorer in scorers
        ]
        print(f'{average.upper()} recall: {round(recall, 4)}, precision: {round(precision, 4)}, f1: {round(f1, 4)}')

def evaluate_per_class(y_val, y_pred):
    """Print recall, precision and F1 separately for every emotion.

    Column ``i`` of both matrices is scored against the ``i``-th entry of the
    module-level ``emotions`` list. Scores use ``zero_division=0`` and are
    rounded to four decimals before printing.

    Args:
        y_val: Ground-truth label matrix; densified first if it has ``toarray``.
        y_pred: Predicted label matrix; densified first if it has ``toarray``.
    """
    def _as_dense(matrix):
        # Column slicing below needs a plain 2-D array, so objects that
        # expose `toarray` are converted up front.
        return matrix.toarray() if hasattr(matrix, 'toarray') else matrix

    truth = _as_dense(y_val)
    predicted = _as_dense(y_pred)

    for col, emotion in enumerate(emotions):
        print(f'*** {emotion} ***')

        true_col = truth[:, col]
        pred_col = predicted[:, col]

        recall = recall_score(true_col, pred_col, zero_division=0)
        precision = precision_score(true_col, pred_col, zero_division=0)
        f1 = f1_score(true_col, pred_col, zero_division=0)

        print(f'recall: {round(recall, 4)}, precision: {round(precision, 4)}, f1: {round(f1, 4)}\n')

# Candidate models, keyed by the display name used in the printed report.
# Each entry wraps a scikit-learn base classifier in a scikit-multilearn
# problem-transformation strategy (BinaryRelevance or ClassifierChain).
#
# The stochastic base estimators are seeded with the same value as the
# train/validation split (42) so repeated runs report the same scores.
# Without a seed, Random Forest results change from run to run, and so do
# the SVC probability estimates.
#
# NOTE(review): SVC(probability=True) is kept as in the original. Only
# `predict` is called in the evaluation loop visible here, so if nothing else
# needs `predict_proba`, dropping it would avoid the extra internal
# cross-validation that probability calibration performs.
models = {
    'Binary Relevance with Logistic Regression': BinaryRelevance(
        LogisticRegression(max_iter=1000)
    ),
    'Classifier Chain with Logistic Regression': ClassifierChain(
        LogisticRegression(max_iter=1000)
    ),
    'Binary Relevance with Random Forest': BinaryRelevance(
        RandomForestClassifier(n_estimators=100, random_state=42)
    ),
    'Classifier Chain with Random Forest': ClassifierChain(
        RandomForestClassifier(n_estimators=100, random_state=42)
    ),
    'Binary Relevance with SVM': BinaryRelevance(
        SVC(kernel='linear', probability=True, random_state=42)
    ),
    'Classifier Chain with SVM': ClassifierChain(
        SVC(kernel='linear', probability=True, random_state=42)
    ),
}

def _report(name, truth, predicted):
    """Print the aggregate scores and the per-emotion breakdown for one model."""
    print(f'\nEVALUATION: {name}')
    evaluate(truth, predicted)

    print('\nPER CLASS BREAKDOWN')
    evaluate_per_class(truth, predicted)


# Fit every candidate on the training split and score it on the validation split.
for model_name, model in models.items():
    print(f'\n\nEvaluating {model_name}...\n')

    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    _report(model_name, y_val, y_pred)




Evaluating Binary Relevance with Logistic Regression...


EVALUATION: Binary Relevance with Logistic Regression
MICRO recall: 0.4272, precision: 0.6364, f1: 0.5112
MACRO recall: 0.298, precision: 0.4758, f1: 0.3388

PER CLASS BREAKDOWN
*** Anger ***
recall: 0.0, precision: 0.0, f1: 0.0

*** Fear ***
recall: 0.7375, precision: 0.6782, f1: 0.7066

*** Joy ***
recall: 0.1034, precision: 0.5, f1: 0.1714

*** Sadness ***
recall: 0.2174, precision: 0.625, f1: 0.3226

*** Surprise ***
recall: 0.4318, precision: 0.5758, f1: 0.4935



Evaluating Classifier Chain with Logistic Regression...


EVALUATION: Classifier Chain with Logistic Regression
MICRO recall: 0.4836, precision: 0.6023, f1: 0.5365
MACRO recall: 0.39, precision: 0.4644, f1: 0.4171

PER CLASS BREAKDOWN
*** Anger ***
recall: 0.0, precision: 0.0, f1: 0.0

*** Fear ***
recall: 0.675, precision: 0.6835, f1: 0.6792

*** Joy ***
recall: 0.5172, precision: 0.5, f1: 0.5085

*** Sadness ***
recall: 0.3261, precision: 0.625, f1: 0.4286

**