In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.preprocessing import LabelEncoder

# Assuming 'df' is your DataFrame with 'utterance' and 'label' columns for training
# Assuming 'val_df' is your DataFrame with 'utterance' and 'label' columns for validation

# Compare several classifiers on count-vectorized utterances.
#
# Expects two DataFrames defined earlier: `df` (training) and `val_df`
# (validation), each with an 'utterance' text column and a 'label' column.

# Encode emotion labels as integers. The encoder is fitted on the training
# labels only and reused for validation so both splits share one mapping.
label_encoder = LabelEncoder()
df['emotion_encoded'] = label_encoder.fit_transform(df['label'])
val_df['emotion_encoded'] = label_encoder.transform(val_df['label'])

# Define a list of models
models = [MultinomialNB(), SVC(), RandomForestClassifier()]

vectorizer = CountVectorizer()

# The features and targets are identical for every model, so build them once
# instead of re-fitting the vectorizer on each loop iteration.
# The vocabulary is learned from the training utterances only; the validation
# utterances are projected onto that same vocabulary.
X_train = vectorizer.fit_transform(df['utterance'])
y_train = df['emotion_encoded']

X_val = vectorizer.transform(val_df['utterance'])
y_val = val_df['emotion_encoded']

for model in models:
    # Fit the model on the training set
    model.fit(X_train, y_train)

    # Predict the emotions on the validation set
    predictions = model.predict(X_val)

    # Evaluate the model on the validation set
    accuracy = accuracy_score(y_val, predictions)
    report = classification_report(y_val, predictions)

    # Calculate the weighted F1 score on the validation set
    weighted_f1 = f1_score(y_val, predictions, average='weighted')

    # Name the classifier so each block of metrics can be attributed to it.
    print(f"Model: {type(model).__name__}")
    print(f"Validation Accuracy: {accuracy}")
    print("Validation Classification Report:\n", report)
    print(f"Validation Weighted F1 Score: {weighted_f1}")
    print("-" * 50)