In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import os
import json

In [2]:
# The CSV is read without a header row, so columns are addressed by position.
data = pd.read_csv('data_cleaned/train_data_cleaned.csv', header=None)

# Column 0 is used as the model input, column 1 as the label.
X, y = data.iloc[:, 0], data.iloc[:, 1]

# First split: 70% train, 30% held out for validation + test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Second split: 33% of the held-out part becomes test (~10% of all rows),
# the remaining ~20% of all rows becomes validation.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.33, random_state=42
)

In [3]:
def predict(model, x):
    """Return a random label for one sample (random-baseline stub).

    Both ``model`` and ``x`` are ignored. The label is drawn uniformly from
    -1, 0 and 1 using NumPy's global random state, so results vary between
    runs unless that global state is seeded elsewhere.
    """
    candidate_labels = [-1, 0, 1]
    return np.random.choice(candidate_labels)

def run_inference(model, X):
    """Predict a label for every element of ``X`` and return them as an array.

    ``predict(model, x)`` is called once per element, in iteration order, and
    the collected predictions are wrapped in a NumPy array.
    """
    predictions = []
    for sample in X:
        predictions.append(predict(model, sample))
    return np.array(predictions)

In [4]:
def evaluate_model(model, X, y, model_name, dataset_name, save=False, verbose=True):
    """Score ``model`` on ``(X, y)`` and optionally print and persist the metrics.

    Predictions come from ``run_inference(model, X)``. Accuracy is computed over
    all labels; precision, recall and F1 are computed separately for the
    positive class (label 1) and the negative class (label -1).

    Args:
        model: Object forwarded unchanged to ``run_inference``.
        X: Iterable of inputs to predict on.
        y: True labels aligned with ``X``.
        model_name: Name stored in the results and used in the output file name.
        dataset_name: Split name stored in the results and used in the file name.
        save: When True, write the results as JSON to
            ``evaluation_history/{model_name}_{dataset_name}.json``, creating
            the directory if needed and overwriting any existing file.
        verbose: When True, print every ``key: value`` pair of the results.

    Returns:
        dict: ``model_name``, ``dataset_name``, ``accuracy`` and the six
        per-class metrics ``precision_pos``, ``recall_pos``, ``f1_pos``,
        ``precision_neg``, ``recall_neg``, ``f1_neg``.
    """
    y_pred = run_inference(model, X)

    results = {
        "model_name": model_name,
        "dataset_name": dataset_name,
        "accuracy": accuracy_score(y, y_pred),
    }

    # Restricting `labels` to a single class with micro averaging yields that
    # class's own precision / recall / F1. Insertion order (pos metrics first,
    # then neg) matches the order used for printing and for the JSON file.
    class_labels = (("pos", 1), ("neg", -1))
    metric_functions = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for suffix, label in class_labels:
        for metric_name, metric_fn in metric_functions:
            results[f"{metric_name}_{suffix}"] = metric_fn(
                y, y_pred, labels=[label], average='micro'
            )

    if verbose:
        for key, value in results.items():
            print(f"{key}: {value}")

    if save:
        # exist_ok=True replaces the separate exists() check, which could race
        # with another process creating the directory in between.
        os.makedirs('evaluation_history', exist_ok=True)

        file_name = f"evaluation_history/{model_name}_{dataset_name}.json"
        with open(file_name, 'w') as f:
            json.dump(results, f)

    return results

def evaluate_model_on_test(model, model_name):
    """Print the metrics of ``model`` on the test split without saving them.

    Uses the notebook-level ``X_test`` / ``y_test``. The results dict produced
    by ``evaluate_model`` is not returned.
    """
    evaluate_model(
        model,
        X_test,
        y_test,
        model_name,
        dataset_name="test",
        save=False,
        verbose=True,
    )

def evaluate_model_on_validation(model, model_name):
    """Print the metrics of ``model`` on the validation split and save them.

    Uses the notebook-level ``X_val`` / ``y_val``. The results dict produced
    by ``evaluate_model`` is not returned.
    """
    evaluate_model(
        model,
        X_val,
        y_val,
        model_name,
        dataset_name="validation",
        save=True,
        verbose=True,
    )

In [5]:
def load_model(model_name):
    """Placeholder loader: ignores ``model_name`` and always returns None."""
    return None


# `predict` ignores its model argument, so the stub's None is enough here.
model = load_model("name")

In [6]:
# Score the random baseline on the validation split; this wrapper also saves
# the metrics as JSON under evaluation_history/.
evaluate_model_on_validation(model, "Random Classifier")

model_name: Random Classifier
dataset_name: validation
accuracy: 0.34496644295302015
precision_pos: 0.2587131367292225
recall_pos: 0.3496376811594203
f1_pos: 0.29738058551617874
precision_neg: 0.453125
recall_neg: 0.3547400611620795
f1_neg: 0.39794168096054894


In [7]:
# Score the random baseline on the held-out test split; this wrapper only
# prints the metrics and does not save them.
evaluate_model_on_test(model, "Random Classifier")

model_name: Random Classifier
dataset_name: test
accuracy: 0.34150772025431425
precision_pos: 0.23809523809523808
recall_pos: 0.3281853281853282
f1_pos: 0.275974025974026
precision_neg: 0.46035805626598464
recall_neg: 0.35785288270377735
f1_neg: 0.40268456375838924
