In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import sklearn.metrics as mtr

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [3]:
# Load the match data; the first CSV column is used as the row index.
dataset = pd.read_csv(
    "dummy_match.csv",
    index_col=0,
)

In [None]:
# Columns removed before modelling.
# NOTE(review): these look like in-match statistics (goals, cards, corners, ...)
# that would presumably leak the result — confirm against the dataset description.
columns_to_drop = [
    'h_goals_on_penalty', 'a_goals_on_penalty', 'h_goals', 'a_goals',
    'h_corners', 'h_fouls', 'h_yellow_cards', 'h_red_cards',
    'a_corners', 'a_fouls', 'a_yellow_cards', 'a_red_cards',
    'h_total_penalties', 'a_total_penalties',
]

# errors='ignore' makes this cell idempotent: because it rebinds `dataset`,
# running it a second time would otherwise raise KeyError on the columns that
# are already gone. Trade-off: a misspelled column name is silently skipped,
# so check the displayed column list below.
dataset = dataset.drop(columns=columns_to_drop, errors='ignore')
dataset.columns

In [5]:
# Candidate classifiers, keyed by the display name used in the results table.
# The tree-based estimators are randomized, so they get a fixed random_state
# (same seed as the train/test split) to make the reported metrics
# reproducible across kernel restarts.
models = {
    # max_iter defaults to 100; it had to be raised, otherwise the solver
    # does not converge on this data.
    'Logistic Regression': LogisticRegression(max_iter=10000),
    'Support Vector Machine': SVC(),
    'Multinomial Naive Bayes': MultinomialNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

In [6]:
# Every column except the target 'Risultato' is used as a predictor.
features = [column for column in dataset.columns if column != 'Risultato']
X = dataset[features]
y = dataset['Risultato'].values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Phase 1: train every candidate on the training split (fits in place).
for classifier in models.values():
    classifier.fit(X_train, y_train)

# Phase 2: collect each model's predictions on the held-out split,
# keyed by the same display name used in `models`.
predictions = {
    label: classifier.predict(X_test)
    for label, classifier in models.items()
}

In [13]:
# Build one summary row per model from sklearn's classification report,
# then assemble the rows into a DataFrame indexed by model name.
metric_rows = []
for estimator, y_pred in predictions.items():
    # zero_division=0 scores a class with no predicted samples as 0
    # instead of emitting a warning.
    report = mtr.classification_report(
        y_test, y_pred, output_dict=True, zero_division=0
    )
    macro = report['macro avg']
    weighted = report['weighted avg']
    metric_rows.append({
        'Model': estimator,
        'Accuracy': report['accuracy'],
        'Avg Precision (macro)': macro['precision'],
        'Avg Recall (macro)': macro['recall'],
        'Avg F1-score (macro)': macro['f1-score'],
        'Avg Precision (weighted)': weighted['precision'],
        'Avg Recall (weighted)': weighted['recall'],
        'Avg F1-score (weighted)': weighted['f1-score'],
    })

E = pd.DataFrame(metric_rows).set_index('Model')

In [9]:
# Show the per-model metrics table (one row per classifier, indexed by 'Model').
E

Unnamed: 0_level_0,Accuracy,Avg Precision (macro),Avg Recall (macro),Avg F1-score (macro),Avg Precision (weighted),Avg Recall (weighted),Avg F1-score (weighted)
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Logistic Regression,0.552632,0.491597,0.496843,0.493804,0.541685,0.552632,0.546833
Support Vector Machine,0.407895,0.335784,0.371493,0.263757,0.407217,0.407895,0.300365
Multinomial Naive Bayes,0.513158,0.507073,0.468294,0.472222,0.510575,0.513158,0.501096
Decision Tree,0.5,0.475517,0.467172,0.467149,0.527815,0.5,0.509407
Random Forest,0.486842,0.425112,0.440095,0.425576,0.47795,0.486842,0.474419
K-Nearest Neighbors,0.368421,0.360794,0.366162,0.333603,0.394652,0.368421,0.345674
