In [5]:
import os
import random as rd
from datetime import datetime
from functools import reduce
from itertools import product
from operator import mul
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [7]:
# --- Load the raw results file -------------------------------------------
# Encodings are tried in order until one decodes the file.
# NOTE(review): 'latin1' maps every possible byte to a character, so it can
# never raise UnicodeDecodeError. The entries after it are therefore never
# reached ('iso-8859-1' is just an alias of latin1). If cp1252 is the real
# encoding of the file, it has to be listed *before* latin1 - confirm against
# the data source before reordering, since that changes the decoded text.
encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']

CSV_PATH = "RESULTS_DATA.CSV"
# Matches from rounds earlier than this are discarded.
# NOTE(review): presumably so the form/standing features are based on enough
# played games - confirm the intended threshold.
MIN_ROUND = 8

raw_data = None
for encoding in encodings:
    try:
        raw_data = pd.read_csv(CSV_PATH, encoding=encoding, low_memory=False)
    except UnicodeDecodeError:
        print(f"Failed to decode with encoding: {encoding}")
    else:
        print(f"Successfully read the file with encoding: {encoding}")
        break

# Fail loudly here instead of with an opaque "'NoneType' has no attribute
# 'rename'" further down if no encoding worked.
if raw_data is None:
    raise ValueError(f"Could not decode {CSV_PATH} with any of: {encodings}")

# --- Column selection ----------------------------------------------------
# Maps source column names to the names used in the rest of the notebook.
# Only the columns listed here are kept.
col_dict = {
    "country": "Country",
    "league": "League",
    "datameci": "Date",
    "etapa": "Round",
    "txtechipa1": "home_team",
    "txtechipa2": "away_team",
    "place1t": "Home_team_place_total",
    "place1a": "Home_team_place_home",
    "place2t": "Away_team_place_total",
    "place2d": "Away_team_place_away",
    "customh": "ELO_home",
    "customa": "ELO_away",
    "custom3": "FORM_home",
    "custom4": "FORM_away",
    "home_val": "home_wins",
    "home_val_2": "home_not_wins",
    "home_val_3": "home_ppg",
    "home_val_4": "home_ppg_opp",
    "home_val_5": "home_elo_diff",
    "away_val": "away_wins",
    "away_val_2": "away_not_wins",
    "away_val_3": "away_ppg",
    "away_val_4": "away_ppg_opp",
    "away_val_5": "away_elo_diff",
    "scor1": "home_goals",
    "scor2": "away_goals",
    "cotaa": "home_odds",
    "cotae": "draw_odds",
    "cotad": "away_odds"
}

# --- Clean ---------------------------------------------------------------
# One non-mutating chain: `raw_data` is left untouched, so this part can be
# re-run without re-reading the CSV, and no step writes into a filtered
# slice of another frame.
data = (
    raw_data
    .rename(columns=col_dict)
    # filter() silently ignores any mapped column that is absent from the file.
    .filter(items=col_dict.values())
    # Dates in the file are day/month/year strings.
    .assign(Date=lambda df: pd.to_datetime(df['Date'], format='%d/%m/%Y'))
    # Chronological order. A stable sort keeps same-day matches in file order;
    # the default quicksort gives no guarantee about the order of ties.
    .sort_values(by='Date', kind='mergesort')
    .loc[lambda df: df['Round'] >= MIN_ROUND]
    # Drop every row that has a missing value in any kept column.
    .dropna()
    .reset_index(drop=True)
    # Binary target: True only for a home victory (draws count as False).
    .assign(home_win=lambda df: df['home_goals'] > df['away_goals'])
)

Failed to decode with encoding: utf-8
Successfully read the file with encoding: latin1


In [8]:
# Show the cleaned match table; pandas abbreviates the display of a large
# frame to its first and last rows and elides the middle columns.
data

Unnamed: 0,Country,League,Date,Round,home_team,away_team,Home_team_place_total,Home_team_place_home,Away_team_place_total,Away_team_place_away,...,away_not_wins,away_ppg,away_ppg_opp,away_elo_diff,home_goals,away_goals,home_odds,draw_odds,away_odds,home_win
0,Slovakia,Superliga,2013-08-30,8,Senica,Zilina,1.0,1,6.0,4,...,75.000,41.667,83.333,0.000,2,1,2.10,3.25,3.20,True
1,Austria,Erste Liga,2013-08-30,8,St. Polten,Liefering,6.0,5,2.0,2,...,66.667,44.444,22.222,-0.333,1,4,3.10,3.00,2.20,False
2,Croatia,Division 1,2013-08-30,8,RNK Split,NK Dragovoljac,6.0,5,8.0,7,...,100.000,22.222,33.333,4.667,1,0,1.38,3.90,8.50,True
3,Austria,Erste Liga,2013-08-30,8,First Vienna,Parndorf,9.0,10,10.0,10,...,100.000,0.000,77.778,-19.750,1,0,2.10,3.40,3.40,True
4,Austria,Erste Liga,2013-08-30,8,Mattersburg,Hartberg,3.0,2,4.0,5,...,66.667,44.444,100.000,10.333,4,4,1.67,3.80,5.00,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115367,England,League One,2024-12-21,21,Bristol Rovers,Wrexham,20.0,14,3.0,11,...,66.667,44.444,43.009,-0.222,1,1,4.80,3.64,1.79,False
115368,England,League One,2024-12-21,21,Burton,Exeter,23.0,24,15.0,14,...,66.667,37.037,44.735,-0.111,1,2,3.06,3.38,2.40,False
115369,England,League One,2024-12-21,21,Lincoln,Reading,9.0,11,6.0,15,...,80.000,33.333,58.219,-1.600,2,0,1.71,3.85,5.02,True
115370,Scotland,Championship,2024-12-21,18,Livingston,Ayr United,2.0,3,3.0,4,...,75.000,37.500,53.492,-1.625,0,1,2.11,3.18,3.81,False
