In [1]:
import joblib
import function_library as fl
import importlib
import pandas as pd
# Re-import the local helper module so that edits made to function_library.py
# take effect in this kernel without restarting it.
importlib.reload(fl)

<module 'function_library' from 'C:\\Users\\leere\\PycharmProjects\\Football_ML3\\function_library.py'>

In [2]:
# Raw fixtures export to score.
file_path = r"C:\Users\leere\OneDrive\Desktop\RAW DATA\GOALS_LIVE.CSV"

# Encodings to try, in order.
# NOTE: 'latin1' (and its alias 'iso-8859-1') maps every possible byte, so it never
# raises UnicodeDecodeError; the entries after it are effectively unreachable fallbacks.
encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']

# Only fixtures with Round >= MIN_ROUND are kept
# (presumably so the form/percentage features are based on enough matches -- TODO confirm).
MIN_ROUND = 8

data = None
for encoding in encodings:
    try:
        data = pd.read_csv(file_path, encoding=encoding)
    except UnicodeDecodeError:
        print(f"Failed to decode with encoding: {encoding}")
    else:
        print(f"Successfully read the file with encoding: {encoding}")
        break

# Fail loudly here instead of with an opaque AttributeError on `None.rename` below.
if data is None:
    raise ValueError(f"Could not decode {file_path!r} with any of the encodings: {encodings}")

# Mapping from raw CSV column names to the names used downstream by this notebook.
col_dict = {
    "country": "Country",
    "league": "League",
    "datameci": "Date",
    "etapa": "Round",
    "txtechipa1": "home_team",
    "txtechipa2": "away_team",
    "place1t": "Home_team_place_total",
    "place1a": "Home_team_place_home",
    "place2t": "Away_team_place_total",
    "place2d": "Away_team_place_away",
    "customh": "ELO_home",
    "customa": "ELO_away",
    "custom3": "FORM_home",
    "custom4": "FORM_away",
    "home_val": "home_win",
    "home_val_2": "home_win_15",
    "home_val_3": "home_o25",
    "home_val_4": "home_o35",
    "home_val_5": "home_scored",
    "away_val": "away_win",
    "away_val_2": "away_win_15",
    "away_val_3": "away_o25",
    "away_val_4": "away_o35",
    "away_val_5": "away_scored",
    "scor1": "home_goals",
    "scor2": "away_goals",
    "cotao": "o2.5_odds",
}

# `.filter(items=...)` silently ignores columns that are absent, so surface a schema
# mismatch explicitly rather than letting it show up later as a confusing error.
missing_cols = [raw_name for raw_name in col_dict if raw_name not in data.columns]
if missing_cols:
    print(f"Warning: expected columns missing from the CSV: {missing_cols}")

# Rename to the downstream names and keep only the mapped columns (in col_dict order).
data = data.rename(columns=col_dict).filter(items=list(col_dict.values()))

# Convert Date column to datetime type (source format is day/month/year)
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')

# Order by date, keep rounds >= MIN_ROUND, drop incomplete rows, and renumber the index.
# Built as one non-mutating chain so re-running the cell never edits a slice in place.
data = (
    data.sort_values(by='Date')
        .loc[lambda frame: frame["Round"] >= MIN_ROUND]
        .dropna()
        .reset_index(drop=True)
)

# Columns that identify the game or are not model inputs; removed from the feature frame.
cols_to_drop = ['Date', 'home_team', 'away_team', 'home_goals', 'away_goals', 'o2.5_odds']

# `data_ready` shares its index with `data`, so rows can be matched back to `data` later.
data_ready = data.drop(columns=cols_to_drop)
data_ready

Successfully read the file with encoding: utf-8


Unnamed: 0,Country,League,Round,Home_team_place_total,Home_team_place_home,Away_team_place_total,Away_team_place_away,ELO_home,ELO_away,FORM_home,...,home_win,home_win_15,home_o25,home_o35,home_scored,away_win,away_win_15,away_o25,away_o35,away_scored
0,England,Premier L,20,17,18,3,3,1625.0,1818.0,19.0,...,22.222,22.222,77.778,33.333,1.556,60.0,30.0,50.0,30.0,1.5
1,England,Championship,26,16,21,20,23,1386.0,1296.0,-1.0,...,30.769,23.077,53.846,30.769,1.385,8.333,0.0,50.0,33.333,0.667
2,Greece,Super L,17,6,7,1,3,1864.0,2192.0,2.0,...,37.5,12.5,37.5,0.0,1.0,50.0,37.5,37.5,12.5,1.5
3,Portugal,Primeira L,17,11,9,10,17,1763.0,1787.0,15.0,...,42.857,28.571,57.143,14.286,1.571,12.5,12.5,37.5,37.5,0.375
4,Turkey,1. Lig,18,18,11,13,11,1726.0,1716.0,1.0,...,50.0,25.0,50.0,25.0,1.375,22.222,0.0,55.556,33.333,1.111
5,Australia,A-League,12,2,8,4,4,1961.0,1985.0,15.0,...,20.0,20.0,60.0,40.0,2.4,100.0,25.0,75.0,75.0,3.0
6,Israel,Premier League,17,3,3,4,5,2222.0,2040.0,-7.0,...,71.429,57.143,57.143,28.571,2.143,37.5,25.0,75.0,50.0,1.75
7,Israel,Premier League,17,7,11,8,13,1900.0,1970.0,-19.0,...,37.5,0.0,37.5,25.0,1.125,14.286,14.286,42.857,28.571,0.714
8,England,League One,16,2,5,4,6,1381.0,1328.0,-2.0,...,63.636,27.273,45.455,18.182,1.636,50.0,40.0,60.0,30.0,1.8
9,Spain,Segunda,14,22,20,8,10,1385.0,1587.0,-12.0,...,25.0,12.5,12.5,12.5,0.875,30.0,0.0,50.0,10.0,0.9


In [3]:
# Directory where the per-league model bundles are saved
models_dir = "ALL_MODELS"

# Group the unseen fixtures by (Country, League); one saved model is looked up per pair
grouped_data = data_ready.groupby(['Country', 'League'])

# Filtered predictions, keyed by (country, league)
predictions = {}

# Iterate through each group in the unseen data
for (country, league), group in grouped_data:
    # Construct the model filename
    model_filename = f"{models_dir}/{country}_{league}_model.pkl"

    try:
        # SECURITY NOTE: joblib.load unpickles arbitrary Python objects -- only load
        # model files that come from a trusted source.
        saved_data = joblib.load(model_filename)  # bundle indexed by 'model'/'scaler'/'threshold'
        model = saved_data['model']
        scaler = saved_data['scaler']
        threshold = saved_data['threshold']

        # Features for prediction: everything except the grouping columns
        X = group.drop(columns=['Country', 'League'])

        # Scale the features with the scaler stored alongside the model
        X_scaled = scaler.transform(X)

        # Probability of the class at column index 1 of predict_proba
        y_proba = model.predict_proba(X_scaled)[:, 1]

        # Attach the scores on a new frame (`assign` returns a copy) instead of writing
        # into the groupby slice, which triggers SettingWithCopyWarning.
        scored = group.assign(
            Probability=y_proba,
            Prediction=(y_proba >= threshold).astype(int),
        )

        # Keep only rows where Prediction == 1
        filtered_predictions = scored[scored['Prediction'] == 1]

        # Add `home_team`, `away_team` and `Date` back from `data`, matching rows by index label
        game_info = data.loc[filtered_predictions.index, ['home_team', 'away_team', 'Date']]
        filtered_predictions = pd.concat([game_info, filtered_predictions], axis=1)

        # Add filtered predictions to the dictionary
        predictions[(country, league)] = filtered_predictions
        print(f"Filtered predictions made for {country} - {league}")

    except FileNotFoundError:
        print(f"Model not found for {country} - {league}. Skipping...")

    except Exception as e:
        # Best-effort: report the failing league and carry on with the others.
        print(f"Error processing {country} - {league}: {e}")

# Combine all predictions into a single DataFrame
if predictions:
    all_predictions = pd.concat(predictions.values(), ignore_index=True)
    print("\nAll Predictions as a DataFrame:")
    print(all_predictions)
else:
    # Always (re)define the name: otherwise later cells raise NameError on a fresh kernel,
    # or silently reuse stale predictions left over from an earlier run.
    all_predictions = pd.DataFrame()
    print("No predictions available.")


Model not found for Australia - A-League. Skipping...
Filtered predictions made for England - Championship
Filtered predictions made for England - League One
Filtered predictions made for England - Premier L
Filtered predictions made for Greece - Super L
Filtered predictions made for Israel - Premier League
Filtered predictions made for Portugal - Primeira L
Filtered predictions made for Scotland - Premier L
Filtered predictions made for Spain - Segunda
Model not found for Turkey - 1. Lig. Skipping...

All Predictions as a DataFrame:
       home_team         away_team       Date   Country          League  \
0            QPR             Luton 2025-01-06   England    Championship   
1    Panetolikos        Olympiacos 2025-01-06    Greece         Super L   
2  Maccabi Haifa  Beitar Jerusalem 2025-01-06    Israel  Premier League   
3         Celtic     Dundee United 2025-01-08  Scotland       Premier L   

   Round  Home_team_place_total  Home_team_place_home  Away_team_place_total  \
0   

In [4]:
# Bare last expression: renders the combined predictions table as this cell's output.
all_predictions

Unnamed: 0,home_team,away_team,Date,Country,League,Round,Home_team_place_total,Home_team_place_home,Away_team_place_total,Away_team_place_away,...,home_o25,home_o35,home_scored,away_win,away_win_15,away_o25,away_o35,away_scored,Probability,Prediction
0,QPR,Luton,2025-01-06,England,Championship,26,16,21,20,23,...,53.846,30.769,1.385,8.333,0.0,50.0,33.333,0.667,0.526302,1
1,Panetolikos,Olympiacos,2025-01-06,Greece,Super L,17,6,7,1,3,...,37.5,0.0,1.0,50.0,37.5,37.5,12.5,1.5,0.878788,1
2,Maccabi Haifa,Beitar Jerusalem,2025-01-06,Israel,Premier League,17,3,3,4,5,...,57.143,28.571,2.143,37.5,25.0,75.0,50.0,1.75,0.576927,1
3,Celtic,Dundee United,2025-01-08,Scotland,Premier L,11,1,2,3,2,...,77.778,55.556,3.222,40.0,0.0,40.0,20.0,1.5,0.558651,1


In [5]:
# Hand the combined predictions to the project helper, which reports saving a file
# at the given path (see the cell output).
fl.create_import_file(
    all_predictions,
    r"C:\Users\leere\OneDrive\Desktop\IMPORTS\LAY_U25_ALL_IMPORT.csv",
    provider="lay_all_u25",
    market_name="Over/Under 2.5 Goals",
    selection_name="Under 2.5 Goals",
)

File created and saved successfully at: C:\Users\leere\OneDrive\Desktop\IMPORTS\LAY_U25_ALL_IMPORT.csv
