# 05 - Prediction

In [4]:
# imports
import os
import joblib
import tensorflow as tf
import numpy as np
import pandas as pd
import requests

In [5]:
# Directories holding the processed dataset and the saved training artefacts
processed_dir = os.path.join('..', 'data', 'processed')
models_dir = os.path.join('..', 'models')

# Match data that the prediction function reads its team features from
data = pd.read_csv(os.path.join(processed_dir, 'data_for_model.csv'))

# Trained Keras model
best_model = tf.keras.models.load_model(os.path.join(models_dir, 'model_best.keras'))

# Feature lists, fitted column transformer and fitted label encoder.
# NOTE: `allow_pickle=True` and `joblib.load` both unpickle objects, which can
# execute arbitrary code -- only load artefact files that you trust.
feature_info = np.load(os.path.join(models_dir, 'feature_info.npy'), allow_pickle=True).item()
ct = joblib.load(os.path.join(models_dir, 'column_transformer.pkl'))
label_encoder = joblib.load(os.path.join(models_dir, 'label_encoder.pkl'))


W0000 00:00:1737316090.618345  387058 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


## Building the function to predict football match results

In [6]:
def predict_match_outcome(data, home_team, away_team, date_str, ct, best_model_dense, label_encoder, feature_info):
    """
    Predict the outcome of a football match based on historical data and features.

    The feature row is assembled from three sources:
      * the `home_*` columns of the last row in `data` where `home_team` played at home,
      * the `away_*` columns of the last row in `data` where `away_team` played away,
      * cyclical day-of-week / month features derived from `date_str`.
    Any training feature that is still absent is filled with a default
    ("missing" for categorical features, 0 for numerical features).

    "Last row" means last in the frame's own order; this presumably relies on
    `data` being sorted chronologically -- confirm against the data pipeline.

    Parameters:
        data (pd.DataFrame): The historical match data. Must contain the columns
            `home_team` and `away_team`; columns prefixed `home_` / `away_`
            are used as team features.
        home_team (str): Home team's name.
        away_team (str): Away team's name.
        date_str (str): Date of the match in 'YYYY-MM-DD' format.
        ct (ColumnTransformer): Preprocessing pipeline from training.
        best_model_dense (keras.Model): Trained neural network model.
        label_encoder (LabelEncoder): Encoder for target classes ('H', 'D', 'A').
        feature_info (dict): Dictionary containing `categorical_features` and `numerical_features`.

    Returns:
        dict: Predicted probabilities and the predicted result.

    Raises:
        ValueError: If `home_team` has no home match or `away_team` has no away
            match in `data`, or if `date_str` does not match 'YYYY-MM-DD'.
    """
    # Extract feature info; list() keeps the `+` below a concatenation even if
    # the lists were stored as arrays.
    categorical_features = list(feature_info['categorical_features'])
    numerical_features = list(feature_info['numerical_features'])

    # Step 1: Define home and away team columns
    team_columns_home = [col for col in data.columns if col.startswith('home_')]
    team_columns_away = [col for col in data.columns if col.startswith('away_')]

    # Step 2: Get the latest features for both teams
    team_data_home = data.loc[data['home_team'] == home_team, team_columns_home].tail(1)
    team_data_away = data.loc[data['away_team'] == away_team, team_columns_away].tail(1)

    # Without this guard an unknown team produces an all-NaN feature row that is
    # silently passed on to the transformer and the model.
    if team_data_home.empty:
        raise ValueError(f"No home matches found for home team '{home_team}' in the historical data.")
    if team_data_away.empty:
        raise ValueError(f"No away matches found for away team '{away_team}' in the historical data.")

    # Combine features into a single row (indices are reset so both align on row 0)
    combined_features = pd.concat(
        [team_data_home.reset_index(drop=True), team_data_away.reset_index(drop=True)],
        axis=1,
    )

    # Step 3: Add date-related features (sin/cos pairs encode the cyclical nature)
    date = pd.to_datetime(date_str, format='%Y-%m-%d')
    day_of_week = date.dayofweek
    month = date.month
    day_of_week_angle = 2 * np.pi * day_of_week / 7.0
    month_angle = 2 * np.pi * month / 12.0

    date_features = pd.DataFrame([{
        'day_of_week': day_of_week,
        'month': month,
        'day_of_week_sin': np.sin(day_of_week_angle),
        'day_of_week_cos': np.cos(day_of_week_angle),
        'month_sin': np.sin(month_angle),
        'month_cos': np.cos(month_angle)
    }])

    # Combine all features
    final_features = pd.concat([combined_features, date_features], axis=1)

    # Step 4: Add missing features
    # Add missing categorical features with a default "missing" value
    for feature in categorical_features:
        if feature not in final_features.columns:
            final_features[feature] = "missing"

    # Add missing numerical features with a default value of 0
    for feature in numerical_features:
        if feature not in final_features.columns:
            final_features[feature] = 0

    # Reorder the columns to match the training feature order
    final_features = final_features[categorical_features + numerical_features]

    # Step 5: Preprocess the features
    X_new_processed = ct.transform(final_features)

    # Step 6: Predict probabilities (one row in, one row of class probabilities out)
    y_pred = best_model_dense.predict(X_new_processed)

    # Map probabilities to outcomes using the encoder's class indices
    home_idx, draw_idx, away_idx = label_encoder.transform(['H', 'D', 'A'])
    probabilities = {
        'Home Win': y_pred[0][home_idx],
        'Draw': y_pred[0][draw_idx],
        'Away Win': y_pred[0][away_idx]
    }

    # Predicted class
    y_pred_classes = np.argmax(y_pred, axis=1)
    predicted_label = label_encoder.inverse_transform(y_pred_classes)[0]

    return {
        'probabilities': probabilities,
        'predicted_result': predicted_label
    }

In [7]:
# Example fixture to score with the locally loaded model
home_team, away_team, date_str = 'everton', 'wolves', '2021-05-19'

# Run the prediction using the artefacts loaded earlier in the notebook
prediction = predict_match_outcome(
    data,
    home_team,
    away_team,
    date_str,
    ct=ct,                        # preprocessing pipeline
    best_model_dense=best_model,  # trained model
    label_encoder=label_encoder,  # target encoder
    feature_info=feature_info,    # training feature information
)

prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step


{'probabilities': {'Home Win': 0.22579502,
  'Draw': 0.45725465,
  'Away Win': 0.31695032},
 'predicted_result': 'D'}

## Testing the prediction API with POST requests

In [13]:
# Endpoint of the prediction service. Use the loopback address rather than
# 0.0.0.0: 0.0.0.0 is a bind address and is not a valid destination on every
# platform.
url = "http://127.0.0.1:9696/predict"
payload = {
    "home_team": "everton",
    "away_team": "wolves",
    "date": "2021-05-19"
}

# A timeout keeps the cell from hanging indefinitely if the service does not answer
response = requests.post(url, json=payload, timeout=10)
print(response.json())

{'Match_Result': 'Draw', 'Prob_Away_Win': 0.2455044388771057, 'Prob_Draw': 0.5076267719268799, 'Prob_Home_Win': 0.24686869978904724}


In [14]:
# Test multiple matches against the API endpoint (`url` is defined in the cell above)
for match in [
    {"home_team": "arsenal", "away_team": "brentford", "date": "2021-08-13"},
    {"home_team": "liverpool", "away_team": "chelsea", "date": "2021-08-13"},
    {"home_team": "brentford", "away_team": "arsenal", "date": "2021-08-14"},
    {"home_team": "arsenal", "away_team": "brentford", "date": "2021-08-14"},
]:
    # The timeout prevents one unanswered request from blocking the whole loop forever
    print(requests.post(url, json=match, timeout=10).json())

{'Match_Result': 'Home_Win', 'Prob_Away_Win': 0.008116791024804115, 'Prob_Draw': 0.1457294374704361, 'Prob_Home_Win': 0.8461537957191467}
{'Match_Result': 'Away_Win', 'Prob_Away_Win': 0.5223577618598938, 'Prob_Draw': 0.32547494769096375, 'Prob_Home_Win': 0.15216724574565887}
{'Match_Result': 'Home_Win', 'Prob_Away_Win': 0.15303798019886017, 'Prob_Draw': 0.3209175765514374, 'Prob_Home_Win': 0.5260443687438965}
{'Match_Result': 'Home_Win', 'Prob_Away_Win': 0.008386810310184956, 'Prob_Draw': 0.13073571026325226, 'Prob_Home_Win': 0.8608774542808533}


In [15]:
# Issue a request from the shell with curl instead of `requests`:
# POSTs a JSON body (home_team, away_team, date) to /predict on 127.0.0.1:9696.
!curl -X POST http://127.0.0.1:9696/predict \
     -H "Content-Type: application/json" \
     -d '{"home_team": "arsenal", "away_team": "liverpool", "date": "2024-12-16"}'

{
  "Match_Result": "Home_Win",
  "Prob_Away_Win": 0.0622507706284523,
  "Prob_Draw": 0.39442455768585205,
  "Prob_Home_Win": 0.5433247685432434
}
