In [None]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load dataset
file_path = r"C:\Users\ROG\Desktop\SEM-5\ML\project codes\final\Updated_Dataset_with_Goal_Difference.csv"  # Replace with the correct dataset path
dataset = pd.read_csv(file_path)

# Select relevant columns
selected_features = [
    "date",  # Ensure this column exists or replace with the correct date column name
    "home_team",
    "away_team",
    "home_score_total",
    "away_score_total",
    "home_team_form",
    "away_team_form",
    "goal_difference",
    "HomeTeam Injuries",
    "AwayTeam Injuries",
]
# Drop incomplete rows; the explicit copy makes the column assignment below
# operate on an independent frame rather than on something derived from
# `dataset`.
data = dataset[selected_features].dropna().copy()

# Ensure the date column is in datetime format (day-first strings).
data["date"] = pd.to_datetime(data["date"], dayfirst=True)

# Order the matches chronologically. The split below is positional, so it is
# only a real "train on the past, test on the future" split when the rows are
# date-ordered; the same ordering also makes "last row" mean "most recent
# match". A stable sort keeps same-day matches in their original file order.
data = data.sort_values("date", kind="mergesort").reset_index(drop=True)

# Prepare the data for Prophet models: Prophet expects the timestamp column to
# be named "ds" and the target column to be named "y".
prophet_home_data = data.rename(columns={"date": "ds", "home_score_total": "y"})
prophet_away_data = data.rename(columns={"date": "ds", "away_score_total": "y"})

# Train-Test Split: first 80% of the (chronological) rows train, the rest test.
train_size = int(len(data) * 0.8)
train_data_home = prophet_home_data.iloc[:train_size]
test_data_home = prophet_home_data.iloc[train_size:]
train_data_away = prophet_away_data.iloc[:train_size]
test_data_away = prophet_away_data.iloc[train_size:]

def _fit_score_model(training_frame, regressor_names):
    """Build and fit one Prophet model on ``training_frame``.

    The model uses multiplicative seasonality, a changepoint prior scale of
    0.05, one extra regressor per entry of ``regressor_names`` (added in the
    given order) and a custom 7-day "weekly" seasonality with Fourier order 3.
    ``training_frame`` must carry the "ds" and "y" columns plus every
    regressor column named in ``regressor_names``.

    Returns the fitted model.
    """
    model = Prophet(
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.05,
    )
    for regressor_name in regressor_names:
        model.add_regressor(regressor_name)
    model.add_seasonality(name='weekly', period=7, fourier_order=3)
    model.fit(training_frame)
    return model


# NOTE(review): "goal_difference" looks like it may be derived from the very
# scores being predicted - confirm it is known before kick-off, otherwise it
# leaks the target into the features.

# Train the Prophet model for home_score_total
home_model = _fit_score_model(
    train_data_home,
    ("home_team_form", "goal_difference", "HomeTeam Injuries"),
)

# Train the Prophet model for away_score_total
away_model = _fit_score_model(
    train_data_away,
    ("away_team_form", "goal_difference", "AwayTeam Injuries"),
)

# Evaluate Performance Metrics
#
# Prophet's predict() hands its rows back ordered by "ds" with a fresh index,
# while the scikit-learn metrics compare their two arguments purely by
# position. The hold-out frames are therefore put into "ds" order first so
# that every true score is compared with the forecast for the same row.
# NOTE(review): rows sharing the same "ds" rely on Prophet keeping tie order
# (stable sort) - confirm against the installed Prophet version.
home_eval = test_data_home.sort_values("ds", kind="mergesort")
away_eval = test_data_away.sort_values("ds", kind="mergesort")

home_forecast_test = home_model.predict(
    home_eval[["ds", "home_team_form", "goal_difference", "HomeTeam Injuries"]]
)
away_forecast_test = away_model.predict(
    away_eval[["ds", "away_team_form", "goal_difference", "AwayTeam Injuries"]]
)

# Plain arrays make the positional comparison explicit and index-independent.
home_true = home_eval["y"].to_numpy()
home_pred = home_forecast_test["yhat"].to_numpy()
away_true = away_eval["y"].to_numpy()
away_pred = away_forecast_test["yhat"].to_numpy()

mae_home = mean_absolute_error(home_true, home_pred)
mse_home = mean_squared_error(home_true, home_pred)
r2_home = r2_score(home_true, home_pred)

mae_away = mean_absolute_error(away_true, away_pred)
mse_away = mean_squared_error(away_true, away_pred)
r2_away = r2_score(away_true, away_pred)

# Display Performance Metrics
print("\nProphet Model Performance Metrics:")
print(f"Home Score Total - MAE: {mae_home}, MSE: {mse_home}, R²: {r2_home}")
print(f"Away Score Total - MAE: {mae_away}, MSE: {mse_away}, R²: {r2_away}")

# Prediction Function for Team Names
def predict_scores_with_prophet(home_team, away_team, data):
    """Predict the score of ``home_team`` vs ``away_team``.

    The regressor values of the most recent recorded meeting of the two teams
    (same home/away roles) are fed to the module-level ``home_model`` and
    ``away_model``, and the resulting ``yhat`` values are rounded to whole
    goals.

    Args:
        home_team: Name to match against the "home_team" column.
        away_team: Name to match against the "away_team" column.
        data: DataFrame with the columns "date", "home_team", "away_team",
            "home_team_form", "away_team_form", "goal_difference",
            "HomeTeam Injuries" and "AwayTeam Injuries".

    Returns:
        Tuple ``(predicted_home_score, predicted_away_score)`` of
        non-negative whole numbers.

    Raises:
        ValueError: If ``data`` contains no match between the two teams.
    """
    # Filter the data for the specified fixture
    fixture_mask = (data["home_team"] == home_team) & (data["away_team"] == away_team)
    team_data = data[fixture_mask]

    if team_data.empty:
        raise ValueError(f"No data found for match: {home_team} vs {away_team}")

    # Take the most recent meeting by date rather than by file position, so
    # the result does not depend on how the caller ordered the rows. The
    # stable sort keeps the later row when two meetings share a date.
    latest_match = team_data.sort_values("date", kind="mergesort").iloc[-1]

    # One-row frames holding exactly the columns each model was given as
    # regressors, with the timestamp under Prophet's "ds" name.
    future_home = pd.DataFrame(
        {
            "ds": [latest_match["date"]],
            "home_team_form": [latest_match["home_team_form"]],
            "goal_difference": [latest_match["goal_difference"]],
            "HomeTeam Injuries": [latest_match["HomeTeam Injuries"]],
        }
    )
    future_away = pd.DataFrame(
        {
            "ds": [latest_match["date"]],
            "away_team_form": [latest_match["away_team_form"]],
            "goal_difference": [latest_match["goal_difference"]],
            "AwayTeam Injuries": [latest_match["AwayTeam Injuries"]],
        }
    )

    # Predict scores
    home_forecast = home_model.predict(future_home)
    away_forecast = away_model.predict(future_away)

    # The forecast is an unbounded real number; a score cannot be negative,
    # so the rounded value is floored at zero.
    predicted_home_score = max(0, round(home_forecast["yhat"].iloc[0]))
    predicted_away_score = max(0, round(away_forecast["yhat"].iloc[0]))

    return predicted_home_score, predicted_away_score

# Input team names and predict scores.
# Surrounding whitespace is stripped so that a stray space typed before or
# after a name does not make the exact-match team lookup fail.
home_team_input = input("Enter the home team name: ").strip()
away_team_input = input("Enter the away team name: ").strip()

try:
    predicted_home, predicted_away = predict_scores_with_prophet(
        home_team_input, away_team_input, data
    )
    print(f"\nPredicted Scores: {home_team_input} {predicted_home} - {predicted_away} {away_team_input}")
except ValueError as e:
    # Raised by predict_scores_with_prophet when the fixture is not in the data.
    print(e)
except Exception as e:
    # Top-level boundary of the interactive script: report instead of crashing.
    print("An error occurred:", e)


01:55:52 - cmdstanpy - INFO - Chain [1] start processing
01:55:52 - cmdstanpy - INFO - Chain [1] done processing
01:55:52 - cmdstanpy - INFO - Chain [1] start processing
01:55:52 - cmdstanpy - INFO - Chain [1] done processing



Prophet Model Performance Metrics:
Home Score Total - MAE: 1.1788184637641863, MSE: 2.085207577123958, R²: -0.6371664600880322
Away Score Total - MAE: 1.1085536692016182, MSE: 1.7992010978141268, R²: -0.35608764607298626


Enter the home team name:  Belgium
Enter the away team name:  USSR



Predicted Scores: Belgium 3 - 0 USSR
