In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load dataset
file_path = r"C:\Users\ROG\Desktop\RMA_final\Updated_Dataset_with_Goal_Difference.csv"
dataset = pd.read_csv(file_path)

# Exogenous regressors supplied to both ARIMAX models. Defined once so the
# numeric coercion, the model fits and the forecasts all use the same columns.
# NOTE(review): "goal_difference" is presumably derived from the very scores
# being predicted, which would leak the target into the regressors - confirm.
exog_features = [
    "home_team_form",
    "away_team_form",
    "goal_difference",
    "HomeTeam Injuries",
    "AwayTeam Injuries",
]

# Columns kept from the raw dataset: both targets, the team names, and the
# exogenous regressors.
selected_features = [
    "home_score_total",
    "away_score_total",
    "home_team",
    "away_team",
    *exog_features,
]
data = dataset[selected_features].dropna()

# Ensure exogenous features are numeric; unparseable values become NaN.
for col in exog_features:
    data[col] = pd.to_numeric(data[col], errors="coerce")

# Drop rows with any non-numeric values in exogenous features, then rebuild a
# gap-free 0..n-1 index. Without the reset, dropped rows leave holes in the
# index, statsmodels labels its forecasts by position instead, and the
# predictions no longer line up with the test rows.
data = data.dropna().reset_index(drop=True)

# Split data into training and testing sets (first 80% of rows / last 20%).
# NOTE(review): this assumes the rows are already in chronological order -
# confirm against the dataset.
# The explicit copies make both frames independent of `data`, so adding the
# prediction columns below no longer triggers SettingWithCopyWarning.
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size].copy()
test_data = data.iloc[train_size:].copy()

# ARIMAX Parameters
best_order = (1, 1, 1)  # Non-seasonal order (p, d, q)

# Fit ARIMAX for home_score_total
home_model = ARIMA(
    train_data["home_score_total"],
    exog=train_data[exog_features],
    order=best_order,
)
home_model_fit = home_model.fit()

# Fit ARIMAX for away_score_total
away_model = ARIMA(
    train_data["away_score_total"],
    exog=train_data[exog_features],
    order=best_order,
)
away_model_fit = away_model.fit()

# Out-of-sample window: the positions immediately following the training rows.
forecast_start = len(train_data)
forecast_end = len(train_data) + len(test_data) - 1

# Make predictions on the test set
test_home_predictions = home_model_fit.predict(
    start=forecast_start,
    end=forecast_end,
    exog=test_data[exog_features],
)

test_away_predictions = away_model_fit.predict(
    start=forecast_start,
    end=forecast_end,
    exog=test_data[exog_features],
)

# Round predictions to integers. Assign by position (to_numpy) so the values
# land on the right rows regardless of how the forecast Series is indexed.
test_data["predicted_home_score_total"] = (
    test_home_predictions.round().astype(int).to_numpy()
)
test_data["predicted_away_score_total"] = (
    test_away_predictions.round().astype(int).to_numpy()
)

# Calculate performance metrics for the test dataset (on the rounded scores)
mae_home = mean_absolute_error(test_data["home_score_total"], test_data["predicted_home_score_total"])
mse_home = mean_squared_error(test_data["home_score_total"], test_data["predicted_home_score_total"])
r2_home = r2_score(test_data["home_score_total"], test_data["predicted_home_score_total"])

mae_away = mean_absolute_error(test_data["away_score_total"], test_data["predicted_away_score_total"])
mse_away = mean_squared_error(test_data["away_score_total"], test_data["predicted_away_score_total"])
r2_away = r2_score(test_data["away_score_total"], test_data["predicted_away_score_total"])

# Display performance metrics for the test dataset
print("\nTest Performance Metrics:")
print(f"Home Score Total - MAE: {mae_home}, MSE: {mse_home}, R²: {r2_home}")
print(f"Away Score Total - MAE: {mae_away}, MSE: {mse_away}, R²: {r2_away}")

# Prediction Function
def predict_scores(home_team, away_team):
    """Forecast the rounded home and away score totals for a fixture.

    The most recent row of ``data`` in which ``home_team`` played at home
    against ``away_team`` supplies the exogenous inputs. Each fitted ARIMAX
    model then produces a single out-of-sample forecast for the step
    immediately after the training sample.

    Team names are matched ignoring surrounding whitespace and letter case,
    so hand-typed input such as ``" arsenal "`` still finds ``"Arsenal"``.

    Args:
        home_team: Name of the home team.
        away_team: Name of the away team.

    Returns:
        A ``(home_score, away_score)`` tuple of forecasts rounded to the
        nearest integer.

    Raises:
        ValueError: If ``data`` has no row for this home/away pairing.
    """
    # Normalise both the query and the stored names before comparing; an
    # exact comparison rejects input that differs only in case or padding.
    home_key = str(home_team).strip().casefold()
    away_key = str(away_team).strip().casefold()
    home_names = data["home_team"].astype(str).str.strip().str.casefold()
    away_names = data["away_team"].astype(str).str.strip().str.casefold()

    team_data = data[(home_names == home_key) & (away_names == away_key)]

    # Check if any matching data exists
    if team_data.empty:
        raise ValueError("No recent match data found for the given teams.")

    # Last matching row in dataset order is used as the latest record.
    latest_team_data = team_data.iloc[-1]

    # One row of regressors, shaped (1, n_features) for a one-step forecast.
    exog_data = latest_team_data[exog_features].astype(float).values.reshape(1, -1)

    # First position after the training sample; start == end gives one value.
    forecast_step = len(train_data)
    predicted_home_score = home_model_fit.predict(
        start=forecast_step, end=forecast_step, exog=exog_data
    ).iloc[0]
    predicted_away_score = away_model_fit.predict(
        start=forecast_step, end=forecast_step, exog=exog_data
    ).iloc[0]

    return round(predicted_home_score), round(predicted_away_score)


# Interactive entry point: ask for both team names, then report the forecast.
home_team_input = input("Enter the home team name: ")
away_team_input = input("Enter the away team name: ")

try:
    predicted_home, predicted_away = predict_scores(
        home_team_input,
        away_team_input,
    )
    print(f"\nPredicted Scores: {home_team_input} {predicted_home} - {predicted_away} {away_team_input}")
except ValueError as lookup_error:
    # Covers the ValueError predict_scores raises when no row matches the
    # requested fixture; the message is shown to the user as-is.
    print(lookup_error)
except Exception as unexpected_error:
    # Any other failure is reported with a generic prefix instead of a traceback.
    print("An error occurred:", unexpected_error)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data["predicted_home_score_total"] = test_home_predictions.round().astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data["predicted_away_score_total"] = test_away_predictions.round().astype(int)



Test Performance Metrics:
Home Score Total - MAE: 0.5384615384615384, MSE: 0.6923076923076923, R²: 0.4564459930313588
Away Score Total - MAE: 0.5384615384615384, MSE: 0.6923076923076923, R²: 0.4781962338949456
No recent match data found for the given teams.
