<a href="https://colab.research.google.com/github/Ayman-analyst/Egyptian-League-Analytics/blob/main/Teams_Points_Predicitons_2017_2018.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt

# **Load Previous and Target Season Data**

In [None]:
# Input files are read from a Google Drive path under /content/drive, so the
# drive has to be mounted before this cell runs.
# Presumably: historical per-season standings (CSV) and the 2017-2018
# standings (Excel) that supply the prediction target -- confirm against the files.
prev_seasons_path = "/content/drive/MyDrive/Predicitons/All Pervious Seasons.csv"
target_season_path = "/content/drive/MyDrive/Predicitons/2017-2018_Standing.xlsx"

df_prev = pd.read_csv(prev_seasons_path)
df_target = pd.read_excel(target_season_path)

# **Normalize Season Format and Align with Previous Season**

In [None]:
def _season_start_year(season_labels):
    """Return the first four characters of each season label as an int."""
    return season_labels.str[:4].astype(int)


# Reduce season labels to their 4-digit prefix (e.g. "2016-2017" -> 2016).
df_prev['Season'] = _season_start_year(df_prev['Season'])

# The target frame is additionally shifted back one year so that each target
# row carries the same Season key as the previous-season row it is merged with.
# NOTE: both frames are mutated in place; re-running this cell without
# reloading the data fails, because `.str` only works on string columns.
df_target['Season'] = _season_start_year(df_target['Season']) - 1

# **Clean and Rename Target Points Column**

In [None]:
# Keep only the merge keys and the points column of the target season, and
# expose the points as `NextSeasonPoints`.
# The points header is expected under the misspelled name "Ponits"; it is first
# normalized to "Points" (rename ignores a missing key, so this is a no-op when
# the header is already correct), the same way the linear-regression section
# handles it. A corrected header therefore no longer raises KeyError here.
df_target_cleaned = (
    df_target
    .rename(columns={'Ponits': 'Points'})
    .loc[:, ['Team', 'Season', 'Points']]
    .rename(columns={'Points': 'NextSeasonPoints'})
)

# **Merge the Two Datasets**

In [None]:
# Inner join on (Team, Season): only rows whose team/season pair appears in
# both the historical table and the cleaned target table are kept.
merged_df = df_prev.merge(
    df_target_cleaned,
    how='inner',
    on=['Team', 'Season'],
)

# **Define Features and Target**

In [None]:
# Predictors are the season statistics taken from the historical table;
# the target is the points column brought in from the target-season table.
feature_columns = ['Wins', 'Loses', 'Ties', 'Goal_Scored', 'Goals_Conceded', 'Points']
X = merged_df[feature_columns]
y = merged_df['NextSeasonPoints']

# **Split Data for Training and Testing**

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **Train the Model**

In [None]:
# Random forest with scikit-learn's default hyperparameters; only the seed is
# pinned so that repeated runs are reproducible.
model = RandomForestRegressor(random_state=42)
model.fit(X=X_train, y=y_train)

# **Predict and Evaluate**

In [None]:
# Score the model on the held-out rows only.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)  # same unit as the target (points)
r2 = r2_score(y_test, y_pred)

# One print call, one line per metric.
print(
    "\nModel Evaluation:",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)


Model Evaluation:
RMSE: 19.58
R² Score: 0.18


# **Predict for All Teams in the Dataset**

In [None]:
# Run the fitted model over every merged row, not just the held-out split.
# Most of these rows were used to fit the model, so this table mainly shows
# in-sample fit rather than out-of-sample accuracy.
X_all = merged_df[['Wins', 'Loses', 'Ties', 'Goal_Scored', 'Goals_Conceded', 'Points']]
y_all = merged_df['NextSeasonPoints']
y_all_pred = model.predict(X_all)

# Side-by-side table of actual vs. predicted points, best predicted team first.
results_all = pd.DataFrame({
    'Team': merged_df['Team'],
    'Actual Next Season Points': y_all,
    'Predicted Next Season Points': y_all_pred.round(1),
}).sort_values(by='Predicted Next Season Points', ascending=False)

print(results_all.to_string(index=False))


           Team  Actual Next Season Points  Predicted Next Season Points
        Ismaily                         68                          59.9
       El Masry                         63                          57.4
        Zamalek                         61                          57.0
        Ahly SC                         88                          54.5
         Smouha                         51                          53.3
       Enppi SC                         50                          51.7
      El Makasa                         46                          51.1
 Entag El Harby                         47                          42.9
   Ittihad Alex                         41                          42.7
     Wadi Degla                         37                          41.9
   El Mokawloon                         42                          41.3
Talaea El Gaish                         38                          39.5
       Petrojet                         38         

In [None]:
# Persist the random-forest prediction table to Drive, without the index column.
rf_results_path = "/content/drive/MyDrive/Predicitons/Predicted_Team_Points_Random Forests.csv"
results_all.to_csv(rf_results_path, index=False)

# **Prediction Using Linear Regression**

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt

# This cell is self-contained: it reloads the raw files and repeats the
# preprocessing before fitting a linear model on the same features.

# 1. Load the raw tables again (fresh frames whose Season column is still a string)
df_prev = pd.read_csv("/content/drive/MyDrive/Predicitons/All Pervious Seasons.csv")
df_target = pd.read_excel("/content/drive/MyDrive/Predicitons/2017-2018_Standing.xlsx")

# 2. Reduce season labels to their 4-digit prefix (e.g. 2016-2017 -> 2016)
df_prev['Season'] = df_prev['Season'].str[:4].astype(int)

# 3. Same for the target, shifted back one year so it shares the Season key
#    of the previous-season rows it will be merged with
df_target['Season'] = df_target['Season'].str[:4].astype(int) - 1

# 4-5. Normalize the misspelled "Ponits" header (no-op if already "Points"),
#      keep the merge keys plus points, and expose points as NextSeasonPoints
df_target_cleaned = (
    df_target
    .rename(columns={'Ponits': 'Points'})
    [['Team', 'Season', 'Points']]
    .rename(columns={'Points': 'NextSeasonPoints'})
)

# 6. Inner join: keep only team/season pairs present in both tables
merged_df = df_prev.merge(df_target_cleaned, how='inner', on=['Team', 'Season'])

# 7. Features and target
X = merged_df[['Wins', 'Loses', 'Ties', 'Goal_Scored', 'Goals_Conceded', 'Points']]
y = merged_df['NextSeasonPoints']

# 8. Hold out 20% of the rows with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 9. Fit ordinary least squares on the training rows
model = LinearRegression()
model.fit(X_train, y_train)

# 10. Score on the held-out rows
y_pred = model.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(
    "\nModel Evaluation:",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

# 11. One fitted coefficient per feature column.
# NOTE(review): Points is presumably derived from Wins and Ties, which would make
# the features collinear and individual coefficients hard to interpret -- confirm.
coeffs = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
print("\nModel Coefficients:")
print(coeffs)

# 12. Predict for every merged row (training rows included) and save the table
y_all_pred = model.predict(X)

results_all = pd.DataFrame({
    'Team': merged_df['Team'],
    'Actual Next Season Points': y,
    'Predicted Next Season Points': y_all_pred.round(1),
})
results_all = results_all.sort_values(by='Predicted Next Season Points', ascending=False)

print("\nPredicted Points for All Teams:")
print(results_all.to_string(index=False))
results_all.to_csv("/content/drive/MyDrive/Predicitons/Predicted_Team_Points_Regression.csv", index=False)


Model Evaluation:
RMSE: 9.90
R² Score: 0.79

Model Coefficients:
          Feature  Coefficient
0            Wins    -0.202703
1           Loses    -0.126491
2            Ties     0.329194
3     Goal_Scored     0.887160
4  Goals_Conceded    -0.816168
5          Points    -0.278914

Predicted Points for All Teams:
           Team  Actual Next Season Points  Predicted Next Season Points
        Ahly SC                         88                          73.8
        Ismaily                         68                          63.4
      El Makasa                         46                          61.3
       Enppi SC                         50                          55.0
        Zamalek                         61                          51.9
       El Masry                         63                          50.9
         Smouha                         51                          50.7
     Wadi Degla                         37                          45.2
       Petrojet            