In [67]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

#Data Preparation
trainDf = pd.read_csv("dataset2.csv")
testDf = pd.read_csv("testingDataset.csv")
testDf = testDf[["GF", "GA", "xG", "xGA", "Poss", "Sh", "SoT", "FK", "PK", "PKatt"]]
trainDf["Winner"] = trainDf["Result"].apply(lambda x: 1 if x == "W" else 0)
target = "Winner"
features = ["GF", "GA", "xG", "xGA", "Poss", "Sh", "SoT", "FK", "PK", "PKatt"]
X = trainDf[features]
y = trainDf[target]

#Model Training
model = LogisticRegression(max_iter=500)
model.fit(X, y)

#Model Prediction
predictions = model.predict(testDf)
predicted_labels = (predictions > 0.5).astype(int)

#Output Formatting
testDfSource = pd.read_csv("testingDataset.csv")
actualWinners = testDfSource[["GF", "GA", "Team", "Opponent"]].copy()
actualWinners["Actual Winners"] = np.where(actualWinners['GF'] > actualWinners["GA"], actualWinners["Team"], actualWinners["Opponent"])
actualWinners.loc[actualWinners['GF'] == actualWinners['GA'], "Actual Winners"] = "Tie"
prediction_df = pd.DataFrame({'Win Pred.': predicted_labels})
prediction_df['Predicted Winner'] = np.where(prediction_df['Win Pred.'] == 1, testDfSource['Team'], testDfSource['Opponent'])

#Final Output
final_predictions = pd.concat([testDfSource[["Team", "Opponent", "GF", "GA"]], prediction_df, actualWinners['Actual Winners']], axis=1)
final_predictions.to_csv("finalPredictions.csv", index=False)


#Getting Model Performance Statistics
print("")

print("Final Predictions Summary: ")

y_true = final_predictions['Actual Winners']
y_pred = final_predictions['Predicted Winner']

print("")

print('Accuracy:')
print(round(accuracy_score(y_true, y_pred), 3))

print('Classification Report:')
print(classification_report(y_true, y_pred, zero_division=1))



               Team            Opponent  GF  GA  Win Pred. Predicted Winner  \
0   Manchester City           Tottenham   2   0          1  Manchester City   
1   Manchester City              Fulham   4   0          1  Manchester City   
2   Manchester City       Wolverhampton   5   1          1  Manchester City   
3   Manchester City  Nottingham Forrest   2   0          1  Manchester City   
4   Manchester City            Brighton   4   0          1  Manchester City   
5   Manchester City          Luton Town   5   1          1  Manchester City   
6   Manchester City      Crystal Palace   4   2          1  Manchester City   
7   Manchester City        Aston Villa    4   1          1  Manchester City   
8   Manchester City             Arsenal   0   0          1  Manchester City   
9   Manchester City           Liverpool   1   1          0        Liverpool   
10  Manchester City   Manchester United   3   1          1  Manchester City   
11  Manchester City         Bournemouth   1   0     

In [83]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

#Data Preparation
trainDf = pd.read_csv("dataset2.csv")
testDf = pd.read_csv("testingDataset.csv")
testDf = testDf[["GF", "GA", "xG", "xGA", "Poss", "Sh", "SoT", "FK", "PK", "PKatt"]]
trainDf["Winner"] = trainDf["Result"].apply(lambda x: 1 if x == "W" else 0)
target = "Winner"
features = ["GF", "GA", "xG", "xGA", "Poss", "Sh", "SoT", "FK", "PK", "PKatt"]
X = trainDf[features]
y = trainDf[target]

#Model Training
model = DecisionTreeClassifier(max_depth=15, min_samples_split=10, min_samples_leaf=5)
model2 = DecisionTreeClassifier(max_depth=2, min_samples_split=10, min_samples_leaf=5)
model3 = DecisionTreeClassifier(max_depth=7, min_samples_split=10, min_samples_leaf=5)
model.fit(X, y)
model2.fit(X, y)
model3.fit(X, y)

#Model Prediction
predictions = model.predict(testDf)
predictionsTest = model2.predict(testDf)
predictions3 = model3.predict(testDf)
predicted_labels = (predictions > 0.5).astype(int)
predictedTest_labels = (predictionsTest > 0.5).astype(int)
predicted_labels3 = (predictions3 > 0.5).astype(int)

#Output Formatting
testDfSource = pd.read_csv("testingDataset.csv")
actualWinners = testDfSource[["GF", "GA", "Team", "Opponent"]].copy()
actualWinners["Actual Winners"] = np.where(actualWinners['GF'] > actualWinners["GA"], actualWinners["Team"], actualWinners["Opponent"])
actualWinners.loc[actualWinners['GF'] == actualWinners['GA'], "Actual Winners"] = "Tie"
prediction_df = pd.DataFrame({'Win Pred.': predicted_labels})
predictionTest_df = pd.DataFrame({"Win Pred.": predictedTest_labels})
prediction3_df = pd.DataFrame({'Win Pred.': predicted_labels3})
prediction_df['Predicted Winner'] = np.where(prediction_df['Win Pred.'] == 1, testDfSource['Team'], testDfSource['Opponent'])
predictionTest_df['Predicted Winner'] = np.where(predictionTest_df['Win Pred.'] == 1, testDfSource['Team'], testDfSource['Opponent'])
prediction3_df['Predicted Winner'] = np.where(prediction3_df['Win Pred.'] == 1, testDfSource['Team'], testDfSource['Opponent'])

#Final Output
final_predictions = pd.concat([testDfSource[["Team", "Opponent", "GF", "GA"]], prediction_df, actualWinners['Actual Winners']], axis=1)
finalTest_predictions = pd.concat([testDfSource[["Team", "Opponent", "GF", "GA"]], predictionTest_df, actualWinners['Actual Winners']], axis=1)
final_predictions3 = pd.concat([testDfSource[["Team", "Opponent", "GF", "GA"]], prediction3_df, actualWinners['Actual Winners']], axis=1)
final_predictions3.to_csv("finalPredictions_DecisionTreesMaxDepth7.csv", index=False)


#Getting Model Performance Statistics
print("")

print("Final Predictions Summary: ")

y_true = final_predictions['Actual Winners']
y_pred = final_predictions['Predicted Winner']
y2_true = finalTest_predictions['Actual Winners']
y2_pred = finalTest_predictions['Predicted Winner']
y3_true = final_predictions3['Actual Winners']
y3_pred = final_predictions3['Predicted Winner']


print("")

print('Accuracy With Max Depth (15):')
print(round(accuracy_score(y_true, y_pred), 3))

print('Classification Report With Max Depth (15):')
print(classification_report(y_true, y_pred, zero_division=1))

print("")

print('Accuracy With Max Depth (2):')
print(round(accuracy_score(y2_true, y2_pred), 3))

print('Classification Report With Max Depth (2):')
print(classification_report(y2_true, y2_pred, zero_division=1))

print("")

print('Accuracy With Max Depth (7):')
print(round(accuracy_score(y3_true, y3_pred), 3))

print('Classification Report With Max Depth (7):')
print(classification_report(y3_true, y3_pred, zero_division=1))



Final Predictions Summary: 

Accuracy With Max Depth (15):
0.811
Classification Report With Max Depth (15):
                 precision    recall  f1-score   support

        Arsenal       0.50      1.00      0.67         1
   Aston Villa        1.00      1.00      1.00         1
        Chelsea       0.00      1.00      0.00         0
 Crystal Palace       0.00      1.00      0.00         0
      Liverpool       0.00      1.00      0.00         0
Manchester City       1.00      1.00      1.00        27
            Tie       1.00      0.00      0.00         7
      Tottenham       0.00      1.00      0.00         0
  Wolverhampton       1.00      1.00      1.00         1

       accuracy                           0.81        37
      macro avg       0.50      0.89      0.41        37
   weighted avg       0.99      0.81      0.80        37


Accuracy With Max Depth (2):
0.757
Classification Report With Max Depth (2):
                 precision    recall  f1-score   support

        Ars

In [85]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report

# Data Preparation
trainDf = pd.read_csv("dataset2.csv")
testDf = pd.read_csv("testingDataset.csv")
testDf = testDf[["GF", "GA", "xG", "xGA", "Poss", "Sh", "SoT", "FK", "PK", "PKatt"]]
trainDf["Winner"] = trainDf["Result"].apply(lambda x: 1 if x == "W" else 0)
target = "Winner"
features = ["GF", "GA", "xG", "xGA", "Poss", "Sh", "SoT", "FK", "PK", "PKatt"]
X = trainDf[features]
y = trainDf[target]

# Model Training
model = svm.SVC()  # Use SVM with default parameters
model.fit(X, y)

# Model Prediction
predictions = model.predict(testDf)
predicted_labels = predictions  # SVM returns class labels directly

# Output Formatting
testDfSource = pd.read_csv("testingDataset.csv")
actualWinners = testDfSource[["GF", "GA", "Team", "Opponent"]].copy()
actualWinners["Actual Winners"] = np.where(actualWinners['GF'] > actualWinners["GA"], actualWinners["Team"], actualWinners["Opponent"])
actualWinners.loc[actualWinners['GF'] == actualWinners['GA'], "Actual Winners"] = "Tie"
prediction_df = pd.DataFrame({'Win Pred.': predicted_labels})
prediction_df['Predicted Winner'] = np.where(prediction_df['Win Pred.'] == 1, testDfSource['Team'], testDfSource['Opponent'])

# Final Output
final_predictions = pd.concat([testDfSource[["Team", "Opponent", "GF", "GA"]], prediction_df, actualWinners['Actual Winners']], axis=1)
final_predictions.to_csv("finalPredictions_SVM.csv", index=False)

#Getting Model Performance Statistics
print("")

print("Final Predictions Summary: ")

y_true = final_predictions['Actual Winners']
y_pred = final_predictions['Predicted Winner']

print("")

print('Accuracy:')
print(round(accuracy_score(y_true, y_pred), 3))

print('Classification Report:')
print(classification_report(y_true, y_pred, zero_division=1))


Final Predictions Summary: 

Accuracy:
0.676
Classification Report:
                    precision    recall  f1-score   support

           Arsenal       0.50      1.00      0.67         1
      Aston Villa        1.00      1.00      1.00         1
          Brighton       0.00      1.00      0.00         0
         Liverpool       0.00      1.00      0.00         0
   Manchester City       0.85      0.85      0.85        27
         Newcastle       0.00      1.00      0.00         0
Nottingham Forrest       0.00      1.00      0.00         0
               Tie       1.00      0.00      0.00         7
         Tottenham       0.00      1.00      0.00         0
     Wolverhampton       1.00      0.00      0.00         1

          accuracy                           0.68        37
         macro avg       0.44      0.79      0.25        37
      weighted avg       0.88      0.68      0.67        37

