In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# impert pytorch
import numpy as np
import pandas as pd

filename = '../EPL_Dataset/Final_Data.csv'
data = pd.read_csv(filename)

data.columns

df = data
# Label Encoding
label_encoder = LabelEncoder()
df['FTR'] = label_encoder.fit_transform(df['FTR'])

label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print(label_mapping)

# Feature Selection
X = df.drop(columns=['Season', 'Home', 'Away', 'FTR', 'Time_Weight', 'Text'])  # Drop unnecessary columns
y = df['FTR']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

{'A': 0, 'D': 1, 'H': 2}


In [3]:
# Calculate Time Weights
alpha = 0.6
weights = alpha ** (1 - data['Time_Weight'])

# XGBoost Classifier with Default Parameters
model = xgb.XGBClassifier(objective='multi:softmax',
                          num_class=3,
                          n_estimators=100,
                          learning_rate=0.1,
                          max_depth=5,
                          random_state=42)

# Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(model, X_train, y_train, cv=kf, scoring='accuracy')
mean_cv_accuracy = np.mean(cross_val_scores) * 100
print(f'Mean Cross-Validation Accuracy: {mean_cv_accuracy:.2f}%')

# Model Fitting
model.fit(X_train, y_train, sample_weight=weights.loc[y_train.index].values)

# Prediction
y_pred = model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred) * 100
print(f'Accuracy: {accuracy:.2f}%')
# print('Classification Report:')
# print(classification_report(y_test, y_pred))



Mean Cross-Validation Accuracy: 53.55%
Accuracy: 60.82%


In [4]:
# Logistic Regression Model Training
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(model, X_train, y_train, cv=kf, scoring='accuracy')
mean_cv_accuracy = np.mean(cross_val_scores) * 100
print(f'Mean Cross-Validation Accuracy: {mean_cv_accuracy:.2f}%')

# Fit Model
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred) * 100
print(f'Accuracy: {accuracy:.2f}%')
# print('Confusion Matrix:')
# print(confusion_matrix(y_test, y_pred))
# print('Classification Report:')
# print(classification_report(y_test, y_pred))

# Optional: Feature Importance (Logistic Regression Coefficients)
# feature_importance = pd.Series(model.coef_[0], index=X.columns).sort_values(ascending=False)
# print('Feature Importance:')
# print(feature_importance)



Mean Cross-Validation Accuracy: 56.92%
Accuracy: 60.07%


In [5]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Neural Network Model Definition
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.softmax(self.fc3(x), dim=1)
        return x

# Model Initialization
input_size = X_train.shape[1]
num_classes = len(y_train.unique())
model = NeuralNetwork(input_size, num_classes)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the Model
epochs = 50
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, target)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # Print loss for every epoch
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Evaluate the Model
model.eval()
with torch.no_grad():
    y_pred_nn = model(X_test_tensor)
    y_pred_nn_classes = torch.argmax(y_pred_nn, axis=1).numpy()

# Evaluation
accuracy_nn = accuracy_score(y_test, y_pred_nn_classes)
print(f'Neural Network Test Accuracy: {accuracy_nn:.2f}')
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_nn_classes))
print('Classification Report:')
print(classification_report(y_test, y_pred_nn_classes))

Epoch [1/50], Loss: 1.0445
Epoch [2/50], Loss: 1.0739
Epoch [3/50], Loss: 0.9986
Epoch [4/50], Loss: 1.1362
Epoch [5/50], Loss: 0.8839
Epoch [6/50], Loss: 0.9857
Epoch [7/50], Loss: 0.9233
Epoch [8/50], Loss: 0.9358
Epoch [9/50], Loss: 0.7954
Epoch [10/50], Loss: 1.0390
Epoch [11/50], Loss: 0.9211
Epoch [12/50], Loss: 0.8664
Epoch [13/50], Loss: 1.0363
Epoch [14/50], Loss: 0.9019
Epoch [15/50], Loss: 0.9839
Epoch [16/50], Loss: 1.0817
Epoch [17/50], Loss: 0.9360
Epoch [18/50], Loss: 0.7822
Epoch [19/50], Loss: 0.8610
Epoch [20/50], Loss: 0.7989
Epoch [21/50], Loss: 0.7166
Epoch [22/50], Loss: 0.9698
Epoch [23/50], Loss: 0.9977
Epoch [24/50], Loss: 0.8514
Epoch [25/50], Loss: 0.7414
Epoch [26/50], Loss: 0.8245
Epoch [27/50], Loss: 0.8750
Epoch [28/50], Loss: 0.7983
Epoch [29/50], Loss: 0.8536
Epoch [30/50], Loss: 0.9318
Epoch [31/50], Loss: 0.8786
Epoch [32/50], Loss: 0.8871
Epoch [33/50], Loss: 0.7541
Epoch [34/50], Loss: 1.0268
Epoch [35/50], Loss: 0.8304
Epoch [36/50], Loss: 0.7768
E