In [1]:
import pickle
import os
import numpy as np
# Folder holding the pickled PyTorch data. Set the CRICKET_DATA_DIR environment
# variable to run this notebook on another machine; the original local path is
# kept as the default so existing setups behave exactly as before.
directory = os.environ.get('CRICKET_DATA_DIR', r'D:\github\Cricket-Prediction\data\5_pytorchData')

import torch
from torch.utils.data import Dataset

# Augment the data by creating new samples with different combinations of overs
def augment_data(team_stats_list, player_stats_list, ball_stats_list, over_segments=np.arange(7, 40)):
    """Expand each match into several samples, one per usable over-count prefix.

    For every match, and for every value in ``over_segments`` that does not
    exceed the number of complete overs in that match (rows // 6), one sample
    is emitted whose ball-by-ball rows are cut to the first ``segment * 6``
    rows. Team and player stats are repeated as-is (same objects, no copy).

    Args:
        team_stats_list: Per-match team statistics.
        player_stats_list: Per-match player statistics.
        ball_stats_list: Per-match ball-by-ball arrays; must expose
            ``.shape[0]`` and support row slicing.
        over_segments: Over counts to generate prefixes for.

    Returns:
        Tuple of three parallel lists: team stats, player stats and the
        truncated ball stats.
    """
    balls_per_over = 6  # Assuming 6 balls per over

    # Build (team, players, truncated balls) triples in match-major order,
    # then split them back into the three parallel lists.
    samples = [
        (team, players, balls[: n_overs * balls_per_over])
        for team, players, balls in zip(team_stats_list, player_stats_list, ball_stats_list)
        for n_overs in over_segments
        if balls.shape[0] // balls_per_over >= n_overs
    ]

    augmented_team_stats = [sample[0] for sample in samples]
    augmented_player_stats = [sample[1] for sample in samples]
    augmented_ball_stats = [sample[2] for sample in samples]
    return augmented_team_stats, augmented_player_stats, augmented_ball_stats

# Create a custom Dataset
class CricketDataset(Dataset):
    """Dataset over three parallel per-sample lists (team, player, ball stats).

    Each item is converted to float32 tensors on access and returned as
    ``(team_input, player_input, ball_input, label)``.
    """

    def __init__(self, team_stats_list, player_stats_list, ball_stats_list):
        """Store the three parallel lists without copying them."""
        self.team_stats_list = team_stats_list
        self.player_stats_list = player_stats_list
        self.ball_stats_list = ball_stats_list

    def __len__(self):
        """Number of samples, taken from the team stats list."""
        return len(self.team_stats_list)

    def __getitem__(self, idx):
        """Return the float32 tensors for sample ``idx``."""

        def as_float(values):
            return torch.tensor(values, dtype=torch.float32)

        # squeeze() drops every size-1 dimension of the team stats
        team_input = as_float(self.team_stats_list[idx]).squeeze()
        player_input = as_float(self.player_stats_list[idx])
        ball_stats = as_float(self.ball_stats_list[idx])

        # Assuming the last column is the label: it is read from the first
        # row only, and removed from the features of every row.
        label = ball_stats[0, -1]
        ball_input = ball_stats[:, :-1]
        return team_input, player_input, ball_input, label

# Define a collate function to handle variable-length sequences
def collate_fn(batch):
    """Collate samples whose ball-by-ball sequences differ in length.

    Args:
        batch: Iterable of ``(team_input, player_input, ball_input, label)``
            tuples; ``ball_input`` is indexed as (rows, features).

    Returns:
        ``(team_inputs, player_inputs, padded_ball_inputs, labels,
        ball_lengths)`` where team/player inputs are stacked, ball inputs are
        zero-padded at the end up to the longest sequence in the batch, labels
        are a float32 tensor and ``ball_lengths`` is a plain list holding each
        sample's unpadded row count.
    """
    teams, players, sequences, targets = [], [], [], []
    for team_vec, player_mat, ball_seq, target in batch:
        teams.append(team_vec)
        players.append(player_mat)
        sequences.append(ball_seq)
        targets.append(target)

    ball_lengths = [seq.shape[0] for seq in sequences]

    # Zero-filled (batch, longest, features) buffer; every sequence is copied
    # into the leading rows of its slot and the remainder stays as padding.
    longest = max(ball_lengths)
    n_features = sequences[0].shape[1]
    padded_ball_inputs = torch.zeros(len(sequences), longest, n_features)
    for row, (seq, n_rows) in enumerate(zip(sequences, ball_lengths)):
        padded_ball_inputs[row, :n_rows, :] = seq

    return (
        torch.stack(teams),
        torch.stack(players),
        padded_ball_inputs,
        torch.tensor(targets, dtype=torch.float32),
        ball_lengths,
    )


# Load the pickled test DataLoader. The context manager closes the file handle
# as soon as unpickling finishes (a bare open() left it open).
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(os.path.join(directory, 'test_dataloader.pkl'), 'rb') as dataloader_file:
    test_dataloader = pickle.load(dataloader_file)

In [2]:
import torch.nn as nn
import torch.nn.functional as F

# Define the models
class TeamStatsModel(nn.Module):
    """Feed-forward encoder mapping team statistics to a 16-value embedding.

    Layer stack: input_size -> 64 -> 32 -> 16. The two hidden stages each
    apply Linear, ReLU, BatchNorm1d and Dropout(0.5); the last stage is
    Linear followed by ReLU.
    """

    def __init__(self, input_size):
        super().__init__()
        widths = (input_size, 64, 32)
        layers = []
        # Hidden stages, created in the same order as they are applied so the
        # positions inside the Sequential (and its state_dict keys) are fixed.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(fan_out))
            layers.append(nn.Dropout(0.5))
        # Output stage
        layers.append(nn.Linear(32, 16))
        layers.append(nn.ReLU())
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.model(x)

class PlayerStatsModel(nn.Module):
    """1-D CNN encoder mapping a player-stats sequence to a 16-value embedding.

    Pipeline: Conv1d(k=3) -> BatchNorm -> ReLU -> MaxPool(2) ->
    Conv1d(k=3) -> BatchNorm -> ReLU -> MaxPool(2) -> Flatten -> Linear -> ReLU.

    Args:
        input_size: Number of features per sequence position (conv channels).
        seq_len: Length of the input sequence; fixes the size of the final
            linear layer.

    Raises:
        ValueError: If ``seq_len`` is too short to survive both conv/pool
            stages (fewer than 10 positions).
    """

    def __init__(self, input_size, seq_len):
        super(PlayerStatsModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=32, kernel_size=3)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool1 = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(64)
        self.pool2 = nn.MaxPool1d(2)
        self.flatten = nn.Flatten()

        # Sequence length left after the two stages. Each unpadded kernel-3
        # conv removes 2 positions and each pool floors a halving, so the
        # floors must be applied stage by stage. The previous shortcut
        # (seq_len - 4) // 4 gives the same value when seq_len % 4 is 2 or 3
        # (e.g. 22) but overestimates by one otherwise (e.g. 12, 13, 16),
        # which made forward() fail with a shape mismatch.
        pooled_len = ((seq_len - 2) // 2 - 2) // 2
        if pooled_len < 1:
            raise ValueError(
                f"seq_len={seq_len} is too short for two conv(k=3)+pool(2) stages; need at least 10"
            )
        self.fc = nn.Linear(64 * pooled_len, 16)

    def forward(self, x):
        """Encode ``x`` of shape (batch, seq_len, input_size) to (batch, 16)."""
        x = x.permute(0, 2, 1)  # Convert to (batch, channels, seq_len)
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)
        x = self.flatten(x)
        x = F.relu(self.fc(x))
        return x

class BallToBallModel(nn.Module):
    """LSTM encoder mapping a padded ball-by-ball sequence to a 16-value embedding.

    A single-direction LSTM (hidden size 128) reads each sequence up to its
    true length; the final hidden state goes through Dropout(0.5), a linear
    layer and ReLU.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Not bidirectional, so the hidden state has 128 features
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=128, batch_first=True, bidirectional=False)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(128, 16)

    def forward(self, x, lengths):
        """Encode padded batch ``x`` (batch-first) given per-sample ``lengths``."""
        # Packing makes the LSTM stop at each sequence's real length, so the
        # padded positions never reach the recurrent state.
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        _, (hidden, _) = self.lstm(packed)
        # Final hidden state of the last LSTM layer: (batch, 128)
        final_state = hidden[-1]
        return F.relu(self.fc(self.dropout(final_state)))

class CombinedModel(nn.Module):
    """Combine the team, player and ball-by-ball encoders into one classifier.

    Each sub-model emits 16 values per sample; the three outputs are
    concatenated (48 values) and passed through an MLP (48 -> 64 -> 32 -> 1)
    that ends in a sigmoid, so the output lies in (0, 1).

    Args:
        team_input_size: Input size passed to ``TeamStatsModel``.
        player_input_size: Per-position feature count for ``PlayerStatsModel``.
        player_seq_len: Sequence length passed to ``PlayerStatsModel``.
        ball_input_dim: Per-ball feature count passed to ``BallToBallModel``.
    """

    def __init__(self, team_input_size, player_input_size, player_seq_len, ball_input_dim):
        super(CombinedModel, self).__init__()
        self.team_model = TeamStatsModel(team_input_size)
        self.player_model = PlayerStatsModel(player_input_size, player_seq_len)
        self.ball_model = BallToBallModel(ball_input_dim)
        self.fc = nn.Sequential(
            nn.Linear(16+16+16, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, team_input, player_input, ball_input, ball_lengths):
        """Return one probability per sample, shape (batch,)."""
        team_output = self.team_model(team_input)
        player_output = self.player_model(player_input)
        ball_output = self.ball_model(ball_input, ball_lengths)
        combined = torch.cat((team_output, player_output, ball_output), dim=1)
        output = self.fc(combined)
        # Drop only the trailing size-1 feature dimension. A bare squeeze()
        # would also remove the batch dimension when the batch holds a single
        # sample, returning a 0-d tensor that no longer matches (1,) labels
        # in BCELoss or in per-sample bookkeeping.
        return output.squeeze(-1)

In [4]:
# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Infer the model's input dimensions from the first test sample.
# Each sample is (team_input, player_input, ball_input, label); fetch it once
# instead of re-building the tensors for every dimension.
first_sample = test_dataloader.dataset[0]
team_input_size = first_sample[0].shape[0]
player_input_size = first_sample[1].shape[1]
player_seq_len = first_sample[1].shape[0]  # Sequence length for player stats
ball_input_dim = first_sample[2].shape[1]

# Initialize the model
model = CombinedModel(team_input_size, player_input_size, player_seq_len, ball_input_dim).to(device)
criterion = nn.BCELoss()

# Load the t20i model weights.
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU still loads on a CPU-only machine.
model.load_state_dict(
    torch.load('../2_naivetraining/t20i.pth', map_location=device, weights_only=True)
)

# Testing
model.eval()  # disable dropout and use running batch-norm statistics
test_correct_predictions = 0
test_total_predictions = 0
test_running_loss = 0.0
with torch.no_grad():
    for team_input, player_input, ball_input, labels, ball_lengths in test_dataloader:
        # ball_lengths stays a plain Python list; only tensors move to the device
        team_input = team_input.to(device)
        player_input = player_input.to(device)
        ball_input = ball_input.to(device)
        labels = labels.to(device)

        outputs = model(team_input, player_input, ball_input, ball_lengths)
        loss = criterion(outputs, labels)
        test_running_loss += loss.item()

        # Calculate accuracy: threshold the predicted probability at 0.5
        predictions = (outputs > 0.5).float()
        test_correct_predictions += (predictions == labels).sum().item()
        test_total_predictions += labels.size(0)

# Loss is the mean of the per-batch mean losses; accuracy is per sample.
test_avg_loss = test_running_loss / len(test_dataloader)
test_accuracy = test_correct_predictions / test_total_predictions
print(f"Test Loss: {test_avg_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.0235, Test Accuracy: 0.9893


In [5]:
from sklearn.metrics import confusion_matrix, classification_report

# Get the predictions and ground truth labels.
# Set eval mode explicitly so this cell does not depend on an earlier cell
# having done it: dropout must be off and batch norm must use its running
# statistics for the predictions to be deterministic.
model.eval()
all_predictions = []
all_labels = []
with torch.no_grad():
    for team_input, player_input, ball_input, labels, ball_lengths in test_dataloader:
        team_input = team_input.to(device)
        player_input = player_input.to(device)
        ball_input = ball_input.to(device)
        labels = labels.to(device)

        outputs = model(team_input, player_input, ball_input, ball_lengths)
        # Threshold the predicted probability at 0.5 to get hard 0/1 predictions
        predictions = (outputs > 0.5).float()
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

# Print classification report
print(classification_report(all_labels, all_predictions))

              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      4478
         1.0       1.00      0.98      0.99      4497

    accuracy                           0.99      8975
   macro avg       0.99      0.99      0.99      8975
weighted avg       0.99      0.99      0.99      8975



In [8]:
# Re-evaluate on truncated sequences: keep only the first MAX_BALLS rows of
# every test sample's ball-by-ball data.
# NOTE: 20 balls is roughly 3.3 overs, not 10 overs (10 overs would be 60
# balls at 6 balls per over). Change MAX_BALLS to 60 if 10 overs was intended.
# WARNING: this overwrites the dataset held by test_dataloader in place, so
# re-running earlier evaluation cells afterwards also uses truncated data.
MAX_BALLS = 20
test_dataloader.dataset.ball_stats_list = [ball_stats[:MAX_BALLS] for ball_stats in test_dataloader.dataset.ball_stats_list]

# Testing: one pass over the data yields the loss, the accuracy and the
# per-sample predictions needed for the classification report.
model.eval()
test_correct_predictions = 0
test_total_predictions = 0
test_running_loss = 0.0
all_predictions = []
all_labels = []
with torch.no_grad():
    for team_input, player_input, ball_input, labels, ball_lengths in test_dataloader:
        team_input = team_input.to(device)
        player_input = player_input.to(device)
        ball_input = ball_input.to(device)
        labels = labels.to(device)

        outputs = model(team_input, player_input, ball_input, ball_lengths)
        loss = criterion(outputs, labels)
        test_running_loss += loss.item()

        # Calculate accuracy: threshold the predicted probability at 0.5
        predictions = (outputs > 0.5).float()
        test_correct_predictions += (predictions == labels).sum().item()
        test_total_predictions += labels.size(0)

        # Keep the predictions and ground truth labels for the report
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Loss is the mean of the per-batch mean losses; accuracy is per sample.
test_avg_loss = test_running_loss / len(test_dataloader)
test_accuracy = test_correct_predictions / test_total_predictions
print(f"Test Loss: {test_avg_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

# Print classification report
# NOTE(review): if accuracy barely changes with so few balls, it may be worth
# checking the input features for information that reveals the label.
print(classification_report(all_labels, all_predictions))

Test Loss: 0.0236, Test Accuracy: 0.9889
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      4478
         1.0       1.00      0.98      0.99      4497

    accuracy                           0.99      8975
   macro avg       0.99      0.99      0.99      8975
weighted avg       0.99      0.99      0.99      8975

