In [1]:
import pandas as pd
import torch
import torch.nn as nn

In [None]:
# Read the processed feature CSV into a DataFrame; the feature matrix and the
# target vector used below are both taken from this frame.
df = pd.read_csv(filepath_or_buffer="data/processed/match_features_step5.csv")

In [5]:
# Input columns, listed in the order in which they appear as columns of X.
feature_cols = [
    "team_1_avg_runs", "team_2_avg_runs",
    "team_1_recent_avg_runs", "team_2_recent_avg_runs",
    "team_1_won_toss", "team_1_batted_first",
    "venue_encoded",
]

# X: one row per DataFrame row, one column per entry of feature_cols.
X = df.loc[:, feature_cols].values
# y: the "team_1_won" column as a 1-D array (the prediction target).
y = df.loc[:, "team_1_won"].values

In [6]:
# Positional hold-out split with no shuffling: the first 80% of the rows are
# used for training and the remaining rows for testing.
n_rows = len(df)
split_idx = int(0.8 * n_rows)

X_train = X[:split_idx]
X_test = X[split_idx:]
y_train = y[:split_idx]
y_test = y[split_idx:]

### Normalize features

In [7]:
# Re-derive the un-normalized partitions from X on every execution so that this
# cell is idempotent. Normalizing X_train/X_test in place meant that running the
# cell a second time standardized already-standardized data and overwrote
# X_mean/X_std with roughly 0/1 -- and those are the statistics that get stored
# in the checkpoint for use at inference time.
X_train_raw, X_test_raw = X[:split_idx], X[split_idx:]

# Statistics are computed on the training rows only and then applied to both
# partitions, so no information from the test rows enters the scaling.
X_mean = X_train_raw.mean(axis=0)
X_std = X_train_raw.std(axis=0) + 1e-8  # epsilon avoids division by zero for constant columns

X_train = (X_train_raw - X_mean) / X_std
X_test = (X_test_raw - X_mean) / X_std

In [8]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = "cpu"

# Features: float32 tensors created directly on the selected device.
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=device)
X_test_t = torch.tensor(X_test, dtype=torch.float32, device=device)

# Targets: float32 tensors reshaped into a single column (N, 1).
y_train_t = torch.tensor(y_train, dtype=torch.float32, device=device).view(-1, 1)
y_test_t = torch.tensor(y_test, dtype=torch.float32, device=device).view(-1, 1)

In [10]:
# Sanity check: display (number of training rows, number of features).
X_train_t.size()

torch.Size([1950, 7])

In [12]:
class WinPredictor(nn.Module):
    """Small fully connected network for binary classification.

    Architecture: input_dim -> 64 -> 32 -> 1, with ReLU after each hidden
    layer and a sigmoid on the final unit, so every output lies in [0, 1].

    Args:
        input_dim: Number of features per input row.
    """

    def __init__(self,input_dim):
        super().__init__()
        hidden_widths = (64, 32)

        # Build the stack layer by layer; the resulting nn.Sequential has the
        # same module order (and therefore the same state_dict keys) as
        # writing the six layers out literally.
        stack = []
        fan_in = input_dim
        for width in hidden_widths:
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.ReLU())
            fan_in = width
        stack.append(nn.Linear(fan_in, 1))
        stack.append(nn.Sigmoid())

        self.model = nn.Sequential(*stack)

    def forward(self,x):
        """Run the network on ``x`` and return one sigmoid output per row."""
        return self.model(x)

In [13]:
# Seed PyTorch's RNG before the layers are constructed so that the initial
# weights -- and with them the reported losses and accuracy -- are repeatable
# across "Restart Kernel -> Run All" instead of changing on every run.
torch.manual_seed(42)

# One input unit per feature column of the training tensor.
model = WinPredictor(X_train_t.shape[1]).to(device)

# BCELoss expects probabilities, which matches the sigmoid output of the model.
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(),lr=0.001)

In [14]:
from tqdm import tqdm
epochs = 50
log_every = 10  # report train/test loss once every this many epochs

# Full-batch training: each epoch is a single optimizer step over all of
# X_train_t.
for epoch in tqdm(range(epochs)):
    model.train()

    optimizer.zero_grad()
    y_pred = model(X_train_t)
    loss = loss_fn(y_pred,y_train_t)
    loss.backward()
    optimizer.step()

    # The test loss is only ever reported on logging epochs, so the test-set
    # forward pass is done only then instead of on every epoch.
    if (epoch + 1) % log_every == 0:
        ### testing
        model.eval()
        with torch.inference_mode():
            y_pred_test = model(X_test_t)
            t_loss = loss_fn(y_pred_test,y_test_t)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {loss} | Test Loss: {t_loss}")

100%|██████████| 50/50 [00:00<00:00, 151.50it/s]

Epoch 10/50 | Train Loss: 0.6769373416900635 | Test Loss: 0.6787750124931335
Epoch 20/50 | Train Loss: 0.6583413481712341 | Test Loss: 0.6664067506790161
Epoch 30/50 | Train Loss: 0.6396312713623047 | Test Loss: 0.655788779258728
Epoch 40/50 | Train Loss: 0.6245708465576172 | Test Loss: 0.6491801142692566
Epoch 50/50 | Train Loss: 0.6161409020423889 | Test Loss: 0.6471185088157654





In [15]:
# Score the held-out rows with the model in eval mode and autograd disabled.
model.eval()
with torch.inference_mode():
    probs = model(X_test_t)

# An output of at least 0.5 counts as a predicted 1; accuracy is the fraction
# of rows whose prediction equals the label.
preds = torch.ge(probs, 0.5).to(torch.float32)
accuracy = torch.eq(preds, y_test_t).to(torch.float32).mean()

accuracy.item()

0.6106557250022888

In [16]:
# Prepare the directory the trained model is saved into.
from pathlib import Path

path = Path("models")
# Create the directory (and any missing parents); do nothing if it already exists.
path.mkdir(exist_ok=True, parents=True)


In [17]:
# Bundle everything needed to rebuild the model and repeat the input scaling:
# the learned weights, the input width, and the per-feature mean/std that were
# used to normalize the inputs.
checkpoint = {
    "model_state_dict": model.state_dict(),
    "input_dim": X_train_t.shape[1],
    "feature_mean": X_mean,
    "feature_std": X_std,
}
torch.save(checkpoint, "models/step6_win_predictor.pth")

### LSTM