In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import pandas as pd
import numpy as np
from pathlib import Path

# Repository root.
# When ``__file__`` is defined (code run as a script/module), ROOT is the parent
# of the directory that holds the file. In a notebook kernel ``__file__`` does
# not exist, so ROOT falls back to two levels above the working directory.
# NOTE(review): the two branches resolve to different depths when the working
# directory is the file's own directory — confirm which one is intended.
try:
    _anchor = Path(__file__)
except NameError:
    _anchor = Path()
ROOT = _anchor.resolve().parents[1]


In [None]:
# load splits
# All six CSVs live in one directory under the repository root.
splits_dir = Path(ROOT) / "src" / "ryan" / "encoded_outputs"

# Feature frames. The match_id column is removed when it is still present;
# errors="ignore" makes the drop a no-op when it is absent.
X_train, X_val, X_test = (
    pd.read_csv(splits_dir / filename).drop(columns=["match_id"], errors="ignore")
    for filename in ("x_train.csv", "x_val.csv", "x_test.csv")
)

# Label files. squeeze() collapses a single-column frame to a Series
# (presumably each y_*.csv holds exactly one column — TODO confirm).
y_train, y_val, y_test = (
    pd.read_csv(splits_dir / filename).squeeze()
    for filename in ("y_train.csv", "y_val.csv", "y_test.csv")
)

TypeError: unsupported operand type(s) for /: 'str' and 'str'

In [None]:
# convert to tensors
# Features: DataFrame -> NumPy array -> float32 tensor.
X_train_t = torch.tensor(X_train.values, dtype=torch.float32)
X_val_t   = torch.tensor(X_val.values, dtype=torch.float32)
X_test_t  = torch.tensor(X_test.values, dtype=torch.float32)

# Labels: convert through the underlying NumPy array, exactly like the features.
# Handing the pandas object itself to torch.tensor leaves torch to walk it as a
# generic sequence, which is slow and can depend on the Series index; the array
# form avoids both.
y_train_t = torch.tensor(y_train.values, dtype=torch.float32)
y_val_t   = torch.tensor(y_val.values, dtype=torch.float32)
y_test_t  = torch.tensor(y_test.values, dtype=torch.float32)

# create data loaders
# Each dataset pairs a feature row with its label by position.
train_ds = TensorDataset(X_train_t, y_train_t)
val_ds   = TensorDataset(X_val_t, y_val_t)
test_ds  = TensorDataset(X_test_t, y_test_t)

# Only the training loader shuffles; validation/test keep file order.
train_loader = DataLoader(train_ds, batch_size=1024, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=2048)
test_loader  = DataLoader(test_ds, batch_size=2048)

In [None]:
# create logistic regression model
# Model input width = number of feature columns in the training frame.
input_dim = len(X_train.columns)

class LogisticRegression(nn.Module):
    """Binary logistic regression: a single linear layer followed by a sigmoid.

    Args:
        dim: number of input features per sample.
    """

    def __init__(self, dim):
        super().__init__()
        # One affine map from `dim` features to a single output unit.
        self.linear = nn.Linear(in_features=dim, out_features=1)

    def forward(self, x):
        """Apply the linear layer to `x` and return the sigmoid of its output.

        The trailing dimension of the result has size 1 (one unit per sample).
        """
        logits = self.linear(x)
        return logits.sigmoid()

# Fresh, untrained model sized to the feature count computed above.
model = LogisticRegression(dim=input_dim)

In [None]:
# handle side win imbalance
# positive_weight = (number of samples - label sum) / label sum, as a Python float.
# For 0/1 labels this is the negative-to-positive ratio (assumes binary labels —
# TODO confirm against the encoded y_train).
num_positive = y_train.sum()
num_negative = len(y_train) - num_positive
positive_weight = float(num_negative / num_positive)

# Plain BCE with the constructor defaults (no per-element weight, mean reduction).
# NOTE(review): the training loop in this notebook builds its own weighted loss
# and does not reference `criterion`.
criterion = nn.BCELoss()

# optimizer w l2 regularization (Adam's weight_decay term)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

In [None]:
# training loop
def evaluate(loader, net=None, threshold=0.5):
    """Score a model on every batch of `loader` and collect its outputs.

    Args:
        loader: iterable yielding ``(features, labels)`` tensor pairs.
        net: model to evaluate; when omitted, the notebook-level ``model`` is used.
        threshold: a sample is predicted as 1 when its output is >= this value.

    Returns:
        ``(accuracy, probs, labels)`` where ``accuracy`` is the fraction of
        samples whose thresholded prediction equals the label, and ``probs`` /
        ``labels`` are 1-D NumPy arrays of the model outputs and the labels in
        the order the loader produced them.

    Raises:
        ZeroDivisionError: if `loader` yields no samples.

    Note:
        The model is switched to eval mode and left there; callers that keep
        training must call ``.train()`` again.
    """
    net = model if net is None else net
    net.eval()
    correct = 0
    total = 0
    probs = []
    labels = []

    with torch.no_grad():
        for x, y in loader:
            # Flatten to 1-D rather than using a bare squeeze(): squeeze() on a
            # single-sample batch of shape (1, 1) yields a 0-d tensor, whose
            # NumPy form cannot be iterated by list.extend below.
            p = net(x).reshape(-1)
            y = y.reshape(-1)
            pred = (p >= threshold).float()
            correct += (pred == y).sum().item()
            total += y.size(0)
            probs.extend(p.cpu().numpy())
            labels.extend(y.cpu().numpy())

    return correct / total, np.array(probs), np.array(labels)

epochs = 20
for epoch in range(epochs):
    # Re-enter training mode every epoch: evaluate() below switches to eval mode.
    model.train()
    for xb, yb in train_loader:
        optimizer.zero_grad()
        # Drop only the trailing size-1 output dimension. A bare squeeze() would
        # also remove the batch dimension when the final batch holds a single
        # sample, leaving a 0-d prediction that no longer matches yb's shape.
        preds = model(xb).squeeze(-1)

        # weighted BCE loss: positives are scaled by positive_weight, negatives by 1
        weights = torch.where(yb == 1, positive_weight, 1.0)
        # Functional form of BCELoss(weight=weights); avoids building a new
        # module object on every step.
        loss = nn.functional.binary_cross_entropy(preds, yb, weight=weights)

        loss.backward()
        optimizer.step()

    # validation accuracy
    val_acc, _, _ = evaluate(val_loader)
    print(f"Epoch {epoch+1}/{epochs} — Val Acc: {val_acc:.4f}")

In [None]:
# eval
# Single pass over the test loader; the per-sample outputs and labels are also
# bound to test_probs / test_labels.
test_acc, test_probs, test_labels = evaluate(test_loader)
print("\n===== TEST ACCURACY =====", f"Test Accuracy: {test_acc:.4f}", sep="\n")

Epoch 1/20 — Val Acc: 0.5599
Epoch 2/20 — Val Acc: 0.5624
Epoch 2/20 — Val Acc: 0.5624
Epoch 3/20 — Val Acc: 0.5647
Epoch 3/20 — Val Acc: 0.5647
Epoch 4/20 — Val Acc: 0.5685
Epoch 4/20 — Val Acc: 0.5685
Epoch 5/20 — Val Acc: 0.5713
Epoch 5/20 — Val Acc: 0.5713
Epoch 6/20 — Val Acc: 0.5710
Epoch 6/20 — Val Acc: 0.5710
Epoch 7/20 — Val Acc: 0.5737
Epoch 7/20 — Val Acc: 0.5737
Epoch 8/20 — Val Acc: 0.5765
Epoch 8/20 — Val Acc: 0.5765
Epoch 9/20 — Val Acc: 0.5754
Epoch 9/20 — Val Acc: 0.5754
Epoch 10/20 — Val Acc: 0.5745
Epoch 10/20 — Val Acc: 0.5745
Epoch 11/20 — Val Acc: 0.5772
Epoch 11/20 — Val Acc: 0.5772
Epoch 12/20 — Val Acc: 0.5754
Epoch 12/20 — Val Acc: 0.5754
Epoch 13/20 — Val Acc: 0.5778
Epoch 13/20 — Val Acc: 0.5778
Epoch 14/20 — Val Acc: 0.5770
Epoch 14/20 — Val Acc: 0.5770
Epoch 15/20 — Val Acc: 0.5775
Epoch 15/20 — Val Acc: 0.5775
Epoch 16/20 — Val Acc: 0.5778
Epoch 16/20 — Val Acc: 0.5778
Epoch 17/20 — Val Acc: 0.5772
Epoch 17/20 — Val Acc: 0.5772
Epoch 18/20 — Val Acc: 0.57