In [1]:
import pandas as pd

In [2]:
# Read the CSV file into a DataFrame
teams_stats = pd.read_csv('../basketballPlayoffs/teams_stats_encoded.csv')

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import numpy as np

# Split data into features and target
X = teams_stats.drop(columns=['playoff']).values.astype(np.float32)
y = teams_stats['playoff'].values.astype(np.float32)

# Define a function for creating the PyTorch model
def create_model(input_dim, hidden_dim, num_layers):
    model = nn.Sequential()
    model.add_module('input_layer', nn.Linear(input_dim, hidden_dim))
    model.add_module('activation', nn.ReLU())
    for _ in range(num_layers):
        model.add_module('hidden_layer', nn.Linear(hidden_dim, hidden_dim))
        model.add_module('activation', nn.ReLU())
    model.add_module('output_layer', nn.Linear(hidden_dim, 1))
    model.add_module('output_activation', nn.Sigmoid())
    return model

# Define a function for training the model
def train_model(model, X_train, y_train, num_epochs, batch_size, lr):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in range(num_epochs):
        for i in range(0, X_train.shape[0], batch_size):
            inputs = X_train[i:i+batch_size]
            labels = y_train[i:i+batch_size]
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.view(-1, 1))
            loss.backward()
            optimizer.step()

# Define hyperparameter search space
hidden_dims = [32, 64]
num_layers_values = [1, 2]
num_epochs_values = [10, 20]
batch_sizes = [32, 64]
learning_rates = [0.001, 0.01]

results = []

kf = KFold(n_splits=10, shuffle=True, random_state=42)  # You can adjust n_splits as needed

# Iterate through hyperparameters
for hidden_dim in hidden_dims:
    for num_layers in num_layers_values:
        for num_epochs in num_epochs_values:
            for batch_size in batch_sizes:
                for lr in learning_rates:
                    accuracies = []
                    for train_idx, test_idx in kf.split(X):
                        X_train, X_test = X[train_idx], X[test_idx]
                        y_train, y_test = y[train_idx], y[test_idx]

                        # Convert data to PyTorch tensors
                        X_train = torch.from_numpy(X_train)
                        y_train = torch.from_numpy(y_train)
                        X_test = torch.from_numpy(X_test)
                        y_test = torch.from_numpy(y_test)

                        print(f"Training model with hidden_dim={hidden_dim}, num_layers={num_layers}, num_epochs={num_epochs}, batch_size={batch_size}, lr={lr}")

                        # Create the PyTorch model
                        input_dim = X_train.shape[1]
                        model = create_model(input_dim, hidden_dim, num_layers)

                        # Train the model
                        train_model(model, X_train, y_train, num_epochs, batch_size, lr)

                        # Evaluate the model on the test data
                        model.eval()
                        with torch.no_grad():
                            y_pred = (model(X_test) > 0.5).numpy().flatten()

                        # Calculate accuracy
                        accuracy = accuracy_score(y_test.numpy(), y_pred)
                        accuracies.append(accuracy)
                        print(f"Fold Accuracy: {accuracy}")

                    # Calculate average accuracy and standard deviation across folds
                    avg_accuracy = np.mean(accuracies)
                    std_dev = np.std(accuracies)

                    results.append({
                        'hidden_dim': hidden_dim,
                        'num_layers': num_layers,
                        'num_epochs': num_epochs,
                        'batch_size': batch_size,
                        'lr': lr,
                        'avg_accuracy': avg_accuracy,
                        'std_dev': std_dev
                    })

# Find the best model based on the average accuracy across folds
best_model_info = max(results, key=lambda x: x['avg_accuracy'])
print("Best Model Information:", best_model_info)


Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.7692307692307693
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.5384615384615384
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.6153846153846154
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.6923076923076923
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.6153846153846154
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.38461538461538464
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.5
Training model with hidden_dim=32, num_layers=1, num_epochs=10, batch_size=32, lr=0.001
Fold Accuracy: 0.4166666666666667
Training model with hidden_dim=32, num