# In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt

# Read the training CSV; 'Offensive' and 'Defensive' are the two input
# columns and 'Team' is the class column.
data = pd.read_csv('HW2_training.csv')

# Build the float32 input tensor and the int64 target tensor directly from
# the DataFrame columns.
features = torch.tensor(
    data[['Offensive', 'Defensive']].to_numpy(),
    dtype=torch.float32,
)
labels = torch.tensor(data['Team'].to_numpy(), dtype=torch.long)

# Pair inputs with targets and iterate over them in shuffled mini-batches of 8.
dataset = TensorDataset(features, labels)
dataloader = DataLoader(dataset, shuffle=True, batch_size=8)

# Define the DNN model
class DNN(nn.Module):
    """Fully connected classifier: 2 inputs -> 100 -> 50 -> 20 -> 4 outputs.

    Every hidden layer is followed by a ReLU. The output layer is followed by
    a log-softmax over dim 1, so ``forward`` returns per-class
    log-probabilities rather than raw logits.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in input-to-output order; the attribute names
        # are also the state_dict key prefixes.
        self.fc1 = nn.Linear(in_features=2, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=50)
        self.fc3 = nn.Linear(in_features=50, out_features=20)
        self.out = nn.Linear(in_features=20, out_features=4)

    def forward(self, x):
        """Map a batch of 2-feature rows to 4 log-probabilities per row."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        logits = self.out(hidden)
        # Normalise across the class dimension in log space.
        return F.log_softmax(logits, dim=1)

model = DNN()

# Train the model.
# DNN.forward already returns log-probabilities (log_softmax), so the matching
# criterion is the negative log-likelihood loss. CrossEntropyLoss would apply
# log_softmax a second time, which is redundant on log-probabilities.
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 30
model.train()  # make the training mode explicit
for epoch in range(num_epochs):
    # Accumulate a sample-weighted sum so the reported value is the mean loss
    # over the whole epoch, not just the (possibly smaller) last mini-batch.
    running_loss = 0.0
    samples_seen = 0

    for batch_features, batch_labels in dataloader:
        # Zero the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        batch_count = batch_labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    # max(..., 1) keeps the log line well-defined if the loader yields nothing.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Training Epoch: [{epoch+1}/{num_epochs}], Loss: {epoch_loss}')

# Calculate training set accuracy and collect predictions for the confusion
# matrix.
all_labels = []
all_predictions = []

model.eval()  # inference mode for evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for batch_features, batch_labels in dataloader:
        outputs = model(batch_features)
        # The predicted class is the index of the largest output per row.
        predicted = outputs.argmax(dim=1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
        all_labels.extend(batch_labels.numpy())
        all_predictions.extend(predicted.numpy())

    accuracy = 100 * correct / total
    print(f'Training Set Accuracy: {accuracy:.2f}%')

# Compute the confusion matrix.
# Passing the class list explicitly keeps the matrix 4x4 even when some class
# never appears among the true or predicted labels; otherwise its size would
# not match the four display labels and plotting would fail.
team_ids = [0, 1, 2, 3]
cm = confusion_matrix(all_labels, all_predictions, labels=team_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=team_ids)

# Plot the confusion matrix
disp.plot(cmap=plt.cm.Blues)
plt.title('Training Set Confusion Matrix')
plt.show()

# Decision regions: evaluate the model on a dense grid that covers the
# training features (padded by 1 on every side, step 0.1) and colour every
# grid point by its predicted class.
# .item() turns the 0-d tensors into plain Python floats before NumPy sees them.
x_min, x_max = features[:, 0].min().item() - 1, features[:, 0].max().item() + 1
y_min, y_max = features[:, 1].min().item() - 1, features[:, 1].max().item() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)

# Make predictions for each point in the grid
model.eval()
with torch.no_grad():
    # argmax over the class dimension gives the predicted class id; convert to
    # a NumPy array explicitly so Matplotlib receives a native array.
    Z = model(grid).argmax(dim=1).reshape(xx.shape).numpy()

# Plot the decision boundary.
# The predictions are four discrete class ids, so the contour band edges sit
# halfway between consecutive ids: each class gets exactly one flat colour and
# the colorbar gets one tick per class instead of fractional values.
team_ids = np.arange(4)
levels = np.arange(-0.5, 4.0, 1.0)  # -0.5, 0.5, 1.5, 2.5, 3.5
contour = plt.contourf(xx, yy, Z, levels=levels, alpha=0.8, cmap='viridis', vmin=0, vmax=3)
cb = plt.colorbar(contour, ticks=team_ids)
cb.set_label('Team')
plt.xlabel('Offensive')
plt.ylabel('Defensive')
plt.title('DNN Model')
plt.show()

# Read the testing CSV; it uses the same 'Offensive', 'Defensive' and 'Team'
# columns as the training file.
test_data = pd.read_csv('HW2_testing.csv')

# Build the float32 input tensor and the int64 target tensor directly from
# the DataFrame columns.
test_features = torch.tensor(
    test_data[['Offensive', 'Defensive']].to_numpy(),
    dtype=torch.float32,
)
test_labels = torch.tensor(test_data['Team'].to_numpy(), dtype=torch.long)

# Evaluation iterates in file order (no shuffling) in batches of 32.
test_dataset = TensorDataset(test_features, test_labels)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Calculate testing set accuracy and collect predictions for the confusion
# matrix.
all_test_labels = []
all_test_predictions = []

model.eval()  # inference mode for evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for batch_features, batch_labels in test_dataloader:
        outputs = model(batch_features)
        # The predicted class is the index of the largest output per row.
        predicted = outputs.argmax(dim=1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
        all_test_labels.extend(batch_labels.numpy())
        all_test_predictions.extend(predicted.numpy())

    test_accuracy = 100 * correct / total
    print(f'Testing Set Accuracy: {test_accuracy:.2f}%')

# Compute the confusion matrix for the testing set.
# Passing the class list explicitly keeps the matrix 4x4 even when a class is
# absent from both the test labels and the predictions; otherwise its size
# would not match the four display labels and plotting would fail.
team_ids = [0, 1, 2, 3]
test_cm = confusion_matrix(all_test_labels, all_test_predictions, labels=team_ids)
test_disp = ConfusionMatrixDisplay(confusion_matrix=test_cm, display_labels=team_ids)

# Plot the confusion matrix for the testing set
test_disp.plot(cmap=plt.cm.Blues)
plt.title('Testing Set Confusion Matrix')
plt.show()