### TASK 1

In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('league_of_legends_data_large.csv')

X = df.drop('win', axis=1)
Y = df['win']

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

scaler = StandardScaler()
scaler.fit(x_train)

x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

x_train_scaled_tensor = torch.tensor(x_train_scaled, dtype=torch.float32)
x_test_scaled_tensor = torch.tensor(x_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

### TASK 2

In [2]:
import torch.nn as nn
from torch.nn import BCELoss
import torch.optim as optim

class Model(nn.Module):
    def __init__(self, num_features_entrada):
        super(Model, self).__init__()
        self.linear = nn.Linear(num_features_entrada, 1)

    def forward(self, x):
        linear_output = self.linear(x)
        probability = torch.sigmoid(linear_output)
        return probability
    
model = Model(num_features_entrada=x_train_scaled_tensor.shape[1])
criterion = BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

### TASK 3

In [3]:
EPOCHS = 1000
for x in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    output = model(x_train_scaled_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

    if x%100 == 0:
        print(f"Epoch: {x}, Loss: {loss}")

model.eval()
with torch.no_grad():
    output_train = model(x_train_scaled_tensor)
    output_test = model(x_test_scaled_tensor)
    for e, x in enumerate(output_test):
        if x >= 0.5:
            output_test[e] = 1
        else:
            output_test[e] = 0
    for e, x in enumerate(output_train):
        if x >= 0.5:
            output_train[e] = 1
        else:
            output_train[e] = 0

    result_test = output_test == y_test_tensor
    result_train = output_train == y_train_tensor
    print('Test Accuracy: ', result_test.float().mean())
    print('Train Accuracy: ',result_train.float().mean())


Epoch: 0, Loss: 0.7366809248924255
Epoch: 100, Loss: 0.7173761129379272
Epoch: 200, Loss: 0.7051254510879517
Epoch: 300, Loss: 0.6974876523017883
Epoch: 400, Loss: 0.6927720904350281
Epoch: 500, Loss: 0.6898739337921143
Epoch: 600, Loss: 0.6880954504013062
Epoch: 700, Loss: 0.6870036125183105
Epoch: 800, Loss: 0.686332643032074
Epoch: 900, Loss: 0.6859195828437805
Test Accuracy:  tensor(0.4800)
Train Accuracy:  tensor(0.5300)


### TASK 4

In [4]:
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)

EPOCHS = 1000
for x in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    output = model(x_train_scaled_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

    if x%100 == 0:
        print(f"Epoch: {x}, Loss: {loss}")

model.eval()
with torch.no_grad():
    output_train = model(x_train_scaled_tensor)
    output_test = model(x_test_scaled_tensor)

    output_train_binary = []
    output_test_binary = []
    for e, x in enumerate(output_test):
        if x >= 0.5:
            output_test_binary.append(1)
        else:
            output_test_binary.append(0)
    for e, x in enumerate(output_train):
        if x >= 0.5:
            output_train_binary.append(1)
        else:
            output_train_binary.append(0)

    result_test = output_test == y_test_tensor
    result_train = output_train == y_train_tensor
    print('Test Accuracy: ', result_test.float().mean())
    print('Train Accuracy: ',result_train.float().mean())


Epoch: 0, Loss: 0.6856648325920105
Epoch: 100, Loss: 0.6855101585388184
Epoch: 200, Loss: 0.6854150295257568
Epoch: 300, Loss: 0.6853564977645874
Epoch: 400, Loss: 0.685320258140564
Epoch: 500, Loss: 0.6852979063987732
Epoch: 600, Loss: 0.6852839589118958
Epoch: 700, Loss: 0.6852753162384033
Epoch: 800, Loss: 0.6852699518203735
Epoch: 900, Loss: 0.6852664947509766
Test Accuracy:  tensor(0.)
Train Accuracy:  tensor(0.)


### TASK 5

In [5]:
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import numpy as np

y_test_numpy = y_test_tensor.numpy()
y_train_numpy = y_train_tensor.numpy()

output_test_numpy = output_test.numpy()
output_train_numpy = output_train.numpy()

print(confusion_matrix(y_test_numpy, output_test_numpy))
print(classification_report(y_test_numpy, output_test_numpy))

import matplotlib.pyplot as plt

output_test_binary = np.array(output_test_binary)

fpr, tpr, thresholds = roc_curve(y_test_numpy, output_test_binary)
roc_auc = auc(fpr, tpr)

print(f"\nAUC: {roc_auc:.4f}")

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

ValueError: Classification metrics can't handle a mix of binary and continuous targets

### TASK 6

In [None]:
torch.save(model.state_dict(), 'model.pth')

new_model = Model(num_features_entrada=x_train_scaled_tensor.shape[1])
new_model.load_state_dict(torch.load('model.pth'))

new_model.eval()
with torch.no_grad():
    output_train = new_model(x_train_scaled_tensor)
    output_test = new_model(x_test_scaled_tensor)

    for e, x in enumerate(output_test):
        if x >= 0.5:
            output_test[e] = 1
        else:
            output_test[e] = 0

    for e, x in enumerate(output_train):
        if x >= 0.5:
            output_train[e] = 1
        else:
            output_train[e] = 0

    result_test = output_test == y_test_tensor
    result_train = output_train == y_train_tensor
    print(result_test.float().mean())
    print(result_train.float().mean())


y_test_numpy = y_test_tensor.numpy()
y_train_numpy = y_train_tensor.numpy()

output_test_numpy = output_test.numpy()
output_train_numpy = output_train.numpy()

print(confusion_matrix(y_test_numpy, output_test_numpy))
print(classification_report(y_test_numpy, output_test_numpy))

### TASK 7

In [None]:
learning_rates = [0.01, 0.05, 0.1]
EPOCHS = 1000

best_accuracy = 0.0
best_lr = 0.0

for lr in learning_rates:
    
    model = Model(num_features_entrada=x_train_scaled_tensor.shape[1])
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=0.01)

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        output = model(x_train_scaled_tensor)
        loss = criterion(output, y_train_tensor)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        output_test = model(x_test_scaled_tensor)
        
        for e, x in enumerate(output_test):
            if x >= 0.5:
                output_test[e] = 1
            else:
                output_test[e] = 0

        result_test = output_test == y_test_tensor
        current_accuracy = result_test.float().mean().item()

        if current_accuracy > best_accuracy:
            best_accuracy = current_accuracy
            best_lr = lr

print(f"Best Learning Rate: {best_lr} with Accuracy: {best_accuracy:.4f}")

### TASK 8

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

weights = model.linear.weight.data.numpy().flatten()

feature_names = X.columns
feature_importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': weights
})

feature_importance_df['Abs_Importance'] = feature_importance_df['Importance'].abs()
sorted_df = feature_importance_df.sort_values(by='Abs_Importance', ascending=True)

plt.figure(figsize=(10, 12))
plt.barh(sorted_df['Feature'], sorted_df['Importance'])
plt.xlabel('Weight (Importance)')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.grid(True)
plt.show()