In [1]:
!pip install torch

import torch

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print("GPU is available" if use_gpu else "GPU not available, using CPU instead")

GPU is available


In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive; the CSV paths read later in this
# notebook live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd

# Load the CSV that the following cells turn into features and targets.
df = pd.read_csv("/content/drive/MyDrive/2023 Fall/fifa_test.csv")

In [4]:
# Remove ten columns by label, then discard every row whose column '2'
# equals 15.0.
dropped_columns = ['0', '1', '50', '7', '9', '44', '45', '46', '47', '48']
df = df.drop(columns=dropped_columns)
rows_to_keep = df['2'] != 15.0
df = df[rows_to_keep]

# Preview the first 30 remaining rows.
df.head(30)

Unnamed: 0,2,3,4,5,6,8,10,11,12,13,...,35,36,37,38,39,40,41,42,43,49
0,8.0,88.0,192.0,171.0,68.0,8.0,0.0,4.0,4.0,4.0,...,58.0,68.0,85.0,94.0,71.0,88.0,57.0,57.0,56.0,83.0
1,8.0,87.0,204.0,171.0,68.0,8.0,0.0,4.0,4.0,4.0,...,58.0,66.0,84.0,94.0,71.0,89.0,57.0,57.0,56.0,81.0
2,8.0,87.0,216.0,171.0,68.0,8.0,0.0,4.0,4.0,4.0,...,58.0,66.0,84.0,94.0,71.0,89.0,67.0,57.0,56.0,79.0
3,8.0,76.0,228.0,170.0,72.0,17.0,1.0,3.0,2.0,2.0,...,87.0,68.0,67.0,77.0,85.0,73.0,64.0,66.0,65.0,78.0
4,14.0,75.0,180.0,188.0,75.0,13.0,1.0,3.0,2.0,2.0,...,70.0,81.0,29.0,46.0,35.0,67.0,80.0,80.0,76.0,74.0
11,1.0,66.0,240.0,191.0,72.0,13.0,0.0,3.0,2.0,1.0,...,79.0,24.0,67.0,68.0,68.0,66.0,23.0,20.0,19.0,62.0
12,6.0,58.0,252.0,183.0,76.0,11.0,1.0,3.0,3.0,1.0,...,63.0,53.0,62.0,61.0,60.0,71.0,31.0,36.0,34.0,58.0
13,1.0,62.0,264.0,180.0,76.0,11.0,1.0,3.0,3.0,1.0,...,63.0,53.0,62.0,61.0,60.0,71.0,31.0,36.0,34.0,58.0
14,6.0,57.0,276.0,180.0,76.0,11.0,1.0,3.0,2.0,1.0,...,75.0,53.0,62.0,61.0,60.0,71.0,41.0,36.0,34.0,58.0
15,8.0,76.0,192.0,188.0,78.0,7.0,0.0,3.0,3.0,1.0,...,76.0,77.0,76.0,75.0,66.0,69.0,63.0,68.0,58.0,74.0


In [5]:
# (rows, columns) of the frame after the column drop and row filter.
df.shape

(24052, 41)

In [6]:
# Every column except the last becomes a model input; the last column is the
# target. Slicing with -1: (not -1) keeps y two-dimensional with one column.
table = df.values
x, y = table[:, :-1], table[:, -1:]

x.shape, y.shape

((24052, 40), (24052, 1))

In [7]:
# Display the target array (two-dimensional, a single column).
y

array([[83.],
       [81.],
       [79.],
       ...,
       [64.],
       [61.],
       [65.]])

In [8]:
import numpy as np

# Hold out a fixed fraction of the rows for validation, then convert both
# splits to float32 tensors.
VAL_FRACTION = 0.2
SPLIT_SEED = 42

num_samples = x.shape[0]
num_val = int(VAL_FRACTION * num_samples)
num_train = num_samples - num_val

# A dedicated, seeded generator makes the shuffle (and therefore the split)
# identical on every run without disturbing the global RNG.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
# x and y are NumPy arrays here, so index them with a NumPy index array
# rather than with a torch tensor.
shuffled_samples = torch.randperm(num_samples, generator=split_rng).numpy()

# Slice by the training count instead of by -num_val: when num_val is 0 the
# negative slices would give every row to validation and none to training.
train_indices = shuffled_samples[:num_train]
val_indices = shuffled_samples[num_train:]

x_val = torch.from_numpy(x[val_indices]).float()
y_val = torch.from_numpy(y[val_indices]).float()
x_train = torch.from_numpy(x[train_indices]).float()
y_train = torch.from_numpy(y[train_indices]).float()
x_val.shape, y_val.shape, x_train.shape, y_train.shape

(torch.Size([4810, 40]),
 torch.Size([4810, 1]),
 torch.Size([19242, 40]),
 torch.Size([19242, 1]))

In [12]:
from torch.utils.data import TensorDataset, DataLoader

# Pair each feature row with its target so a dataset item is an (x, y) tuple.
train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)

# Number of rows per mini-batch, shared by both loaders.
batch_size = 64

# Training batches are reshuffled each epoch; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [9]:
import torch.nn as nn
import torch.optim as optim

class Net(nn.Module):
    """Fully connected regressor: 40 inputs, five ReLU hidden layers, 1 output.

    The linear layers are registered as ``layer_1`` ... ``layer_6`` with widths
    40 -> 2048 -> 1024 -> 512 -> 128 -> 32 -> 1.
    """

    def __init__(self):
        super().__init__()
        widths = (40, 2048, 1024, 512, 128, 32, 1)
        # Register the layers in order under the names layer_1 ... layer_6.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"layer_{position}",
                    nn.Linear(in_features=fan_in, out_features=fan_out))

    def forward(self, x):
        """Map a batch of 40-feature rows to one prediction per row."""
        hidden_layers = (self.layer_1, self.layer_2, self.layer_3,
                         self.layer_4, self.layer_5)
        activations = x
        for layer in hidden_layers:
            activations = torch.relu(layer(activations))
        # The output layer has no activation.
        return self.layer_6(activations)

In [18]:
# batch version

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, val_loader):
    """Train ``model`` on mini-batches and print per-sample average losses.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. Losses are printed for epoch 1 and
    for every 10th epoch.

    NOTE(review): another cell in this notebook defines a function with the
    same name but a full-batch signature; whichever cell ran last is the one
    that a call resolves to.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: The ``nn.Module`` to train; it is updated in place.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            that is the mean over the batch.
        train_loader: Iterable of ``(x_batch, y_batch)`` training batches
            exposing a sized ``.dataset``.
        val_loader: Iterable of ``(x_batch, y_batch)`` validation batches
            exposing a sized ``.dataset``.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Move batches to wherever the model's parameters live instead of relying
    # on a notebook-global ``device`` that may be undefined or out of sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(1, n_epochs + 1):
        # Training
        model.train()
        train_loss = 0.0
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(target_device), y_batch.to(target_device)
            y_train_pred = model(x_batch)
            loss = loss_fn(y_train_pred, y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller final batch is not over-counted.
            train_loss += loss.item() * x_batch.size(0)

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x_batch, y_batch in val_loader:
                x_batch, y_batch = x_batch.to(target_device), y_batch.to(target_device)
                y_val_pred = model(x_batch)
                loss = loss_fn(y_val_pred, y_batch)
                val_loss += loss.item() * x_batch.size(0)

        # Turn the size-weighted sums into per-sample averages.
        train_loss /= len(train_loader.dataset)
        val_loss /= len(val_loader.dataset)

        if epoch == 1 or epoch % 10 == 0:
            print(f'Epoch {epoch}, Training loss {train_loss:.4f}, Validation loss {val_loss:.4f}')


In [16]:
# no batch version

def training_loop(n_epochs, optimizer, model, loss_fn, x_train, x_val, y_train, y_val):
    """Train ``model`` with full-batch gradient steps and print losses.

    Every epoch performs one forward/backward pass over the whole training
    set. The training and validation losses are printed for epoch 1 and for
    every 100th epoch; both are measured before that epoch's optimizer step.

    NOTE(review): another cell in this notebook defines a function with the
    same name but a DataLoader-based signature; whichever cell ran last is
    the one that a call resolves to.

    Args:
        n_epochs: Number of full-batch optimisation steps.
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: The ``nn.Module`` to train; it is updated in place.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar.
        x_train: Training inputs.
        x_val: Validation inputs.
        y_train: Training targets.
        y_val: Validation targets.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Move the data to wherever the model's parameters live instead of
    # relying on a notebook-global ``device``.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    x_train, y_train = x_train.to(target_device), y_train.to(target_device)
    x_val, y_val = x_val.to(target_device), y_val.to(target_device)

    model.train()
    for epoch in range(1, n_epochs + 1):
        should_log = epoch == 1 or epoch % 100 == 0

        # Training forward pass and loss.
        y_train_pred = model(x_train)
        loss_train = loss_fn(y_train_pred, y_train)

        if should_log:
            # The validation loss is only ever printed, so compute it just on
            # logging epochs, in eval mode and without building an autograd
            # graph. It runs before the optimizer step so that it reflects
            # the same weights as the reported training loss.
            model.eval()
            with torch.no_grad():
                loss_val = loss_fn(model(x_val), y_val)
            model.train()

        # Backpropagate and update the weights.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if should_log:
            print('Epoch {}, Training loss {}, Validation loss {}'.format(
                epoch, float(loss_train), float(loss_val)))

In [24]:
# batch version
# Fresh network, loss and optimizer for the mini-batch run.
model = Net().to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# The loader keywords require the DataLoader-based training_loop definition
# to be the one currently in scope.
training_loop(180, optimizer, model, loss_fn,
              train_loader=train_loader, val_loader=val_loader)


Epoch 1, Training loss 207.6149, Validation loss 14.8541
Epoch 10, Training loss 12.3795, Validation loss 13.6219
Epoch 20, Training loss 12.4855, Validation loss 11.9748
Epoch 30, Training loss 11.8466, Validation loss 12.3850
Epoch 40, Training loss 11.8670, Validation loss 12.3669
Epoch 50, Training loss 11.4468, Validation loss 12.3778
Epoch 60, Training loss 11.6240, Validation loss 13.2603
Epoch 70, Training loss 11.3307, Validation loss 12.0568
Epoch 80, Training loss 10.8559, Validation loss 12.0361
Epoch 90, Training loss 10.6802, Validation loss 11.8979
Epoch 100, Training loss 10.5815, Validation loss 12.0180
Epoch 110, Training loss 10.3921, Validation loss 12.4875
Epoch 120, Training loss 10.1969, Validation loss 12.0657
Epoch 130, Training loss 10.1459, Validation loss 12.1885
Epoch 140, Training loss 9.8084, Validation loss 12.2642
Epoch 150, Training loss 9.3610, Validation loss 12.0273
Epoch 160, Training loss 9.0807, Validation loss 12.5085
Epoch 170, Training loss 8.

In [None]:
# no batch version

# Fresh network, loss and optimizer for the full-batch run.
model = Net().to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# The tensor keywords require the full-batch training_loop definition to be
# the one currently in scope.
full_batch_data = dict(x_train=x_train, y_train=y_train, x_val=x_val, y_val=y_val)
training_loop(5000, optimizer, model, loss_fn, **full_batch_data)

Epoch 1, Training loss 4853.5400390625, Validation loss 4861.23193359375
Epoch 100, Training loss 13.969325065612793, Validation loss 13.788715362548828
Epoch 200, Training loss 12.494378089904785, Validation loss 12.311670303344727
Epoch 300, Training loss 12.087322235107422, Validation loss 12.01165771484375
Epoch 400, Training loss 11.882841110229492, Validation loss 11.921810150146484
Epoch 500, Training loss 11.763754844665527, Validation loss 11.932707786560059
Epoch 600, Training loss 11.584094047546387, Validation loss 11.833968162536621
Epoch 700, Training loss 11.465812683105469, Validation loss 11.817806243896484
Epoch 800, Training loss 11.355077743530273, Validation loss 11.79935073852539
Epoch 900, Training loss 11.258380889892578, Validation loss 11.79408073425293
Epoch 1000, Training loss 11.255680084228516, Validation loss 11.839864730834961
Epoch 1100, Training loss 11.319906234741211, Validation loss 12.065107345581055
Epoch 1200, Training loss 11.007658958435059, Va

In [None]:
# Load the table of rows to predict on.
pred_df = pd.read_csv("/content/drive/MyDrive/2023 Fall/fifa_22test.csv")

In [None]:
# Display the prediction table as loaded, before any columns are dropped.
pred_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,38,39,40,41,42,43,44,45,46,47
0,41.0,8.0,79.0,252.0,171.0,68.0,109.0,12.0,45.0,1.0,...,71.0,89.0,68.0,53.0,50.0,6.0,13.0,6.0,13.0,7.0
1,1179.0,15.0,80.0,324.0,192.0,92.0,83.0,4.0,27.0,1.0,...,22.0,70.0,13.0,11.0,11.0,76.0,73.0,70.0,90.0,76.0
2,2147.0,15.0,76.0,264.0,197.0,92.0,25.0,4.0,34.0,1.0,...,23.0,59.0,9.0,15.0,14.0,75.0,75.0,75.0,77.0,74.0
3,2702.0,6.0,58.0,312.0,185.0,76.0,678.0,11.0,14.0,0.0,...,60.0,68.0,41.0,36.0,34.0,7.0,11.0,6.0,14.0,6.0
4,3467.0,1.0,71.0,264.0,186.0,78.0,394.0,19.0,54.0,1.0,...,73.0,71.0,41.0,22.0,19.0,9.0,10.0,9.0,9.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,264631.0,7.0,70.0,36.0,176.0,65.0,328.0,11.0,54.0,1.0,...,53.0,55.0,16.0,21.0,27.0,7.0,10.0,11.0,12.0,11.0
19235,264634.0,8.0,66.0,36.0,172.0,65.0,357.0,11.0,60.0,1.0,...,38.0,40.0,40.0,55.0,50.0,8.0,7.0,12.0,12.0,5.0
19236,264638.0,4.0,70.0,36.0,170.0,68.0,410.0,21.0,59.0,1.0,...,61.0,50.0,45.0,45.0,42.0,13.0,12.0,12.0,7.0,6.0
19237,264639.0,3.0,70.0,84.0,175.0,73.0,252.0,21.0,54.0,1.0,...,58.0,60.0,20.0,30.0,27.0,14.0,14.0,14.0,8.0,5.0


In [None]:
# Remove eight columns by label, then discard every row whose column '1'
# equals 15.0.
pred_dropped_columns = ['0', '6', '8', '43', '44', '45', '46', '47']
pred_df = pred_df.drop(columns=pred_dropped_columns)
pred_rows_to_keep = pred_df['1'] != 15.0
pred_df = pred_df[pred_rows_to_keep]

In [None]:
# Convert the remaining columns to a float32 tensor on the active device.
pred_values = pred_df.values
pred_df_tensor = torch.from_numpy(pred_values).float()
pred_df_tensor = pred_df_tensor.to(device)

In [None]:
# Score every row of the prediction tensor with the trained model.
model.eval()

# Rows are pushed through the network in chunks under no_grad, so no autograd
# graph is built and the row-by-row Python loop is avoided.
PRED_CHUNK_ROWS = 4096
with torch.no_grad():
    chunk_outputs = [model(chunk).cpu()
                     for chunk in torch.split(pred_df_tensor, PRED_CHUNK_ROWS)]
ratings = torch.cat(chunk_outputs).numpy()

# NOTE: the name `list` shadows the builtin, but the next cell reads it, so
# the name is kept. Each element is a length-1 array, matching what the
# row-by-row loop produced.
list = [row for row in ratings]


In [None]:
# Each prediction is a length-1 array; unwrap it to its single value.
flattened_list = [item[0] for item in list]

# Re-read the raw table so the output keeps every original column, and apply
# the same row filter used before prediction so rows and ratings line up.
pred_df_output = pd.read_csv("/content/drive/MyDrive/2023 Fall/fifa_22test.csv")
# .copy() makes the filtered frame independent of the one it was sliced from,
# so adding a column below does not raise SettingWithCopyWarning.
pred_df_output = pred_df_output[pred_df_output['1'] != 15.0].copy()
pred_df_output['rating'] = flattened_list
pred_df_output.to_csv('/content/drive/MyDrive/2023 Fall/rating_output.csv')

In [None]:
# Persist only the model's parameters (its state_dict), not the module object.
torch.save(model.state_dict(), '/content/drive/MyDrive/fifa25.pt')