# Team as an entity model

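This model is an LSTM-based sequence model that treats each team as a single entity: it takes a team's statistics over several consecutive seasons as input and outputs predictions for the same set of statistics.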

In [34]:
# Import
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from team_dataloader import *

# Model

In [35]:
class TeamAsEntity(nn.Module):
    """LSTM followed by a small MLP head that maps each time step back to the input feature size.

    Args:
        input_size: number of features in each element of the time series.
        hidden_size: size of the LSTM hidden/cell state and of its per-step output.
        dropout: dropout probability applied to the LSTM output before the MLP head.
            (Passing ``dropout`` to a single-layer ``nn.LSTM`` has no effect and only
            triggers a warning, so it is applied through an explicit ``nn.Dropout``.)
    """

    def __init__(self, input_size, hidden_size, dropout=0):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        # LSTM input
        # Input size: size of each element in time series
        # Hidden size: size of LSTM cell output and hidden output
        # Num layers: number of stacked LSTM cells (vertical, not horizontal). Can also use multiple explicit LSTM layers
        # (https://stackoverflow.com/questions/49224413/difference-between-1-lstm-with-num-layers-2-and-2-lstms-in-pytorch)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)

        # nn.LSTM only applies its own dropout *between* stacked layers, so with
        # num_layers=1 it would be ignored; apply it to the LSTM output instead.
        # With the default dropout=0 this layer is the identity.
        self.dropout = nn.Dropout(dropout)

        # Midpoint between LSTM and output layer
        self.mid = nn.Sequential(
            nn.Linear(hidden_size, 4*hidden_size),
            nn.Tanh(),
            nn.Linear(4*hidden_size, 4*input_size),
            nn.Tanh()
        )

        # Output must be same size as input (predictions for all features)
        self.out = nn.Linear(4*input_size, input_size)

    def forward(self, input, hidden=None):
        """Run the sequence through the LSTM and the MLP head.

        Args:
            input: batch-first sequence tensor, ``(batch, seq_len, input_size)``.
            hidden: optional ``(h_0, c_0)`` tuple, each ``(1, batch, hidden_size)``.
                When omitted, ``nn.LSTM`` starts from zero states.

        Returns:
            ``(output, hidden)`` where ``output`` has ``input_size`` features per
            time step and ``hidden`` is the final ``(h_n, c_n)`` tuple from the LSTM.
        """
        output, hidden = self.lstm(input, hidden)
        output = self.dropout(output)
        output = self.mid(output)
        output = self.out(output)
        return output, hidden

# Dataset

In [36]:
# Data paths
file = './Data/team/processed/team_data.xlsx'

# Columns read from the spreadsheet:
#   Name                   : 'team name'
#   Season                 : 'Season'
#   Points Percentage      : 'PTS%'
#   Goals For Per Game     : 'GF/G'
#   Goals Against Per Game : 'GA/G'
# NOTE: Win Percentage ('Win%') appeared in the original list of desired fields
# but is not part of `cols`, so it is not loaded.
cols = ['team name', 'Season', 'PTS%', 'GF/G', 'GA/G']
df = pd.read_excel(file, header=0, usecols=cols)

# Remove the asterisk from the team names.
# Raw string: `\*` must reach the regex engine as an escaped literal '*';
# in a plain string literal '\*' is an invalid escape sequence and triggers a warning.
df = df.replace(r'\*', '', regex=True)

# Build the dataset with N=5 over seasons 1990 to 2023 (TeamDataset comes from team_dataloader).
dataset = TeamDataset(df, N=5, start_season=1990, stop_season=2023)

# Dataloader

In [37]:
batch_size = 100

# Hold out one tenth of the samples (integer division) for testing;
# everything else goes to training.
test_length = len(dataset) // 10
train_length = len(dataset) - test_length

print(test_length, train_length, sep='\n')

# The subsets come back in the same order as the requested lengths.
dataset_test, dataset_train = random_split(dataset, lengths=[test_length, train_length])


# Must generate batches of sequence data with the following format:
# (batch_size, num_seasons(N), input_size(num stats))
# (https://stackoverflow.com/questions/49466894/how-to-correctly-give-inputs-to-embedding-lstm-and-linear-layers-in-pytorch/49473068#49473068)
dataloader_train = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=True)

63
574


# Training

In [38]:
# NOTE: N will be variable, as the input size
hidden_size = 50
# input_size is the number of loaded columns minus two — presumably the two
# non-statistic columns ('team name' and 'Season'); verify against TeamDataset.
# Pass the `hidden_size` variable rather than repeating the literal so the two cannot drift apart.
model = TeamAsEntity(input_size=len(cols) - 2, hidden_size=hidden_size)

In [39]:
# Optimisation setup: number of epochs, mean-squared-error loss, and Adam
# over all model parameters with a learning rate of 1e-4.
epochs = 1500
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [44]:
N=5 # TODO: change dataloader dynamically based on multiple N. Create a list of dataloader and mix-n-match the N accross batches

# Training loop (work in progress: only the batch index and len(data[0]) are printed for now).
# Initialize the hidden state (h_0, c_0).
# nn.LSTM expects each tensor to be (num_layers, batch, hidden_size); the model
# uses num_layers=1, so the shape is (1, batch_size, model.hidden_size).
# NOTE(review): the final batch may hold fewer than `batch_size` samples; it then
# needs a state sized for that batch (or drop_last=True on the DataLoader).
hidden = (torch.randn(1, batch_size, model.hidden_size),
          torch.randn(1, batch_size, model.hidden_size))
#for i in range(epochs):
for j,data in enumerate(dataloader_train):
    print(j)
    print(len(data[0]))
    # Draft of the training step, still disabled.
    # NOTE(review): before enabling, confirm the structure of `data` returned by
    # TeamDataset, unpack the (output, hidden) tuple returned by the model, and
    # call optimizer.zero_grad() before loss.backward().
    #print(data[0])
    #print(data[:][0])
    #print(data[:][0].view(-1,10,1))
    #y_pred = model(data[:][0].view(-1,10,1), hidden).reshape(-1)
    #loss = criterion(y_pred,data[:][1])
    #loss.backward()
    #optimizer.step()
#if i%50 == 0:
#    print(i,"th iteration : ",loss)

0
2
1
2
2
2
3
2
4
2
5
2


# Plot Results

In [41]:
from matplotlib import pyplot as plt

# Overlay the recorded training and validation loss curves on a single figure.
# NOTE(review): `logger` is not created in any cell visible in this notebook, so
# this cell raises NameError until a metrics logger is defined — confirm its source.
loss_history = logger.metrics
plt.plot(loss_history['training_loss'])
plt.plot(loss_history['val_loss'], marker='o')

NameError: name 'logger' is not defined