In [0]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from google.colab import drive
import os

In [0]:
#import os, stat
#import shutil

#def remove_readonly(func, path, _):
#    "Clear the readonly bit and reattempt the removal"
#    os.chmod(path, stat.S_IWRITE)
#    func(path)

#shutil.rmtree('/content/fpl_prediction', onerror=remove_readonly)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
drive.mount('/content/drive')
repo_path = "/content/fpl_prediction/"
if not os.path.isdir(repo_path):
  !git clone --recurse-submodule https://github.com/SolomonAduolMaina/fpl_prediction

In [0]:
# Build per-player tensors from the per-season `gw.csv` files.
#
# Results (module-level names used by later cells):
#   players_data          : name -> (features, total_points) tensors, with the
#                           rows of successive seasons concatenated in order.
#   player_gameweek_index : (name, season, game_week) -> unique running integer.
#   index_count           : next unused running integer.
directory_string = '/content/fpl_prediction/Fantasy-Premier-League/data/20{0}-{1}/players/'
players_data = {}
player_gameweek_index = {}
index_count = 0
fields = ['assists', 'bonus', 'bps', 'clean_sheets', 'creativity', 'goals_conceded', 'goals_scored', 'ict_index', 'influence', 'minutes', 'opponent_team', 'own_goals', 'penalties_missed', 'penalties_saved', 'red_cards', 'saves', 'team_a_score', 'team_h_score', 'threat', 'total_points', 'value', 'was_home', 'yellow_cards']

FIRST_SEASON_START = 16      # two-digit start year of the first season (2016-17)
NUM_SEASONS = 4              # 2016-17 through 2019-20
GAMEWEEKS_PER_SEASON = 38    # index slots reserved per player and season

for season in range(NUM_SEASONS):
  start_year = FIRST_SEASON_START + season
  season_dir = directory_string.format(start_year, start_year + 1)

  # Sorted so the running index and the dict insertion order are reproducible;
  # os.listdir returns entries in arbitrary order.
  for filename in sorted(os.listdir(season_dir)):
    player_dir = os.path.join(season_dir, filename)
    if not os.path.isdir(player_dir):
      # Stray files in the players directory are not player folders.
      continue

    # Folder names are underscore-separated; the first two tokens form the key
    # under which a player's seasons are merged.
    name = " ".join(filename.split('_')[:2])

    gameweeks = pd.read_csv(os.path.join(player_dir, 'gw.csv'), encoding="ISO-8859-1")

    # Reserve one slot per game week, and extra slots if the file has more
    # rows than that, so the per-row lookup below can never miss.
    for game_week in range(max(GAMEWEEKS_PER_SEASON, len(gameweeks))):
      player_gameweek_index[(name, season, game_week)] = index_count
      index_count = index_count + 1

    row_index = [player_gameweek_index[name, season, game_week] for game_week in range(len(gameweeks))]

    # `assign` builds a new frame, avoiding assignment into a column slice.
    gameweeks = gameweeks[fields].assign(index=row_index).astype('float')

    # Features are every selected column except the target, plus 'index'.
    features = torch.tensor(gameweeks.drop(columns=['total_points']).to_numpy(), dtype=torch.float32)
    total_points = torch.tensor(gameweeks[['total_points']].to_numpy(), dtype=torch.float32)

    if name not in players_data:
      players_data[name] = (features, total_points)
    else:
      # Append this season's rows after the rows of earlier seasons.
      previous_features, previous_points = players_data[name]
      players_data[name] = (torch.cat([previous_features, features]), torch.cat([previous_points, total_points]))

In [0]:
class LSTMPredictor(nn.Module):
  """Single-layer LSTM followed by a linear layer that emits one value per step.

  The recurrent state is kept on the instance as `self.hidden` and is updated
  by every `forward` call. Callers should reset it with
  `model.hidden = model.init_hidden()` before feeding a new, unrelated
  sequence, and after moving or casting the model (`.to(...)`, `.double()`).
  """

  def __init__(self, embedding_dim, hidden_dim):
    """Create the layers.

    Args:
      embedding_dim: number of features in each input step.
      hidden_dim: size of the LSTM hidden state.
    """
    super(LSTMPredictor, self).__init__()
    self.hidden_dim = hidden_dim
    self.lstm = nn.LSTM(embedding_dim, hidden_dim)
    self.hidden2points = nn.Linear(hidden_dim, 1)
    self.hidden = self.init_hidden()

  def init_hidden(self):
    """Return a fresh zero `(h_0, c_0)` pair, each of shape (1, 1, hidden_dim).

    The tensors are created with the device and dtype of the model's own
    parameters so the state stays compatible after the model has been moved
    to another device or cast to another dtype.
    """
    reference = self.hidden2points.weight
    shape = (1, 1, self.hidden_dim)
    return (torch.zeros(shape, device=reference.device, dtype=reference.dtype),
            torch.zeros(shape, device=reference.device, dtype=reference.dtype))

  def forward(self, x):
    """Run one sequence through the LSTM and map every step to a prediction.

    Args:
      x: tensor whose first dimension is the sequence length; it is viewed as
        (len(x), 1, -1), i.e. a single sequence with batch size 1.

    Returns:
      Tensor of shape (len(x), 1) with one predicted value per step.
    """
    steps = len(x)
    lstm_out, self.hidden = self.lstm(x.view(steps, 1, -1), self.hidden)
    points = self.hidden2points(lstm_out.view(steps, -1))
    return points

In [0]:
# Train the predictor: for every player, the features of row t are used to
# predict the total points of row t + 1.
hidden_dim = 100
# One input per selected column: 'total_points' is dropped from `fields` and
# 'index' is added, so the feature count equals len(fields).
input_dim = len(fields)
# Run on the device chosen in the setup cell instead of always on the CPU.
model = LSTMPredictor(input_dim, hidden_dim).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

for epoch in range(1):
  for player in players_data:
    player_features, player_points = players_data[player]
    # Shift by one row: inputs are rows 0..n-2, targets are rows 1..n-1.
    features = player_features[:-1].to(device)
    total_points = player_points[1:].to(device)

    # Players with a single row have no (input, next-row target) pair.
    if len(features) > 0:
      optimizer.zero_grad()
      # Start each player from a zero state; move it to `device` explicitly so
      # this works wherever init_hidden allocates its tensors.
      model.hidden = tuple(state.to(device) for state in model.init_hidden())
      pred = model(features.view(len(features), 1, -1))
      loss = criterion(pred, total_points)
      loss.backward()
      optimizer.step()