In [152]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

%matplotlib inline

In [153]:
# torch.manual_seed(4); # setting the rng

# inputs = 30
# n_hidden = 100

# model = torch.nn.Sequential(
#         torch.nn.Linear(inputs, n_hidden, bias=False), torch.nn.BatchNorm1d(n_hidden), torch.nn.Tanh(),
#         torch.nn.Linear(n_hidden, n_hidden, bias=False), torch.nn.BatchNorm1d(n_hidden), torch.nn.Tanh(),
#         torch.nn.Linear(n_hidden, n_hidden, bias=False), torch.nn.BatchNorm1d(n_hidden), torch.nn.Tanh(),
#         torch.nn.Linear(n_hidden, 3)
# )

In [154]:
PATH = "../src/model/trained_models/3_linear_layer.pt"
# model.load_state_dict(torch.load(PATH))
model = torch.load(PATH)
model.train()

Sequential(
  (0): Linear(in_features=30, out_features=100, bias=False)
  (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Tanh()
  (3): Linear(in_features=100, out_features=100, bias=False)
  (4): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Tanh()
  (6): Linear(in_features=100, out_features=100, bias=False)
  (7): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Tanh()
  (9): Linear(in_features=100, out_features=3, bias=True)
)

In [155]:
# for layer in model:
#     if type(layer)==torch.nn.BatchNorm1d:
#         layer.track_running_stats = False
# model.eval()

## Testing Evaluation

In [156]:
class Loader:

    def __init__(self, files):
        self.files = files

    def get_data(self):
        dfs = []
        for file in self.files:
            df = self.load_past_matches(file)
            dfs.append(df)
        
        df_join = self.join_data(dfs[0], dfs[1])

        return df_join

    def load_past_matches(self, file):
        df = pd.read_csv(f'../data/{file}')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df['date'] = pd.to_datetime(df['date']).dt.date

        return df

    def join_data(self, df1, df2):
        df = pd.merge(df1, df2,  how='inner',
            left_on=['league', 'date','team', 'opponent', 'home'],
            right_on=['league', 'date','team', 'opponent', 'home'])
        df.sort_values(by=['date', 'league', 'team', 'opponent'], inplace=True)
              
        return df

In [157]:
FILES = ["elos_matches.csv", "goals_matches.csv"]
loader = Loader(FILES)
data = loader.get_data()

In [160]:
# data.drop(['team_goals_scored',
#            'opponent_goals_scored',
#            'team_goals_conceded',
#            'opponent_goals_conceded'], axis=1, inplace=True)

In [161]:
def build_dataset(df):
    
    df = df.drop(['league', 'date', 'team', 'opponent'], axis=1)
    X = df.drop(['result'], axis=1).to_numpy()
    Y = np.array(df['result']) / 0.5
    
    X = torch.tensor(X).float()
    Y = torch.tensor(Y).long()
    
    
    return X, Y

In [162]:
n1 = int(0.8 * len(data))
n2 = int(0.9 * len(data))

Xtr, Ytr = build_dataset(data[:n1])
Xdev, Ydev = build_dataset(data[n1:n2])
Xte, Yte = build_dataset(data[n2:])

In [163]:
@torch.no_grad()
def split_loss(split):
    x, y = {
        'train': [Xtr, Ytr],
        'val'  : [Xdev, Ydev],
        'test' : [Xte, Yte]
    }[split]
    
    logits = model(x)
    loss = F.cross_entropy(logits, y)
    print(split, loss.item())

In [164]:
split_loss('train')
split_loss('val')

train 0.8832699656486511
val 0.8951374888420105


In [165]:
@torch.no_grad()
def accuracy(split):
    x, y = {
        'train': [Xtr, Ytr],
        'val'  : [Xdev, Ydev],
        'test' : [Xte, Yte]
    }[split]
    
    logits = model(x)
    preds = []
    preds = torch.argmax(logits, dim=1)

    i = 0
    for pred, true in zip(preds, y):
        if pred == true:
            i += 1
    
    print(f"----{split}----")
    print(f"Correctly predicted {i} out of {y.shape[0]} in {split}.")
    print(f"{i / y.shape[0]:.4f}")
    print(f"Guessing would give an accuracy of {1 / len(torch.unique(y))}")

In [166]:
accuracy('train')
accuracy('val')

----train----
Correctly predicted 39142 out of 66899 in train.
0.5851
Guessing would give an accuracy of 0.3333333333333333
----val----
Correctly predicted 4777 out of 8362 in val.
0.5713
Guessing would give an accuracy of 0.3333333333333333


## Future Data

In [190]:
future = pd.read_csv("../data/future_matches_processed.csv")
future.drop(["Unnamed: 0"], axis=1, inplace=True)
future.head()

Unnamed: 0,date,team,opponent,league,home,elo_team,team_goals_scored_avg,team_goals_conceded_avg,team_goals_scored_avg_home,team_goals_conceded_avg_home,...,opponent_goals_scored_avg,opponent_goals_conceded_avg,opponent_goals_scored_avg_home,opponent_goals_conceded_avg_home,opponent_goals_scored_avg_away,opponent_goals_conceded_avg_away,opponent_attack_strength,opponent_defense_strength,opponent_lambda,elo_diff
0,2023-01-08,siena,reggiana,"Serie C, Girone B",0,1467.979373,1.0,0.473684,1.105263,0.842105,...,1.263158,0.947368,2.105263,0.578947,1.210526,1.210526,1.266442,1.331404,1.333097,-56.380741
1,2023-01-08,san_donato_tavarnelle,sassari_torres,"Serie C, Girone B",1,1427.856072,0.842105,1.421053,1.947368,1.315789,...,0.789474,0.842105,1.210526,0.947368,1.0,0.947368,1.354047,1.06966,1.496579,-29.843999
2,2023-01-08,gubbio,fermana,"Serie C, Girone B",1,1496.198811,1.315789,0.842105,1.631579,1.0,...,0.842105,1.421053,0.947368,0.894737,0.684211,1.421053,1.065965,0.998369,1.009861,130.139051
3,2023-01-08,imolese,lucchese,"Serie C, Girone B",1,1396.316566,0.736842,1.368421,0.842105,1.157895,...,1.052632,0.736842,1.368421,0.789474,0.631579,0.789474,0.467593,0.585579,0.442983,-89.081573
4,2023-01-08,olbia,aquila_montevarchi,"Serie C, Girone B",1,1425.094277,1.105263,1.210526,0.947368,1.105263,...,0.789474,1.263158,1.473684,0.947368,0.421053,1.789474,0.442941,2.063733,0.582817,29.769997


In [206]:
future.shape

(360, 34)

In [207]:
def future_to_tensor(df):
    X = df.drop(['league', 'date', 'team', 'opponent'], axis=1).to_numpy()
    X = torch.tensor(X).float()
    
    return X

In [211]:
future_pred = future_to_tensor(future)

In [221]:
@torch.no_grad()
def predict(x):
    logits = model(x)
    preds = torch.softmax(logits, dim=1)
#     preds = []
#     preds = torch.argmax(logits, dim=1)
    
    return preds

In [222]:
predictions = predict(future_pred)

In [223]:
predictions

tensor([[0.1343, 0.5183, 0.3474],
        [0.1861, 0.3422, 0.4717],
        [0.8089, 0.1052, 0.0859],
        ...,
        [0.0951, 0.2331, 0.6718],
        [0.7645, 0.1237, 0.1118],
        [0.8027, 0.1208, 0.0764]])