In [1]:
import torch
import pandas as pd
import os
import sqlite3
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import OneHotEncoder

In [None]:
# frame_id: 0
# event_subtype_id: 0
# frame_start:0
# frame_end: 0
# player_id: 0
# x_start: 0
# y_start: 0
# x_end: 0
# y_end: 0
# // team_id: 0
# player_in_possession_id: 0
# player_possession_frame_start: 0
# player_in_possession_x_start: 0
# player_in_possession_y_start: 0
# player_in_possession_x_end: 0
# player_in_possession_y_end: 0

In [2]:
MATCH_ID = "978805"
EVENT_DATA_DIR = "myairbridge-6YJaD9uGRDr/machine learning data/events"
MATCH_ID_LIST = [ i.split("_")[0] for i in os.listdir(EVENT_DATA_DIR) ]
TRACKING_DATA_DIR = "myairbridge-6YJaD9uGRDr/machine learning data/tracking"
N_EPOCHS = 10

In [16]:
def load_events(m_id:str) -> pd.DataFrame:
    events_raw = pd.read_csv(os.path.join(EVENT_DATA_DIR, f"{m_id}_events.csv"), index_col=0,)
    events = events_raw[["frame_start", "frame_end", "event_subtype", "player_id"]]

    return events

In [12]:
def load_tracking(m_id:str) -> pd.DataFrame:
    tracking_raw = pd.read_csv(os.path.join(TRACKING_DATA_DIR, f"{m_id}_tracking.csv"))
    tracking = tracking_raw.drop(columns=["match_id", "half", "timestamp"])
    tracking_ball = tracking.drop(columns=["extrapolated"])
    return tracking_ball

In [20]:
events = load_events(MATCH_ID)
tracking_data = load_tracking(MATCH_ID)

In [25]:
events

Unnamed: 0_level_0,frame_start,frame_end,event_subtype,player_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,320,350,pulling_wide,7322
1,356,370,pulling_half_space,617441
2,360,377,behind,179287
3,440,451,run_ahead_of_the_ball,26200
4,446,466,run_ahead_of_the_ball,30561
...,...,...,...,...
419,56497,56567,cross_receiver,530099
420,56501,56567,support,33844
421,57445,57460,support,6343
422,57510,57520,run_ahead_of_the_ball,530099


In [None]:
tracking_data

In [31]:
def preprocess(events:pd.DataFrame, tracking:pd.DataFrame, obj_id:int=0) -> pd.DataFrame:
    conn = sqlite3.connect(':memory:')
    #write the tables
    e = events.loc[events.player_id == obj_id]
    e.to_sql('e', conn, index=False)
    t  = tracking.loc[tracking.object_id == obj_id]
    t.to_sql('t', conn, index=False)

    print(len(e))

    qry = '''
        select 
            *
            -- x,y,z, frame_id, event_subtype
        from
            e right join t on
            t.frame_id between frame_start and frame_end
        '''
    
    df = pd.read_sql_query(qry, conn)

    return df
df = preprocess(events, tracking_data, 7322)

25


In [None]:
df

In [None]:
df.sort_values(by="frame_id")

In [74]:
events.loc[events.player_id == 7322]

Unnamed: 0_level_0,frame_start,frame_end,event_subtype,player_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,320,350,pulling_wide,7322
28,3390,3434,cross_receiver,7322
37,5792,5812,support,7322
60,8356,8368,support,7322
71,9495,9505,run_ahead_of_the_ball,7322
88,12693,12729,run_ahead_of_the_ball,7322
96,14099,14117,support,7322
104,14800,14841,run_ahead_of_the_ball,7322
107,15638,15645,pulling_wide,7322
117,16715,16735,run_ahead_of_the_ball,7322


In [69]:
df.sort_values(by="frame_id").loc[df.frame_id.between(250, 500)]

Unnamed: 0,frame_start,frame_end,event_subtype,player_id,frame_id,object_id,x,y,z
720,,,,,250,7322,3.27,-11.56,0.0
721,,,,,251,7322,3.20,-11.44,0.0
722,,,,,252,7322,3.12,-11.32,0.0
723,,,,,253,7322,3.03,-11.20,0.0
724,,,,,254,7322,2.93,-11.08,0.0
...,...,...,...,...,...,...,...,...,...
935,,,,,496,7322,-1.05,-22.74,0.0
936,,,,,497,7322,-0.62,-22.51,0.0
937,,,,,498,7322,-0.17,-22.27,0.0
938,,,,,499,7322,0.30,-22.02,0.0


In [77]:
import plotly.express as px

px.scatter_3d(df.sort_values(by="frame_id").loc[df.frame_id.between(9460, 9540)], x='x', y='y', z='frame_id')

In [32]:
X = df[['x', 'y', 'z']]
y = df['event_subtype']

In [None]:
(y.value_counts()/len(y)).plot(kind='bar')

In [34]:
X_tensor = torch.tensor(X.values, dtype=torch.float32).unsqueeze(0)

In [35]:
classes = y.unique()
n_classes = len(classes)

In [37]:
n_classes

8

In [38]:
ohe = OneHotEncoder()
y_ohe = torch.tensor(ohe.fit_transform(y.values.reshape(-1, 1)).toarray()).unsqueeze(0)

In [47]:
y_ohe.shape

torch.Size([1, 39660, 8])

In [40]:
class BiLSTM(nn.Module):

    def __init__(self, input_dim:int, hidden_dim:int, batch_size:int, output_dim:int=11, num_layers:int=2):
        super(BiLSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers

        #Define the initial linear hidden layer
        self.init_linear = nn.Linear(self.input_dim, self.input_dim)

        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True, bidirectional=True)

        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim * 2, output_dim)

    def init_hidden(self):
        # This is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input):
        #Forward pass through initial hidden layer
        linear_input = self.init_linear(input)

        # Forward pass through LSTM layer
        # shape of lstm_out: [batch_size, input_size ,hidden_dim]
        # shape of self.hidden: (a, b), where a and b both
        # have shape (batch_size, num_layers, hidden_dim).
        lstm_out, self.hidden = self.lstm(linear_input)

        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.linear(lstm_out)
        return y_pred

In [48]:
bilstm = BiLSTM(3, 16, 16, n_classes)
optimizer = optim.SGD(bilstm.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [49]:
y_ohe.shape

torch.Size([1, 39660, 8])

In [46]:
out.shape

torch.Size([1, 39660, 11])

In [50]:
for i in range(100):
    
    out = bilstm(X_tensor[:,:])
    optimizer.zero_grad()
    loss = criterion(out, y_ohe[:,:])
    loss.backward()
    print(loss)
    optimizer.step()

    # accuracy
    _, predicted = torch.max(out, 1)

tensor(52509.7419, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52496.3743, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52495.2394, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52492.4718, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52491.6703, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52491.7796, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52489.6956, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52490.1283, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52490.2066, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52488.2787, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52488.5785, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52486.7140, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52486.9158, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52485.8334, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52484.6396, dtype=torch.float64, grad_fn=<DivBackward1>)
tensor(52484.0284, dtype=torch.float64, 

KeyboardInterrupt: 

In [29]:
out.shape

torch.Size([1, 43468, 11])

In [41]:
y_hat = out.argmax(dim=2)

In [42]:
y_idx = y_ohe.argmax(dim=2)

In [53]:
(y_hat == y_idx).sum()/y_hat.shape[1]

tensor(0.0136)