In [2]:
from libs import *
from model import *
import json

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device: {}".format(device))

device: cpu


# Test data processing

In [4]:
matches = pd.read_csv("cleaned_imputed_onehot_matches.csv")
M = matches['match_id'].unique()

In [5]:
np.random.seed(0)
np.random.shuffle(M)
train_matches = M[:int(0.9*len(M))]
val_matches = M[int(0.9*len(M)):]

In [20]:
matchs = val_matches
lb = pd.read_csv("cleaned_imputed_onehot_matches.csv")
lb = lb[lb['match_id'].isin(matchs)]

row = lb.iloc[1]
id, point_no = row['match_id'], row['point_no']
id, point_no

point_no = f'point_{point_no}'
with open(f"data/{id}.json", "r") as file:
    data = json.load(file)


In [21]:
np.array(list(data[point_no].values())).shape

(81, 2)

In [22]:
A = np.array(list(data[point_no].values())).T
result = np.zeros((400,81))
result[:A.shape[0],:A.shape[1]] = A

In [8]:
for match_id in M:
    df = matches.loc[(matches['match_id'] == match_id) ,:]
    features = df.columns[4:]
    
    dict = {}
    for current_point in range(1, len(df) + 1):
    # Filter data for the current point_no
        current_data = df[df['point_no'] <= current_point]
        
        dict[f'point_{current_point}'] = {}
        
        for feature in features:
            dict[f'point_{current_point}'][feature] = list(current_data[feature])
    
    file_path = f"data/{match_id}.json"

    with open(file_path, "w") as file:
        json.dump(dict, file)
    
    

# Dataset

In [25]:
# Create dataset object for each point in match
class Tennis_Dataset(Dataset):
    def __init__(self, phase,  path_db, path_lb, matchs):
        super(Tennis_Dataset, self).__init__()
        self.phase = phase
        
        lb = pd.read_csv(path_lb)
        lb = lb[lb['match_id'].isin(matchs)]
        
        self.path_db = path_db
        self.result = lb
        self.output = ['Alpha 1', 'Alpha 2']
        
        self.json_data = {}
        for match_id in matchs:
            json_file_path = os.path.join(self.path_db, f"{match_id}.json")
            with open(json_file_path, "r") as file:
                self.json_data[match_id] = json.load(file)
        
    def __len__(self):
        return len(self.result)
    
    def __getitem__(self, idx):
        row = self.result.iloc[idx]
        id, point_no = row['match_id'], row['point_no']
        point_no = f'point_{point_no}'
        
        data = self.json_data[id]
        point_data = data[point_no]
        
        A = np.array(list(data[point_no].values())).T
        
        result = np.zeros((400,81))
        result[:A.shape[0],:A.shape[1]] = A
        
        labels = row[self.output].to_numpy(dtype=np.float32)
        
        return torch.from_numpy(result).float(), torch.from_numpy(labels).float()
    
    
    

In [26]:
test_code = Tennis_Dataset('train', 'data', "labels_alpha.csv", train_matches[0:1])
test_code_loader = DataLoader(train_dataset, batch_size= 3, shuffle=False)
X, y = next(iter(test_code_loader))

In [115]:
train_dataset = Tennis_Dataset('train', 'data', "labels_alpha.csv", train_matches)
val_dataset = Tennis_Dataset('val', 'data', "labels_alpha.csv", val_matches)

In [37]:
# Calculate the lengths of the train and validation datasets
train_dataset_length = len(train_dataset)
val_dataset_length = len(val_dataset)

# Calculate the percentage of validation dataset
percent_validation = (val_dataset_length / (train_dataset_length + val_dataset_length)) * 100

# Print the percentage of validation dataset
print(f"The percentage of validation dataset is: {percent_validation:.2f}%")


# Model for training

## Resnet

In [38]:
from model import resnet_1d

input_size = (81, 400)
net = resnet_1d.ResNet18(num_classes=2, channels=81).to(device)
# Print the summary
summary(net, input_size=input_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1              [-1, 64, 200]          77,760
       BatchNorm1d-2              [-1, 64, 200]             128
              ReLU-3              [-1, 64, 200]               0
         MaxPool1d-4              [-1, 64, 100]               0
            Conv1d-5              [-1, 64, 100]          28,672
       BatchNorm1d-6              [-1, 64, 100]             128
              ReLU-7              [-1, 64, 100]               0
           Dropout-8              [-1, 64, 100]               0
            Conv1d-9              [-1, 64, 100]          28,672
      BatchNorm1d-10              [-1, 64, 100]             128
             ReLU-11              [-1, 64, 100]               0
            Block-12              [-1, 64, 100]               0
           Conv1d-13              [-1, 64, 100]          28,672
      BatchNorm1d-14              [-1, 

## RNN - LSTM

### RNN

In [52]:
class RegressionRNN(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # number of features
        self.hidden_units = hidden_units
        self.num_layers = 3
        
        self.RNN = nn.RNN(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers,
            dropout=0.5
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=2)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        
        output, hn = self.RNN(x, h0)
        out = self.linear(hn[0]).flatten()

        return out

In [53]:
learning_rate = 5e-5
num_hidden_units = 16

model_RNN = RegressionRNN(num_sensors= 81, hidden_units=num_hidden_units)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model_RNN.parameters(), lr=learning_rate)

In [54]:
model_RNN(X)

tensor([-0.2550, -0.0990, -0.2550, -0.0990, -0.2550, -0.0990],
       grad_fn=<ReshapeAliasBackward0>)

In [55]:
import torchinfo
torchinfo.summary(model_RNN, (1, 400, 81), device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
RegressionRNN                            [2]                       --
├─RNN: 1-1                               [1, 400, 16]              2,672
├─Linear: 1-2                            [1, 2]                    34
Total params: 2,706
Trainable params: 2,706
Non-trainable params: 0
Total mult-adds (M): 1.07
Input size (MB): 0.13
Forward/backward pass size (MB): 0.05
Params size (MB): 0.01
Estimated Total Size (MB): 0.19

### GRU

In [56]:
class RegressionGRU(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # number of features
        self.hidden_units = hidden_units
        self.num_layers = 3
        
        self.GRU = nn.GRU(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers,
            dropout=0.5
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=2)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        
        output, hn = self.GRU(x, h0)
        out = self.linear(hn[0]).flatten()

        return out

In [57]:
learning_rate = 5e-5
num_hidden_units = 16

model_GRU = RegressionGRU(num_sensors= 81, hidden_units=num_hidden_units)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model_GRU.parameters(), lr=learning_rate)

In [58]:
model_GRU(X)

tensor([-0.3504, -0.3662, -0.3504, -0.3662, -0.3504, -0.3662],
       grad_fn=<ReshapeAliasBackward0>)

In [59]:
import torchinfo
torchinfo.summary(model_GRU, (1, 400, 81), device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
RegressionGRU                            [2]                       --
├─GRU: 1-1                               [1, 400, 16]              8,016
├─Linear: 1-2                            [1, 2]                    34
Total params: 8,050
Trainable params: 8,050
Non-trainable params: 0
Total mult-adds (M): 3.21
Input size (MB): 0.13
Forward/backward pass size (MB): 0.05
Params size (MB): 0.03
Estimated Total Size (MB): 0.21

### LSTM

In [60]:
class RegressionLSTM(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # number of features
        self.hidden_units = hidden_units
        self.num_layers = 3

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers,
            dropout=0.5
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=2)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        
        output, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.linear(hn[0]).flatten()

        return out

In [61]:
learning_rate = 5e-5
num_hidden_units = 16

model_LSTM = RegressionLSTM(num_sensors= 81, hidden_units=num_hidden_units)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model_LSTM.parameters(), lr=learning_rate)

In [62]:
model_LSTM(X)

tensor([0.1854, 0.0052, 0.1854, 0.0052, 0.1854, 0.0052],
       grad_fn=<ReshapeAliasBackward0>)

In [63]:
import torchinfo
torchinfo.summary(model_LSTM, (1, 400, 81), device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
RegressionLSTM                           [2]                       --
├─LSTM: 1-1                              [1, 400, 16]              10,688
├─Linear: 1-2                            [1, 2]                    34
Total params: 10,722
Trainable params: 10,722
Non-trainable params: 0
Total mult-adds (M): 4.28
Input size (MB): 0.13
Forward/backward pass size (MB): 0.05
Params size (MB): 0.04
Estimated Total Size (MB): 0.22

# Training

In [35]:
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()
    
    for X, y in data_loader:
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

In [36]:
def test_model(data_loader, model, loss_function):
    
    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += loss_function(model(X), y.reshape(-1)).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")

In [37]:
print("Untrained test\n--------")
test_model(train_loader, model, loss_function)
print()

Untrained test
--------
Test loss: 0.00016669001650168665

