In [1]:
import numpy as np
import pandas as pd

import operator as op
import sqlite3

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, root_mean_squared_error
from collections import defaultdict
from tqdm.notebook import tqdm

from utils.get_or_create_combined_database import get_or_create_combined_database
from utils.create_compound_key_and_index import create_compound_key_and_index
from utils.create_sequences_in_batches import calculate_sequences_in_batches
from utils.create_sequences_in_batches import create_sequences_from_database_rows
from utils.compare_models import compare_models
from utils.get_data import fetch_data_batches, clear_cache

from concurrent.futures import ProcessPoolExecutor

from constants import DB_columns, GAME_AREA_WIDTH, DEFAULT_DATA_FEATURES

from matplotlib import pyplot as plt

import os
from dotenv import load_dotenv
load_dotenv(verbose=True, override=True)

RECREATE_CLEANED_DATA = False

zoom_range = ((75, 14350), (75, 14350))
normalized_zoom_range = ((0, 1), (0, 1))



# Data

In [2]:
database_folder = os.getenv("DATABASE_FOLDER")

database_file = get_or_create_combined_database(database_folder)

table_name = "champs_cleaned"

data_features = [DB_columns.NORMALIZED_POS_X.value,
                 DB_columns.NORMALIZED_POS_Z.value]

total_keys_to_fetch = 5000

H_values = [200]
T_values = [10]

Found 2 database files in the folder specified by DATABASE_FOLDER
Found combined database /u/23/tarpill1/unix/Documents/combined2.db


In [3]:
if RECREATE_CLEANED_DATA:
    conn = sqlite3.connect(database_file)

    cursor = conn.cursor()

    clear_cache(cursor)

    tlol_db_table_name = "champs"
    cleaned_table_name = table_name

    # Drop previous table

    cursor.execute(f"DROP TABLE IF EXISTS {cleaned_table_name}")

    conn.commit()

    cursor.execute(

        f"CREATE TABLE {cleaned_table_name} AS SELECT * FROM {tlol_db_table_name} WHERE 1=0")

    # Add normalized columns

    cursor.execute(

        f"ALTER TABLE {cleaned_table_name} ADD COLUMN {DB_columns.NORMALIZED_POS_X.value} FLOAT GENERATED ALWAYS AS ({DB_columns.POS_X.value} / {GAME_AREA_WIDTH}) STORED")

    cursor.execute(

        f"ALTER TABLE {cleaned_table_name} ADD COLUMN {DB_columns.NORMALIZED_POS_Z.value} FLOAT GENERATED ALWAYS AS ({DB_columns.POS_Z.value} / {GAME_AREA_WIDTH}) STORED")

    conn.commit()

    conn.close()

    # Add data to the new table from original table according to a filter

    # Conditions:

    # Only rows with a name that is not empty

    not_empty_name = f"{DB_columns.NAME.value} IS NOT ''"

    # Only rows with a name that is not "Turret"

    # Only rows with timestamp greater than 5

    timestamp_greater_than_5 = f"{DB_columns.TIME.value} > 5"

    # Only rows with pos_x and pos_y greater between [0, GAME_AREA_WIDTH]

    pos_x_greater_than_0 = f"{DB_columns.POS_X.value} > 0"

    pos_x_less_than_max = f"{DB_columns.POS_X.value} < {GAME_AREA_WIDTH}"

    pos_z_greater_than_0 = f"{DB_columns.POS_Z.value} > 0"

    pos_z_less_than_max = f"{DB_columns.POS_Z.value} < {GAME_AREA_WIDTH}"

    position_between_0_and_max = " AND ".join(

        [pos_x_greater_than_0, pos_x_less_than_max, pos_z_greater_than_0, pos_z_less_than_max])

    # Combine all above filters

    filter_conditions = " AND ".join(

        [not_empty_name, timestamp_greater_than_5, position_between_0_and_max])

    conn = sqlite3.connect(database_file)

    cursor = conn.cursor()

    cursor.execute(

        f"INSERT INTO {cleaned_table_name} SELECT * FROM {tlol_db_table_name} WHERE {filter_conditions}")

    conn.commit()

    conn.close()

In [4]:
# Check values from the new table

conn = sqlite3.connect(database_file)
cursor = conn.cursor()

page_size = 5

rows = cursor.execute(
    f"SELECT * FROM {table_name} LIMIT {page_size}").fetchall()

conn.close()

rows

[('Mordekaiser',
  604.0,
  612.0,
  5.5419455,
  645.0,
  100,
  2841236401,
  '2841236401_100_Mordekaiser',
  0.040266666666666666,
  0.0408),
 ('Viego',
  786.0,
  436.0,
  5.5419455,
  630.0,
  100,
  2841236401,
  '2841236401_100_Viego',
  0.0524,
  0.029066666666666668),
 ('Riven',
  364.0,
  136.0,
  5.5419455,
  745.0,
  100,
  2841236401,
  '2841236401_100_Riven',
  0.024266666666666666,
  0.009066666666666667),
 ('Ezreal',
  132.0,
  402.0,
  5.5419455,
  600.0,
  100,
  2841236401,
  '2841236401_100_Ezreal',
  0.0088,
  0.0268),
 ('Leblanc',
  298.0,
  676.0,
  5.5419455,
  598.0,
  100,
  2841236401,
  '2841236401_100_Leblanc',
  0.019866666666666668,
  0.045066666666666665)]

In [5]:
# Clear cache table

conn = sqlite3.connect(database_file)
c = conn.cursor()

# clear_cache(c)
data = fetch_data_batches(c, table_name, "1=1", 0,
                          total_keys_to_fetch, data_features)

conn.commit()
conn.close()

len(data[0])

Fetched 1 keys for offset: 0, limit: 5000


656

In [6]:
from numpy import average


sequences = calculate_sequences_in_batches(
    H_values, T_values, data, batch_size=1000)

list(sequences.values())[0][0].astype(np.float32)

                                             

array([[[0.4311546 , 0.20692594],
        [0.4419228 , 0.19918378],
        [0.45118552, 0.19252406],
        ...,
        [0.7589509 , 0.08078796],
        [0.7643619 , 0.08103371],
        [0.76977175, 0.08130593]],

       [[0.6914667 , 0.07386667],
        [0.6914667 , 0.07386667],
        [0.6914667 , 0.07386667],
        ...,
        [0.8243692 , 0.11884554],
        [0.82893395, 0.12176166],
        [0.82893395, 0.12176166]],

       [[0.7643619 , 0.08103371],
        [0.76977175, 0.08130593],
        [0.7717051 , 0.08438332],
        ...,
        [0.7383553 , 0.10410279],
        [0.7383553 , 0.10410279],
        [0.7350397 , 0.10131099]],

       ...,

       [[0.7162667 , 0.08573333],
        [0.7162667 , 0.08573333],
        [0.7162667 , 0.08573333],
        ...,
        [0.7577289 , 0.11537191],
        [0.7577289 , 0.11537191],
        [0.7529173 , 0.11067197]],

       [[0.57513666, 0.10562019],
        [0.5875139 , 0.10224175],
        [0.5982294 , 0.09931685],
        .

# Models

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim


def train_model(model, X_train, y_train, epochs=50, batch_size=64, learning_rate=0.001):
    device = model.device
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)

    dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True)

    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

# Function to predict with the PyTorch model


def predict_model(model, X):
    device = model.device
    model.to(device)
    model.eval()
    with torch.no_grad():
        X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
        output = model(X_tensor)
    return output.cpu().numpy()


class TrajectoryPredictor(nn.Module):
    def __init__(self, input_shape, lstm_units=128, dropout_rate=0.2, device='cpu'):
        super(TrajectoryPredictor, self).__init__()
        self.lstm1 = nn.LSTM(input_shape[-1], lstm_units, batch_first=True)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.lstm2 = nn.LSTM(lstm_units, lstm_units, batch_first=True)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(lstm_units, input_shape[-1])
        self.device = device

    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x = self.fc(x[:, -1, :])  # taking the output of the last time step
        return x

    def fit(self, X, y, epochs=50, batch_size=64, learning_rate=0.001):
        train_model(self, X, y, epochs, batch_size, learning_rate)

    def predict(self, X):
        return predict_model(self, X)

In [8]:
input_shape = T_values[0], len(data_features)
trajectory_predictor_model = TrajectoryPredictor(
    input_shape=input_shape)
trajectory_predictor_model

TrajectoryPredictor(
  (lstm1): LSTM(2, 128, batch_first=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (lstm2): LSTM(128, 128, batch_first=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

In [9]:
# Training Parameters

device = 'gpu' if torch.cuda.is_available() else 'cpu'

print(f'Using {device} device')

Using gpu device


In [10]:
model_getters = {
    'linear_regression': lambda H, T: (LinearRegression(), data_features, (-1, H*len(data_features))),
    'lstm': lambda H, T: (TrajectoryPredictor(
        input_shape=(H, len(data_features)),
    ), data_features, (-1, H, len(data_features)))
}

In [11]:
trained_models, rmse_results, absolute_errors = compare_models(
    database_file, table_name, H_values, T_values, model_getters, data_features=data_features, total_keys_to_fetch=total_keys_to_fetch, batch_size=total_keys_to_fetch, train=True)

rmse_results

Model loop:   0%|          | 0/2 [00:00<?, ?it/s]

Fetched 1 keys for offset: 0, limit: 5000



[A

Fitting model linear_regression with features ['normalized_pos_x', 'normalized_pos_z']
Fitting model lstm with features ['normalized_pos_x', 'normalized_pos_z']




Epoch 1/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/50, Loss: 0.09730634093284607


Epoch 2/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 2/50, Loss: 0.003162371227517724


Epoch 3/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 3/50, Loss: 0.012494750320911407


Epoch 4/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 4/50, Loss: 0.004534839186817408


Epoch 5/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 5/50, Loss: 0.00273524085059762


Epoch 6/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 6/50, Loss: 0.002985765691846609


Epoch 7/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 7/50, Loss: 0.003087572520598769


Epoch 8/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 8/50, Loss: 0.0025272227358072996


Epoch 9/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 9/50, Loss: 0.0022843608167022467


Epoch 10/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 10/50, Loss: 0.0020444097463041544


Epoch 11/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 11/50, Loss: 0.0021809637546539307


Epoch 12/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 12/50, Loss: 0.0022918900940567255


Epoch 13/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 13/50, Loss: 0.003206619992852211


Epoch 14/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 14/50, Loss: 0.0028099333867430687


Epoch 15/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 15/50, Loss: 0.002356948098167777


Epoch 16/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 16/50, Loss: 0.002193690976127982


Epoch 17/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 17/50, Loss: 0.0023144336882978678


Epoch 18/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 18/50, Loss: 0.0020225048065185547


Epoch 19/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 19/50, Loss: 0.0024135191924870014


Epoch 20/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 20/50, Loss: 0.0016699591651558876


Epoch 21/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 21/50, Loss: 0.0015225630486384034


Epoch 22/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 22/50, Loss: 0.0019527028780430555


Epoch 23/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 23/50, Loss: 0.0028158596251159906


Epoch 24/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 24/50, Loss: 0.0029603992588818073


Epoch 25/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 25/50, Loss: 0.001757491147145629


Epoch 26/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 26/50, Loss: 0.0018797487718984485


Epoch 27/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 27/50, Loss: 0.00238808523863554


Epoch 28/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 28/50, Loss: 0.0024829423055052757


Epoch 29/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 29/50, Loss: 0.001426795613951981


Epoch 30/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 30/50, Loss: 0.0022079136688262224


Epoch 31/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 31/50, Loss: 0.0020420653745532036


Epoch 32/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 32/50, Loss: 0.0017670098459348083


Epoch 33/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 33/50, Loss: 0.002339918864890933


Epoch 34/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 34/50, Loss: 0.002210459904745221


Epoch 35/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 35/50, Loss: 0.0022442007903009653


Epoch 36/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 36/50, Loss: 0.0018799460958689451


Epoch 37/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 37/50, Loss: 0.00209890422411263


Epoch 38/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 38/50, Loss: 0.0017697472358122468


Epoch 39/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 39/50, Loss: 0.0015180796617642045


Epoch 40/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 40/50, Loss: 0.0015780178364366293


Epoch 41/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 41/50, Loss: 0.0016598167130723596


Epoch 42/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 42/50, Loss: 0.0015079878503456712


Epoch 43/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 43/50, Loss: 0.0018061442533507943


Epoch 44/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 44/50, Loss: 0.0019150407752022147


Epoch 45/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 45/50, Loss: 0.0016857736045494676


Epoch 46/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 46/50, Loss: 0.0015615633456036448


Epoch 47/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 47/50, Loss: 0.0022499682381749153


Epoch 48/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 48/50, Loss: 0.0018008674960583448


Epoch 49/50:   0%|          | 0/7 [00:00<?, ?it/s]

Epoch 49/50, Loss: 0.0016852575354278088


Epoch 50/50:   0%|          | 0/7 [00:00<?, ?it/s]

Model loop: 100%|██████████| 2/2 [10:10<00:00, 305.04s/it]
[A
Model loop: 100%|██████████| 2/2 [10:10<00:00, 305.05s/it]

Epoch 50/50, Loss: 0.0012242173543199897





defaultdict(int,
            {(200, 10, 'linear_regression'): 0.0029198674019426107,
             (200, 10, 'lstm'): 0.022338954731822014})

In [12]:
# Print rmse results

for model_name, rmse in rmse_results.items():
    print(f"{model_name}: {rmse}")

(200, 10, 'linear_regression'): 0.0029198674019426107
(200, 10, 'lstm'): 0.022338954731822014
