In [61]:
import numpy as np
import pandas as pd

import operator as op

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, root_mean_squared_error
from collections import defaultdict
from tqdm.notebook import tqdm

from utils.get_or_create_combined_database import get_or_create_combined_database
from utils.create_compound_key_and_index import create_compound_key_and_index
from utils.create_sequences_in_batches import calculate_sequences_in_batches
from utils.create_sequences_in_batches import create_sequences_from_database_rows
from utils.compare_models import compare_models

from concurrent.futures import ProcessPoolExecutor

from constants import DB_columns, GAME_AREA_WIDTH, DEFAULT_DATA_FEATURES

from matplotlib import pyplot as plt

import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# override=True lets values from .env replace variables that are already set;
# verbose=True makes python-dotenv warn when no .env file is found.
load_dotenv(verbose=True, override=True)

# NOTE(review): not referenced in any of the visible cells — presumably gates
# regeneration of the cleaned table; confirm it is still needed.
RECREATE_CLEANED_DATA = False

# Pairs of (min, max) bounds, one pair per axis.
# Presumably raw map coordinates for the two position axes — TODO confirm.
zoom_range = ((75, 14350), (75, 14350))
# Same structure for coordinates already scaled to the unit interval.
normalized_zoom_range = ((0, 1), (0, 1))

# Data

In [62]:
# The folder holding the source databases is read from the environment.
database_folder = os.getenv("DATABASE_FOLDER")
database_file = get_or_create_combined_database(database_folder)

table_name = "champs_cleaned"

# Column names used as model features (normalized X and Z position).
data_features = [
    column.value
    for column in (DB_columns.NORMALIZED_POS_X, DB_columns.NORMALIZED_POS_Z)
]

total_keys_to_fetch = 100

# H is the number of timesteps in each model input window (it is the sequence
# length given to the LSTM further down).
H_values = [200]
# Presumably the prediction horizon paired with each H — TODO confirm.
T_values = [10]

Found 2 database files in the folder specified by DATABASE_FOLDER
Found combined database /u/23/tarpill1/unix/Documents/combined2.db


In [63]:
import sqlite3
from contextlib import closing

from utils.get_data import fetch_data_batches

# closing() guarantees the connection is released even when the fetch raises.
# (sqlite3's own `with conn:` only scopes a transaction; it does not close.)
with closing(sqlite3.connect(database_file)) as conn:
    # Offset 0, limit taken from the shared setting instead of a duplicated
    # literal so this preview stays in sync with the comparison run below.
    data = fetch_data_batches(
        conn.cursor(), table_name, "1=1", 0, total_keys_to_fetch, data_features)
    # Kept from the original cell; presumably a no-op for a read-only fetch.
    conn.commit()

data

Fetched 100 keys for offset: 0, limit: 100


array([list([(0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268), (0.0088, 0.0268

In [64]:
# Build the sequence arrays from the fetched rows; the result is a dict keyed
# by (H, T) pairs, as the displayed output shows.
sequences = calculate_sequences_in_batches(
    H_values,
    T_values,
    data,
    batch_size=1000,
)

sequences






[A[A[A[A[A





[A[A[A[A[A[A





[A[A[A[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A

{(200,
  10): (array([[[0.05186667, 0.03333333],
          [0.05186667, 0.03333333],
          [0.05186667, 0.03333333],
          ...,
          [0.6058558 , 0.24949422],
          [0.6058558 , 0.24949422],
          [0.60420567, 0.24697413]],
  
         [[0.713593  , 0.16152503],
          [0.713593  , 0.16152503],
          [0.71962083, 0.15496538],
          ...,
          [0.88095167, 0.2203473 ],
          [0.88095167, 0.2203473 ],
          [0.88095167, 0.2203473 ]],
  
         [[0.46745524, 0.30447113],
          [0.4691282 , 0.31427562],
          [0.46896967, 0.32409927],
          ...,
          [0.22677327, 0.55282547],
          [0.2236    , 0.5496    ],
          [0.2181633 , 0.5461728 ]],
  
         ...,
  
         [[0.93018153, 0.58626127],
          [0.9293774 , 0.57599893],
          [0.92838047, 0.563273  ],
          ...,
          [0.8976    , 0.3092    ],
          [0.8976    , 0.3092    ],
          [0.8976    , 0.3092    ]],
  
         [[0.0088    , 0.0268 

# Models

In [65]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam


class TrajectoryPredictor:
    """Stacked two-layer LSTM regressor wrapped around a compiled Keras model.

    The network is LSTM -> Dropout -> LSTM -> Dropout -> Dense, where the Dense
    layer has ``input_shape[-1]`` units, i.e. one output per input feature.
    It is compiled with Adam (learning rate 0.001) and mean-squared-error loss.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample without the batch axis; the last entry is
        the number of features.
    lstm_units : int, default 128
        Number of units in each of the two LSTM layers.
    dropout_rate : float, default 0.2
        Dropout rate applied after each LSTM layer.
    device : str, default '/cpu:0'
        Device label stored on the instance and mirrored onto the Keras model
        as ``model.device``. Nothing in this class hands it to TensorFlow, so
        it does not control placement by itself.
        NOTE(review): presumably read by the code that trains the model —
        confirm.

    Attributes
    ----------
    device : str
        The device label given at construction.
    model : keras.Sequential
        The compiled model.
    """

    def __init__(self, input_shape, lstm_units=128, dropout_rate=0.2, device='/cpu:0'):
        self.device = device
        self.model = self.build_model(input_shape, lstm_units, dropout_rate)

    def build_model(self, input_shape, lstm_units, dropout_rate):
        """Create and compile the network; see the class docstring for layout.

        Returns the compiled ``Sequential`` model with a ``device`` attribute
        set to ``self.device``.
        """
        model = Sequential()
        # Declare the input with an explicit Input layer. Passing `input_shape=`
        # to the first layer is deprecated in current Keras and emits a
        # UserWarning every time a model is built.
        model.add(tf.keras.Input(shape=input_shape))
        model.add(LSTM(units=lstm_units, return_sequences=True))
        model.add(Dropout(rate=dropout_rate))
        # Second LSTM returns only its final state, collapsing the time axis.
        model.add(LSTM(units=lstm_units))
        model.add(Dropout(rate=dropout_rate))
        # Output layer with the same number of features as input
        model.add(Dense(units=input_shape[-1]))
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        # Plain attribute kept for callers that only receive the bare model.
        model.device = self.device
        return model

    def train(self, X_train, y_train, batch_size=64, epochs=50, validation_split=0.2):
        """Fit the model and return the Keras ``History`` object.

        ``validation_split`` is the fraction of the training data that Keras
        holds out for validation.
        """
        return self.model.fit(X_train, y_train, batch_size=batch_size,
                              epochs=epochs, validation_split=validation_split)

    def predict(self, X):
        """Return the model's predictions for ``X``."""
        return self.model.predict(X)

In [66]:
# The sequence length of the model input is H (the history window), matching
# the (H, n_features) shape used when the LSTM is built for the comparison
# run; T was used here before, which sized the model for the wrong window.
trajectory_predictor_model = TrajectoryPredictor(
    input_shape=(H_values[0], len(data_features)), device='/cpu:0').model

  super().__init__(**kwargs)


In [67]:
# Training Parameters

# TensorFlow device strings use forward slashes. The previous '\gpu:0' was an
# invalid escape sequence and not a device name TensorFlow can parse.
device = '/gpu:0' if tf.config.list_physical_devices('GPU') else '/cpu:0'

print(f"Using device: {device}")

Using device: /cpu:0


In [71]:
def _linear_regression_getter(H, T):
    """Return (estimator, feature names, reshape target) for linear regression.

    The reshape target flattens each window to H * n_features columns.
    T is accepted for a uniform getter signature and is not used.
    """
    flat_width = H * len(data_features)
    return LinearRegression(), data_features, (-1, flat_width)


def _lstm_getter(H, T):
    """Return (Keras model, feature names, reshape target) for the LSTM.

    The reshape target keeps windows as (H, n_features).
    T is accepted for a uniform getter signature and is not used.
    """
    predictor = TrajectoryPredictor(
        input_shape=(H, len(data_features)),
        device=device,
    )
    return predictor.model, data_features, (-1, H, len(data_features))


# Maps a model name to a factory called with (H, T).
model_getters = {
    'linear_regression': _linear_regression_getter,
    'lstm': _lstm_getter,
}

In [73]:
# Run the comparison over every registered model with training enabled.
trained_models, rmse_results, absolute_errors = compare_models(
    database_file,
    table_name,
    H_values,
    T_values,
    model_getters,
    data_features=data_features,
    total_keys_to_fetch=total_keys_to_fetch,
    batch_size=20,
    train=True,
)

rmse_results







[A[A[A[A[A[A

Fetched 100 keys for offset: 0, limit: 20









  super().__init__(**kwargs)








[A[A[A[A[A[A[A[A

Fitting model linear_regression with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 400)








[A[A[A[A[A[A







[A[A[A[A[A[A[A[A

Fitting model lstm with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 200, 2)


Model loop:   0%|          | 0/2 [07:29<?, ?it/s]
Model loop:   0%|          | 0/2 [06:50<?, ?it/s]
Model loop:   0%|          | 0/2 [06:19<?, ?it/s]
Model loop:   0%|          | 0/2 [06:02<?, ?it/s]
Model loop:   0%|          | 0/2 [05:23<?, ?it/s]
Model loop:   0%|          | 0/2 [05:06<?, ?it/s]


[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 88ms/step - loss: 0.0081
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 36ms/step








[A[A[A[A[A[A







[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A






[A[A[A[A[A[A[A

Fetched 100 keys for offset: 20, limit: 20


  super().__init__(**kwargs)


Fitting model linear_regression with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 400)








[A[A[A[A[A[A

Fitting model lstm with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 200, 2)
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 88ms/step - loss: 0.0076
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 36ms/step








                                                     

Fetched 100 keys for offset: 40, limit: 20


  super().__init__(**kwargs)


Fitting model linear_regression with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 400)








[A[A[A[A[A[A

Fitting model lstm with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 200, 2)
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 89ms/step - loss: 0.0079
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 39ms/step








                                                     

Fetched 100 keys for offset: 60, limit: 20


  super().__init__(**kwargs)


Fitting model linear_regression with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 400)








[A[A[A[A[A[A

Fitting model lstm with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 200, 2)
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 88ms/step - loss: 0.0077
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 37ms/step








                                                     

Fetched 100 keys for offset: 80, limit: 20


  super().__init__(**kwargs)


Fitting model linear_regression with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 400)








[A[A[A[A[A[A

Fitting model lstm with features ['normalized_pos_x', 'normalized_pos_z']
(44600, 200, 2)
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 87ms/step - loss: 0.0081
[1m1394/1394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 36ms/step








Model loop: 10it [14:40, 88.08s/it]                  


defaultdict(int,
            {(200, 10, 'linear_regression'): 0.15465819209491613,
             (200, 10, 'lstm'): 0.1983176822168336})

In [74]:
# Report the RMSE stored under each result key.
# Keys are (H, T, model name) tuples, as the displayed results show.

for result_key in rmse_results:
    print(f"{result_key}: {rmse_results[result_key]}")

(200, 10, 'linear_regression'): 0.15465819209491613
(200, 10, 'lstm'): 0.1983176822168336
