In [10]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import importlib
import matplotlib.pyplot as plt
import stumpy
import numpy as np
import torch
import os
import sys
# Put the project root on the import path so the `src` package can be found.
# The root is taken to be the parent of the notebook's working directory; if
# the notebook lives deeper in the tree, climb more levels here.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)


from src.datamodule import TrajectoryDataModule
import src.visualization as vis
import src.models as models
import src.utils as utils
from src.utils import PacmanDataReader

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last
# one; the cells below rely on this to show several outputs at once.
InteractiveShell.ast_node_interactivity = "all"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data Handling
Data reading, filtering, transformation, etc. will be handled by the `PacmanDataReader` class. This is a singleton class that will be initialized once and then reused whenever needed (e.g., by visualization functions).

When initialized, the `PacmanDataReader` class will read the data from the data folder and filter out banned users.
Six dataframes are created:
- `game_df`: contains the game metadata
- `gamestate_df`: contains the gamestate data
- `user_df`: contains the user metadata
- `ip_df`: contains the ip metadata
- `redcap_df`: contains the redcap data
- `psychometrics_df`: contains the psychometric data


In [11]:
# Instantiate the reader on the local data folder. According to the Data
# Handling notes in this notebook, initialization reads the data and filters
# out banned users.
# NOTE(review): read_games_only=False presumably also loads the non-game
# tables (user, ip, redcap, psychometrics) — confirm against PacmanDataReader.
data = PacmanDataReader(data_folder='../data/', read_games_only=False)
# List the columns available in the gamestate table.
data.gamestate_df.columns

Index(['game_state_id', 'game_id', 'time_elapsed', 'score', 'lives',
       'pacman_attack', 'input_direction', 'movement_direction', 'Pacman_X',
       'Pacman_Y', 'Ghost1_X', 'Ghost1_Y', 'Ghost2_X', 'Ghost2_Y', 'Ghost3_X',
       'Ghost3_Y', 'Ghost4_X', 'Ghost4_Y', 'ghost1_state', 'ghost2_state',
       'ghost3_state', 'ghost4_state', 'powerPellets', 'pellets',
       'powerpelletstate_1', 'powerpelletstate_2', 'powerpelletstate_3',
       'powerpelletstate_4', 'fruitState_1', 'fruitState_2'],
      dtype='object')

The attribute `self.gamestate_df` contains the data as logged and retrieved from the SQL database.


In [12]:
# Preview the first rows of the gamestate table.
data.gamestate_df.head()

Unnamed: 0,game_state_id,game_id,time_elapsed,score,lives,pacman_attack,input_direction,movement_direction,Pacman_X,Pacman_Y,...,ghost3_state,ghost4_state,powerPellets,pellets,powerpelletstate_1,powerpelletstate_2,powerpelletstate_3,powerpelletstate_4,fruitState_1,fruitState_2
3955,220049,388,0.19,10,3,0,right,right,1.485321,-9.489613,...,0,0,4,243,1,1,1,1,0,0
3956,220050,388,0.23,20,3,0,none,right,1.939881,-9.489613,...,0,0,4,242,1,1,1,1,0,0
3957,220051,388,0.29,20,3,0,none,right,2.240156,-9.503768,...,0,0,4,242,1,1,1,1,0,0
3958,220052,388,0.33,20,3,0,none,right,2.694716,-9.500579,...,0,0,4,242,1,1,1,1,0,0
3959,220053,388,0.4,30,3,0,none,right,2.997756,-9.499909,...,0,0,4,241,1,1,1,1,0,0


The methods `self.get_trajectory_array()` and `self.get_partial_trajectory_array()` are used to get trajectories in the form of an `np.ndarray`, suitable for mathematical analysis.

In [13]:
# Full trajectory of game 600. With get_timevalues=True each row carries an
# extra leading column next to the two position values (see the output).
data.get_trajectory_array(game_id=600, get_timevalues=True)
# Number of samples in the full trajectory; len() is the idiomatic spelling of
# a direct .__len__() call.
len(data.get_trajectory_array(game_id=600))

# Trajectory truncated at end_timestep=10; computed once and reused for both
# the array display and its length instead of calling the reader twice.
partial_trajectory = data.get_partial_trajectory_array(game_id=600, end_timestep=10)
partial_trajectory
len(partial_trajectory)

array([[ 0.18000001,  1.48531497, -9.48961735],
       [ 0.23      ,  1.78835499, -9.48961735],
       [ 0.28      ,  2.24015045, -9.50377083],
       ...,
       [76.79000092,  1.94490552,  8.51200962],
       [76.81999969,  2.39679694,  8.49795246],
       [76.88999939,  2.69983697,  8.49948978]])

1535

array([[ 1.48531497, -9.48961735],
       [ 1.78835499, -9.48961735],
       [ 2.24015045, -9.50377083],
       [ 2.54319048, -9.50121593],
       [ 2.99775052, -9.4999094 ],
       [ 3.30070567, -9.49955559],
       [ 3.75526571, -9.49947357],
       [ 4.02023363, -9.50358009],
       [ 4.48067808, -9.50836086],
       [ 4.78371763, -9.50836086]])

10

The methods `self.get_trajectory_dataframe()` and `self.filter_gamestate_data()` are used internally by the `DataModule` to set up the dataset for model training.

In [14]:
# Trajectories as a dataframe; with series_type=['position'] the preview below
# shows the columns game_id, time_elapsed, Pacman_X and Pacman_Y.
pacman_ts = data.get_trajectory_dataframe(series_type=['position'])

pacman_ts.head()

datamodule = TrajectoryDataModule(data_folder='../data/')

# NOTE(review): this calls an underscore-prefixed (private) helper directly,
# for demonstration only. Presumably `mask` flags valid vs. padded timesteps
# and `game_idx` maps tensor rows back to game ids — confirm in src.datamodule.
tensordf, mask, game_idx = datamodule._create_game_trajectory_tensor(trajectories_df=pacman_ts)

# NOTE(review): shape is presumably (n_games, max_timesteps, n_features) — TODO confirm.
tensordf.shape


Unnamed: 0,game_id,time_elapsed,Pacman_X,Pacman_Y
3955,388,0.19,1.485321,-9.489613
3956,388,0.23,1.939881,-9.489613
3957,388,0.29,2.240156,-9.503768
3958,388,0.33,2.694716,-9.500579
3959,388,0.4,2.997756,-9.499909


torch.Size([656, 6320, 3])