In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import importlib
import matplotlib.pyplot as plt
import stumpy
import numpy as np
import torch
import os
import sys
# Make the project root importable so the `src` package resolves below.
# `os.pardir` is '..': the notebook is assumed to sit one directory below the
# root -- point this at a different relative path if the notebook is moved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


import src.datamodule as datamodule
import src.visualization as vis
import src.models as models
import src.utils as utils
from src.datamodule import TrajectoryDataModule
from src.utils import PacmanDataReader

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

## Data Handling
Data reading, filtering, transformation, etc. will be handled by the `PacmanDataReader` class. This is a singleton class that will be initialized once and then reused whenever needed (e.g., by the visualization functions).

When initialized, the `PacmanDataReader` class will read the data from the data folder and filter out banned users.
6 dataframes are created:
- `game_df`: contains the game metadata
- `gamestate_df`: contains the gamestate data
- `user_df`: contains the user metadata
- `ip_df`: contains the ip metadata
- `redcap_df`: contains the redcap data
- `psychometrics_df`: contains the psychometric data


In [2]:
# Create the shared reader over the project's data folder.
# NOTE(review): read_games_only=False presumably loads every table rather than
# only the game table -- confirm against PacmanDataReader.
data = PacmanDataReader(data_folder='../data/', read_games_only=False)
# List the columns available in the game-state table.
data.gamestate_df.columns

Index(['game_state_id', 'game_id', 'time_elapsed', 'score', 'lives',
       'pacman_attack', 'input_direction', 'movement_direction', 'Pacman_X',
       'Pacman_Y', 'Ghost1_X', 'Ghost1_Y', 'Ghost2_X', 'Ghost2_Y', 'Ghost3_X',
       'Ghost3_Y', 'Ghost4_X', 'Ghost4_Y', 'ghost1_state', 'ghost2_state',
       'ghost3_state', 'ghost4_state', 'powerPellets', 'pellets',
       'powerpelletstate_1', 'powerpelletstate_2', 'powerpelletstate_3',
       'powerpelletstate_4', 'fruitState_1', 'fruitState_2'],
      dtype='object')

In [3]:
# Preview the first rows of the game-state table.
data.gamestate_df.head()

Unnamed: 0,game_state_id,game_id,time_elapsed,score,lives,pacman_attack,input_direction,movement_direction,Pacman_X,Pacman_Y,...,ghost3_state,ghost4_state,powerPellets,pellets,powerpelletstate_1,powerpelletstate_2,powerpelletstate_3,powerpelletstate_4,fruitState_1,fruitState_2
3955,220049,388,0.19,10,3,0,right,right,1.49,-9.49,...,0,0,4,243,1,1,1,1,0,0
3956,220050,388,0.23,20,3,0,none,right,1.94,-9.49,...,0,0,4,242,1,1,1,1,0,0
3957,220051,388,0.29,20,3,0,none,right,2.24,-9.5,...,0,0,4,242,1,1,1,1,0,0
3958,220052,388,0.33,20,3,0,none,right,2.69,-9.5,...,0,0,4,242,1,1,1,1,0,0
3959,220053,388,0.4,30,3,0,none,right,3.0,-9.5,...,0,0,4,241,1,1,1,1,0,0


In [4]:
# Request a trajectory dataframe restricted to the 'position' series; the preview
# below shows game_id, time_elapsed and the Pac-Man X/Y coordinates.
pacman_ts = data.get_trajectory_dataframe(series_type=['position'])

pacman_ts.head()


Unnamed: 0,game_id,time_elapsed,Pacman_X,Pacman_Y
3955,388,0.19,1.49,-9.49
3956,388,0.23,1.94,-9.49
3957,388,0.29,2.24,-9.5
3958,388,0.33,2.69,-9.5
3959,388,0.4,3.0,-9.5


For torch model training specifically, the `TrajectoryDataModule` class uses `PacmanDataReader` to create the input tensors.

In [5]:
# Build the data module used to create input tensors for torch training.
# The instance is bound to `datamodule`, which replaces the `src.datamodule`
# module alias created in the import cell. The class is therefore referenced
# through its direct import: the previous `datamodule.TrajectoryDataModule(...)`
# form raised AttributeError whenever this cell was executed a second time,
# because `datamodule` was by then the instance rather than the module.
datamodule = TrajectoryDataModule(
    data_folder='../data/',
    batch_size=32,
    max_sequence_length=None,
    series_type=['position'],
    include_game_state_vars=False,
    include_timesteps=True,
)
datamodule.setup()

# Convert the position trajectories from the earlier cell into a tensor, together
# with a mask and game index.
# NOTE(review): `_create_game_trajectory_tensor` is a private helper; prefer a
# public accessor if the data module exposes one.
tensordf, mask, game_idx = datamodule._create_game_trajectory_tensor(trajectories_df=pacman_ts)

In [6]:
# Inspect the tensor dimensions.
# NOTE(review): presumably (games, padded timesteps, features) -- TODO confirm.
tensordf.shape

torch.Size([656, 6320, 3])