In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn 
import scipy as sp
from torch.utils.data.dataset import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.autograd import Variable
from utils import *

In [None]:
# The SLEAP data contains only one session/sample with 2 mice, so to get the
# train/valid/test split, I took different time windows of that session.
# This will also help us define dynamic interactions, i.e. the interaction matrix for the
# different chunks might not be the same at all timesteps/frames.

In [2]:
# Load the thorax locations and add a leading sample axis.
# The frame axis is inferred with -1 instead of being hard-coded, so the cell
# keeps working for recordings of a different length (this one has 63033 frames).
# NOTE(review): the two trailing axes of size 2 are presumably coordinates and
# mice (in some order) -- confirm against how thorax_locations.npy was written.
locations = np.load('thorax_locations.npy')
locations = np.reshape(locations, (1, -1, 2, 2))
locations.shape

(1, 63033, 2, 2)

In [3]:
# Load the velocities and add a leading sample axis.
# The frame axis is inferred with -1 instead of being hard-coded, so the cell
# keeps working for recordings of a different length (this one has 63033 frames).
# NOTE(review): the two trailing axes of size 2 are presumably coordinates and
# mice (in some order) -- confirm against how velocities.npy was written.
velocities = np.load('velocities.npy')
velocities = np.reshape(velocities, (1, -1, 2, 2))
velocities.shape

(1, 63033, 2, 2)

In [4]:
# Carve the session into train / validation / test windows along the frame
# axis (axis 1); the remaining axes are taken whole.
# Frames 20000-38999 and 51000 onward are not assigned to any split.
locations_train = locations[:, :20000]
locations_valid = locations[:, 39000:45000]
locations_test = locations[:, 45000:51000]

In [5]:
# Slice the velocities along the frame axis (axis 1) into
# train / validation / test windows; the remaining axes are taken whole.
velocities_train = velocities[:, :20000]
velocities_valid = velocities[:, 39000:45000]
velocities_test = velocities[:, 45000:51000]

In [None]:
# Construct the edge matrices

# Based on their construction, the edge interactions are symmetric,
# i.e. they are bi-directional: if there's an interaction, it goes both ways.
# This might not fully capture the interactions in live agents, as sometimes an interaction
# can be uni-directional; later we can try changing this and see if it changes anything.
# NB: the charged particles class allows for self-interactions

In [13]:
# Case 1: static (non-dynamic) interaction matrix.
# Assume first that the mice are interacting at every timestep;
# the same steps can be repeated later under the no-interaction assumption.

# Ones everywhere except on the diagonal: an edge in both directions, no self-edges.
edges_present = np.ones((2, 2)) - np.eye(2)
print(edges_present)
print(edges_present.shape)

[[0. 1.]
 [1. 0.]]
(2, 2)


In [14]:
# Add a leading axis of length 1 so the matrix has shape (1, 2, 2).
edges_present = edges_present.reshape(1, 2, 2)
edges_present.shape

(1, 2, 2)

In [15]:
# Train, validation and test data all use the "interaction present" matrix.
# Point define_edges at a different matrix to switch to e.g. no edges later.
define_edges = edges_present

# The three split names all refer to the same array object (no copies are made).
edges_train = edges_valid = edges_test = define_edges

In [19]:
# Sanity check: display the shape of the validation edge matrix.
edges_valid.shape

(1, 2, 2)

In [41]:
# Because of the current shape (one long sequence), the model might memorize the trajectory
# and not be able to generalize, so let's reshape it to make batches from different
# time windows -- for now we'll only do non-overlapping windows, but later we can try overlapping ones.

def batch_ready(data, seq_len=100):
    """Cut trajectories into non-overlapping windows of ``seq_len`` frames.

    Parameters
    ----------
    data : array-like
        4-D array of shape (n_samples, n_frames, d2, d3).
    seq_len : int, default 100
        Number of consecutive frames in each window.

    Returns
    -------
    np.ndarray
        Array of shape (n_samples * (n_frames // seq_len), seq_len, d2, d3).
        Windows never straddle two samples, and trailing frames that do not
        fill a complete window are dropped. For a single sample whose frame
        count is a multiple of ``seq_len`` this is a plain reshape.

    Raises
    ------
    ValueError
        If ``data`` is not 4-D or ``seq_len`` is not a positive integer.
    """
    data = np.asarray(data)
    if data.ndim != 4:
        raise ValueError(
            'batch_ready expects a 4-D array, got shape {}'.format(data.shape)
        )
    if seq_len <= 0:
        raise ValueError('seq_len must be positive, got {}'.format(seq_len))

    n_samples, n_frames = data.shape[:2]
    n_windows = n_frames // seq_len

    # Drop the incomplete tail so the frame axis splits evenly; without this
    # the reshape fails whenever n_frames is not a multiple of seq_len.
    trimmed = data[:, :n_windows * seq_len]

    # Folding the sample axis into the window axis keeps each window inside
    # one sample (C-order reshape walks sample by sample, window by window).
    formatted = np.reshape(
        trimmed, (n_samples * n_windows, seq_len) + data.shape[2:]
    )
    print('batch_ready data shape: ', formatted.shape)
    return formatted

# Window every split so that batch sizes bigger than 1 become possible.
# Calls run in the order train, valid, test -- locations first, then velocities.
formatted_locs_train, formatted_locs_valid, formatted_locs_test = (
    batch_ready(split)
    for split in (locations_train, locations_valid, locations_test)
)

formatted_vel_train, formatted_vel_valid, formatted_vel_test = (
    batch_ready(split)
    for split in (velocities_train, velocities_valid, velocities_test)
)
    

batch_ready data shape:  (200, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (200, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)


In [65]:
# Give the edges the same leading (window) dimension as the trajectories:
# one copy of the interaction matrix per window.

n_train = len(formatted_locs_train)
n_valid = len(formatted_locs_valid)
n_test = len(formatted_locs_test)

# Each split gets its own freshly allocated array of shape (n_windows, 2, 2).
formatted_edges_train = np.tile(edges_present, (n_train, 1, 1))
formatted_edges_valid = np.tile(edges_present, (n_valid, 1, 1))
formatted_edges_test = np.tile(edges_present, (n_test, 1, 1))

print(
    formatted_edges_train.shape,
    formatted_edges_valid.shape,
    formatted_edges_test.shape,
    sep='\n',
)

(200, 2, 2)
(60, 2, 2)
(60, 2, 2)


In [None]:
# dynamic LOAD


In [86]:
# Made the loading steps into a function that lives in utils.
# NOTE(review): presumably each returned list holds the train/valid/test splits
# (locations, velocities and edges respectively) -- confirm in utils.load_SLEAP.
locs_list, vel_list, edges_list = load_SLEAP()

locations.shape at start (1, 63033, 2, 2)
velocities.shape at start (1, 63033, 2, 2)
edges.shape at start (1, 2, 2)
batch_ready data shape:  (200, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (200, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
formatted edges_train.shape (200, 2, 2)
formatted edges_valid.shape (60, 2, 2)
formatted edges_test.shape (60, 2, 2)


In [90]:
# Made a function to preprocess and load SLEAP data into the data loaders - now lives in utils!
# NOTE(review): loc_max/loc_min/vel_max/vel_min are presumably the bounds used to
# normalise locations and velocities -- confirm in utils.preprocess_SLEAP.
train_data_loader, valid_data_loader, test_data_loader, loc_max, loc_min, vel_max, vel_min = preprocess_SLEAP()

locations.shape at start (1, 63033, 2, 2)
velocities.shape at start (1, 63033, 2, 2)
edges.shape at start (1, 2, 2)
batch_ready data shape:  (200, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (200, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
batch_ready data shape:  (60, 100, 2, 2)
formatted edges_train.shape (200, 2, 2)
formatted edges_valid.shape (60, 2, 2)
formatted edges_test.shape (60, 2, 2)
loc_train.shape after preprocessing:  (200, 2, 100, 2)
vel_train.shape after preprocessing:  (200, 2, 100, 2)
edges_train.shape after preprocessing:  torch.Size([200, 4])
edges_train.shape after 2nd preprocessing:  torch.Size([200, 2])
