# Data Format

In [1]:
data_neural = \
{'train_loc': {1: 2, 2: 1, 3: 1, 4: 12, 5: 1, 6: 2, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 2, 25: 2, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1}, 
'explore': 0.15384615384615385, 
'train': [0, 1, 2, 3, 4], 
'entropy': 3.219290807235925, 
'rg': 0.03142983221874803, 
'sessions': {
0: [[1, 30], [2, 45], [3, 46], [4, 14], [4, 16], [5, 20], [6, 23], [7, 0], [4, 13], [8, 20], [9, 21]], 
1: [[10, 40], [4, 14], [11, 14], [12, 22], [13, 20], [4, 24], [14, 43], [4, 14], [15, 17], [4, 18], [16, 21]], 
2: [[17, 22], [4, 14], [18, 18], [4, 18], [19, 24], [20, 27], [21, 27]], 
3: [[22, 19], [4, 17], [23, 25], [24, 40], [25, 0], [4, 19], [4, 14], [26, 22], [6, 23]], 
4: [[27, 24], [28, 25], [1, 26], [29, 46], [25, 24], [24, 40], [30, 43], [31, 44], [32, 46], [33, 2]], 
5: [[1, 27], [4, 17], [34, 21], [35, 22], [4, 13], [36, 2], [4, 13], [37, 19], [38, 20], [36, 2], [36, 26]], 
6: [[39, 45], [10, 47], [35, 45], [36, 3], [4, 13]]
},
'test': [5, 6],
'valid_len': 14, 
'pred_len': 43}

# Load Data (New)
**Vertical Split + No Sliding Window**

In [51]:
import pandas as pd
import numpy as np

df_train = pd.read_csv('df_train_1k.csv')
df_test  = pd.read_csv('df_test_1k.csv')

all_uids = df_test['uid'].unique() # store all unique uids

data_neural = {}
train_keys = [0, 1, 2, 3, 4, 5, 6, 7]
test_keys = [8,9]
num_users = len(all_uids)
# train_keys = [0, 1, 2, 3]
# test_keys = [4, 5]
# num_users = 100

# load train data
for id in range(num_users):
    data_neural[id] = {}
    data_neural[id]['sessions'] = {}
    for i in range(0,len(train_keys)*10,10):
        data_neural[id]['sessions'][train_keys[i//10]] = df_train[df_train['uid']==all_uids[id]].iloc[i:i+10][['combined_xy', 't']].to_numpy().tolist()
    for i in range(0,len(test_keys)*10,10):
        data_neural[id]['sessions'][test_keys[i//10]] = df_test[df_test['uid']==all_uids[id]].iloc[i:i+10][['combined_xy', 't']].to_numpy().tolist()
    data_neural[id]['train'] = train_keys
    data_neural[id]['test'] = test_keys

In [52]:
output = {}
output['data_neural'] = data_neural
uid_list = [int(i) for i in range(len(output['data_neural']))]
output['uid_list'] = uid_list

In [53]:
import pickle
with open('DeepMove-master/data/my_data_neural.pk', 'wb') as f:
    pickle.dump(output, f)

# Load Data (Old)

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

# # 1k data
df_train = pd.read_csv('df_train_1k.csv')
df_test  = pd.read_csv('df_test_1k.csv')
grouped_data_train = [group for _, group in df_train.groupby('uid')]
grouped_data_test = [group for _, group in df_test.groupby('uid')]

# Sliding window by 1 STEP
STEP_SIZE = 12
WINDOW_SIZE = 10
class TrajectoryDataset(Dataset):
    def __init__(self, grouped_data):
        self.data = []
        for group in grouped_data:
            if len(group) >= STEP_SIZE:
                # get the first STEP_SIZE location and time data
                xy = group['combined_xy'].values.tolist()[:STEP_SIZE]
                t = group['t'].values.tolist()[:STEP_SIZE]
                # slice the data into several sessions using moving window approach
                self.data.extend([(xy[i:i+WINDOW_SIZE], t[i:i+WINDOW_SIZE])
                                  for i in range(STEP_SIZE-WINDOW_SIZE+1)])
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        xy_window, t_window = self.data[idx]
        inputs = torch.tensor(xy_window[:-1])        # input sequence of locations
        labels = torch.tensor(xy_window[-1])         # desired predicted location
        positions = torch.tensor(t_window[:-1])      # corresponding input locations' times
        label_positions = torch.tensor(t_window[-1]) # corresponding predicted location's time
        return inputs, labels, positions, label_positions

# Sliding window by STEP_SIZE
STEP_SIZE = 20
WINDOW_SIZE = 10
class TrajectoryDatasetTEST(Dataset):
    def __init__(self, grouped_data):
        self.data = []
        for group in grouped_data:
            if len(group) >= STEP_SIZE:
                # get the first STEP_SIZE location and time data
                xy = group['combined_xy'].values.tolist()[:STEP_SIZE]
                t = group['t'].values.tolist()[:STEP_SIZE]
                # slice the data into several sessions using moving window approach
                self.data.extend([(xy[i:i+WINDOW_SIZE], t[i:i+WINDOW_SIZE])
                                  for i in range(0, STEP_SIZE-WINDOW_SIZE+1, STEP_SIZE)])
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        xy_window, t_window = self.data[idx]
        inputs = torch.tensor(xy_window[:-1])        # input sequence of locations
        labels = torch.tensor(xy_window[-1])         # desired predicted location
        positions = torch.tensor(t_window[:-1])      # corresponding input locations' times
        label_positions = torch.tensor(t_window[-1]) # corresponding predicted location's time
        return inputs, labels, positions, label_positions

train_dataset = TrajectoryDataset(grouped_data_train)
test_dataset  = TrajectoryDatasetTEST(grouped_data_test)

In [8]:
def format_dataneural(train_dataset, test_dataset):
    train_keys = [2,3,4,5]
    test_keys  = [0,1]
    
    data_neural = {}
    data_neural_index = 0
    
    test_data = test_dataset.data
    index = 0
    for i in range(0, len(test_data), 2):
        data_neural[index]={'sessions':{}}
        data_neural[index]['sessions'][0]=[[location,time] for location,time in zip(test_data[i][0],test_data[i][1])]
        data_neural[index]['sessions'][1]=[[location,time] for location,time in zip(test_data[i+1][0],test_data[i+1][1])]
        data_neural[index]['train']=train_keys
        data_neural[index]['test']=test_keys
        index += 1
    
    train_data = train_dataset.data
    index = 0
    for i in range(0, len(train_data), 4):
        data_neural[index]['sessions'][2]=[[location,time] for location,time in zip(train_data[i][0],train_data[i][1])]
        data_neural[index]['sessions'][3]=[[location,time] for location,time in zip(train_data[i+1][0],train_data[i+1][1])]
        data_neural[index]['sessions'][4]=[[location,time] for location,time in zip(train_data[i+2][0],train_data[i+2][1])]
        data_neural[index]['sessions'][5]=[[location,time] for location,time in zip(train_data[i+3][0],train_data[i+3][1])]
        index += 1
        if (index == len(data_neural)):
            break
        
    return data_neural
data_neural = format_dataneural(train_dataset, test_dataset)

In [2]:
def format_dataneural(train_dataset, test_dataset, n, m):
    train_data = train_dataset.data
    test_data = test_dataset.data
    
    data_neural = {}
    index = 0
    
    num_iterations = len(test_data) // m
    
    for i in range(num_iterations):
        data_neural[index] = {'sessions': {}, 'train': [], 'test': []}
        
        # Test Data
        for j in range(m):
            test_index = i * m + j
            if test_index < len(test_data):
                data_neural[index]['sessions'][j] = [[location, time] for location, time in zip(test_data[test_index][0], test_data[test_index][1])]
                data_neural[index]['test'].append(j)
        
        # Train Data
        for j in range(n):
            train_index = i * n + j
            if train_index < len(train_data):
                data_neural[index]['sessions'][m + j] = [[location, time] for location, time in zip(train_data[train_index][0], train_data[train_index][1])]
                data_neural[index]['train'].append(m + j)
        
        index += 1
    
    # Handle any remaining train data
    remaining_train_data = len(train_data) % n
    if remaining_train_data > 0:
        data_neural[index] = {'sessions': {}, 'train': [], 'test': []}
        start_index = num_iterations * n
        for j in range(remaining_train_data):
            train_index = start_index + j
            if train_index < len(train_data):
                data_neural[index]['sessions'][j] = [[location, time] for location, time in zip(train_data[train_index][0], train_data[train_index][1])]
                data_neural[index]['train'].append(j)
    
    return data_neural

n = 4 # Number of train data
m = 2 # Number of test data
data_neural = format_dataneural(train_dataset, test_dataset, n, m)

In [3]:
output = {}
output['data_neural'] = data_neural
uid_list = [int(i) for i in range(len(output['data_neural']))]
output['uid_list'] = uid_list

In [4]:
import pickle
with open('DeepMove-master/data/my_data_neural.pk', 'wb') as f:
    pickle.dump(output, f)

# Debug

In [39]:
data_neural = format_dataneural(train_dataset, test_dataset)

In [40]:
output = {}
output['data_neural'] = {0: data_neural[0]}
output['uid_list'] = [0]
output

{'data_neural': {0: {'sessions': {0: [[22273, 5],
     [22273, 8],
     [22273, 11],
     [22273, 12],
     [22272, 15],
     [22478, 16],
     [15333, 17],
     [15332, 18],
     [15332, 36],
     [15333, 37]],
    1: [[15329, 0],
     [15329, 1],
     [14930, 2],
     [15329, 3],
     [15329, 4],
     [15329, 5],
     [15329, 8],
     [14930, 10],
     [15329, 11],
     [15130, 12]],
    2: [[26284, 27],
     [26284, 32],
     [26284, 35],
     [26887, 36],
     [26687, 39],
     [27290, 40],
     [27489, 42],
     [27090, 43],
     [27489, 44],
     [27489, 45]],
    3: [[26284, 32],
     [26284, 35],
     [26887, 36],
     [26687, 39],
     [27290, 40],
     [27489, 42],
     [27090, 43],
     [27489, 44],
     [27489, 45],
     [27489, 46]],
    4: [[26284, 35],
     [26887, 36],
     [26687, 39],
     [27290, 40],
     [27489, 42],
     [27090, 43],
     [27489, 44],
     [27489, 45],
     [27489, 46],
     [27290, 47]],
    5: [[26887, 36],
     [26687, 39],
     [27290, 40],
  

In [41]:
import pickle
with open('DeepMove-master/data/my_data_neural.pk', 'wb') as f:
    pickle.dump(output, f)

# Output

100 users, for each user 4 training data 2 testing data, 4 gpu, 0 accuracy, 10 loss

```
[[6107], [6305], [4687], [278], [76], [76], [76], [77], [6107], [5905], [6305], [4687], [278], [76], [76], [76], [77], [6107], [5905], [6107], [4687], [278], [76], [76], [76], [77], [6107], [5905], [6107], [6307], [278], [76], [76], [76], [77], [6107], [5905], [6107], [6307], [6107], [23482], [23484], [23484], [23483], [23482], [23484], [23295], [23097], [23298], [26487], [25725], [25724], [25725], [25724], [25925], [26924], [26924], [26925], [26925]]
[[13], [14], [15], [16], [22], [23], [24], [26], [27], [28], [14], [15], [16], [22], [23], [24], [26], [27], [28], [14], [15], [16], [22], [23], [24], [26], [27], [28], [14], [17], [16], [22], [23], [24], [26], [27], [28], [14], [17], [18], [11], [14], [19], [11], [12], [14], [20], [21], [22], [23], [10], [11], [12], [13], [17], [18], [19], [21], [22]]
[25724, 25725, 25724, 25925, 26924, 26924, 26925, 26925, 26924]
None
4687
```