In [None]:
import geopandas as gpd
import pandas as pd
import copy
import os
import warnings
import random
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from shapely.geometry import LineString, Point
import h3
from srai.h3 import h3_to_geoseries

In [None]:
# Prefer the GPU when CUDA is usable; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# 7x7 one-hot lookup table: row k is the indicator vector of class k (0..6).
DUMMIES = pd.get_dummies(list(range(7)))

In [None]:
def get_hex_neigh(traj):
    """Turn a trajectory into the ordered k=1 H3 neighbourhoods it passes through.

    Every pair of consecutive coordinates is mapped to H3 cells at
    ``H3_RESOLUTION`` (a coordinate is read as ``(x, y)`` and handed to
    ``h3.latlng_to_cell`` as ``(y, x)``). When the two cells differ, the cells
    returned by ``h3.grid_path_cells`` are appended to the running path,
    skipping a cell equal to the one recorded immediately before it.

    Args:
        traj: Geometry exposing ``.coords`` (a ``LineString`` in this notebook).

    Returns:
        list: One ``h3.grid_disk(cell, 1)`` result per path cell, in path
        order. Empty when no two consecutive points fall in different cells.
    """
    path = []   # cells visited so far, in order
    disks = []  # grid_disk(cell, 1) for every entry of `path`
    coords = traj.coords
    for nxt in range(1, len(coords)):
        prev_pt, next_pt = coords[nxt - 1], coords[nxt]
        origin = h3.latlng_to_cell(prev_pt[1], prev_pt[0], H3_RESOLUTION)
        destination = h3.latlng_to_cell(next_pt[1], next_pt[0], H3_RESOLUTION)
        if origin != destination:
            for cell in h3.grid_path_cells(origin, destination):
                # A segment's path starts where the previous one ended.
                if path and path[-1] == cell:
                    continue
                path.append(cell)
                disks.append(h3.grid_disk(cell, 1))
    return disks

In [None]:
def get_y(traj):
    """Build one-hot next-step labels for a sequence of hexagon neighbourhoods.

    For step ``i`` the label is the position, inside ``traj[i]``, of the first
    element of ``traj[i + 1]``. The last step has no successor and is labelled
    with class 0.

    Args:
        traj: Sequence of neighbourhoods (each a sequence of H3 cell ids), as
            returned by ``get_hex_neigh``.

    Returns:
        list | None: One row of ``DUMMIES`` per step, or ``None`` when ``traj``
        is empty.

    Raises:
        ValueError: If the next cell is not part of the current neighbourhood.
            The previous implementation only printed the trajectory and then
            either crashed with ``NameError`` or silently repeated the previous
            label.
    """
    if not traj:
        return None
    y = []
    for step, (current, following) in enumerate(zip(traj, traj[1:])):
        target = following[0]
        matches = np.flatnonzero(np.asarray(current) == target)
        if matches.size == 0:
            raise ValueError(
                f"step {step}: next cell {target!r} is not in the current neighbourhood {list(current)!r}"
            )
        # First match wins, mirroring the original `.values[0]`.
        y.append(DUMMIES.loc[matches[0]].values)
    # No successor for the final step -> class 0.
    y.append(DUMMIES.loc[0].values)
    return y

In [None]:
def embed(traj):
    """Replace every hexagon id in ``traj`` by its row of the ``embeddings`` frame.

    Args:
        traj: Sequence of neighbourhoods, each a sequence of H3 cell ids.

    Returns:
        list | None: Nested list shaped like ``traj`` holding
        ``embeddings.loc[cell].values`` for every cell, or ``None`` as soon as
        one cell has no entry in ``embeddings``.
    """
    emb = []
    for neighbourhood in traj:
        try:
            emb.append([embeddings.loc[cell].values for cell in neighbourhood])
        except KeyError:
            # Only a missing cell means "no embedding". Anything else (for
            # example `embeddings` not being loaded yet) must surface instead
            # of being reported as None.
            return None
    return emb

In [None]:
# H3 resolution of the hexagon grid and the sequence length; both are part of
# the name of the prepared dataset directory.
H3_RESOLUTION = 9
seq_length = 8
data_path = 'input_data'
# Resolves to <data_path>/data_res9_seq8 with the values above.
path = os.path.join(data_path, f'data_res{H3_RESOLUTION}_seq{seq_length}')

# Load train/test data

In [None]:
# NOTE(review): torch.load relies on pickle, so only load train.pt / test.pt
# from a trusted source.
train_file, test_file = (os.path.join(path, name) for name in ('train.pt', 'test.pt'))
X_train, y_train = torch.load(train_file)
X_test, y_test = torch.load(test_file)

In [None]:
# Report the feature / label shapes of both splits.
for heading, features, labels in (("Train size", X_train, y_train), ("Test size", X_test, y_test)):
    print(heading)
    print(features.shape, labels.shape)

In [None]:
class LSTMModel(nn.Module):
    """Sequence classifier: stacked LSTM -> dropout -> linear head on the last step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of output logits.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout probability applied before the linear head and,
            when ``num_layers > 1``, between the LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM applies dropout only between stacked layers and warns when a
        # non-zero value is passed for a single layer, so disable it there.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the logits computed from the last time step.

        Args:
            x: Either one sequence of shape ``(seq_len, input_size)`` or a
                batch of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``; ``batch`` is 1 for a
            2-D input.
        """
        # A single sequence becomes a batch of one; batched input is used as is.
        if x.dim() == 2:
            x = x.unsqueeze(0)
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device, dtype=x.dtype)

        out, _ = self.lstm(x, (h0, c0))
        # Only the representation of the final time step feeds the classifier.
        out = self.dropout(out[:, -1, :])
        return self.fc(out)


In [None]:
def load_model(model_path, input_size, hidden_size, output_size, num_layers=1, dropout_rate=0.5):
    """Rebuild an ``LSTMModel`` and load trained weights into it.

    Args:
        model_path: Path of the saved ``state_dict``.
        input_size, hidden_size, output_size, num_layers, dropout_rate:
            Architecture parameters forwarded to ``LSTMModel``; they have to
            match the ones used when the checkpoint was written.

    Returns:
        LSTMModel: The model with loaded weights, switched to eval mode.
    """
    model = LSTMModel(input_size, hidden_size, output_size, num_layers, dropout_rate)
    # The model is built on the CPU, so map the checkpoint there too. Without
    # map_location a checkpoint saved from a CUDA device cannot be
    # deserialized on a machine without a GPU.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model


# Checkpoint location plus the architecture parameters passed to load_model.
model_path = 'best_model.pth'
hidden_size, num_layers, dropout_rate = 128, 2, 0.6
output_size = 6
# Feature count of one time step, taken from the first training sequence.
input_size = len(X_train.iloc[0][0])

model = load_model(
    model_path,
    input_size,
    hidden_size,
    output_size,
    num_layers=num_layers,
    dropout_rate=dropout_rate,
)
print(model)

In [None]:
# Class index of every label vector = position of its largest entry.
dist = [np.argmax(label) for label in y_train]
test_dist = [np.argmax(label) for label in y_test]

# Side-by-side class histograms for the two splits.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for panel, values, title in zip(ax, (dist, test_dist), ('Train', 'Test')):
    panel.hist(values, bins=range(7))
    panel.set_title(title)
    panel.set_xlabel('Class')
    panel.set_ylabel('Frequency')
    panel.grid(axis='y', alpha=0.75)
plt.show()

In [None]:
# Hexagon embeddings for the resolution used throughout the notebook. The file
# name is derived from H3_RESOLUTION so the embeddings cannot silently drift
# away from the resolution used to compute the cells (a mismatch would make
# every lookup in embed() miss).
embeddings = pd.read_parquet(os.path.join('output_data', f'embeddings_{H3_RESOLUTION}.parquet'))
embeddings.shape

In [None]:
# Point-level trajectory data; rows are grouped per trajectory further down.
gdf_file = os.path.join('output_data', 'geolife_mpd.parquet')
gdf = gpd.read_parquet(gdf_file)
gdf.shape

In [None]:
# One row per trajectory: the points become a LineString, the per-point
# attributes are collected into lists, and the id is kept as a column.
per_trajectory_agg = {
    'geometry': LineString,
    'date_str': list,
    'speed': list,
    'direction': list,
    'altitude': list,
    'trajectory_id': 'first',
}
gdf_agg = gdf.groupby('trajectory_id').agg(per_trajectory_agg)
gdf_agg.shape

In [None]:
# Peek at the first aggregated trajectories to sanity-check the columns.
gdf_agg.head()

In [None]:
# Example trajectory used by the visualisation cells.
ls = gdf_agg['geometry'].iloc[515]
points = ls.coords
h3_cells = []
# Walk the trajectory segment by segment and record the hexagon path,
# dropping a cell that repeats the one recorded just before it.
for nxt in range(1, len(points)):
    a, b = points[nxt - 1], points[nxt]
    start_hex = h3.latlng_to_cell(a[1], a[0], H3_RESOLUTION)
    end_hex = h3.latlng_to_cell(b[1], b[0], H3_RESOLUTION)
    if start_hex != end_hex:
        for h3_cell in h3.grid_path_cells(start_hex, end_hex):
            if h3_cells and h3_cells[-1] == h3_cell:
                continue
            h3_cells.append(h3_cell)
# Hexagon path coloured by position, with the raw trajectory drawn on top.
path_frame = h3_to_geoseries(h3_cells).reset_index()
m = path_frame.explore("index", tiles="CartoDB positron", opacity=0.4)
gpd.GeoSeries([ls]).explore(m=m)

In [None]:
# Number of path cells treated as already observed.
s_len = 11
# k=1 disk around the last observed cell.
last_observed_hex = h3_cells[s_len - 1]
neigh = h3.grid_disk(last_observed_hex, 1)
neigh_frame = h3_to_geoseries(neigh).reset_index()
m = neigh_frame.explore("index", tiles="CartoDB positron", opacity=0.4)
gpd.GeoSeries([ls]).explore(m=m)

In [None]:
# Neighbourhood sequence, labels and embeddings for the example trajectory.
t = gdf_agg['geometry'].iloc[515]
cells = get_hex_neigh(t)
y, em = get_y(cells), embed(cells)
# Both lists should have one entry per path cell.
(len(em), len(y))

In [None]:
# Stack the embeddings, merge everything after the first axis into one
# feature vector per step, and keep the first s_len steps as model input.
test = np.array(em)
flatten = torch.tensor(test.reshape(test.shape[0], -1), dtype=torch.float32)[:s_len]
flatten.shape

In [None]:
# Inference only: no need to record an autograd graph.
with torch.no_grad():
    logits = model(flatten)
# Shift the winning output by one so it can be compared with the label index
# and used as an index into the k=1 disk (index 0 is presumably the centre
# cell -- TODO confirm).
pred = torch.argmax(logits) + 1
pred, np.argmax(y[s_len - 1])

In [None]:
# Observed prefix of the path plus the hexagon the model predicts next.
anchor_hex = h3_cells[s_len - 1]
predicted_hex = h3.grid_disk(anchor_hex)[pred.item()]
neigh = h3_cells[:s_len] + [predicted_hex]
m = h3_to_geoseries(neigh).reset_index().explore("index", tiles="CartoDB positron", opacity=0.4)
# Draw the raw trajectory on the same map, then display the map.
gpd.GeoSeries([ls]).explore(m=m)
m

# Sequence prediction

In [None]:
t = gdf_agg['geometry'].iloc[515]
cells = get_hex_neigh(t)

# Labels, embeddings and the flattened feature tensor depend only on the
# trajectory, so build them once instead of on every loop iteration.
y = get_y(cells)
em = embed(cells)
if y is None or em is None:
    raise ValueError("trajectory has no hexagon path or contains a hexagon without an embedding")
test = np.array(em)
flatten = torch.tensor(test.reshape(test.shape[0], -1), dtype=torch.float32)

preds = []
pred_hex = []
with torch.no_grad():  # inference only
    for i, disk in enumerate(cells):
        # Predict the next step from the first i + 1 observed cells.
        pred = torch.argmax(model(flatten[:i + 1])) + 1
        # Store the index exactly as used for the lookup below; the earlier
        # extra "+ 1" left preds off by one against trues.
        preds.append(pred.item())
        # cells[i] already is the k=1 disk of the i-th path cell, so no
        # dependency on h3_cells from an earlier cell is needed.
        pred_hex.append(disk[pred.item()])
trues = [np.argmax(label) for label in y]
# TODO: score preds against trues
# score

In [None]:
# Predicted hexagons coloured by step, with the raw trajectory on top.
pred_frame = h3_to_geoseries(pred_hex).reset_index()
m = pred_frame.explore("index", tiles="CartoDB positron", opacity=0.4)
gpd.GeoSeries([ls]).explore(m=m)