In [None]:
import lzma
import geopandas as gpd
import pandas as pd
import numpy as np
from pandarallel import pandarallel
from skspatial.objects import Line
from scipy.spatial import cKDTree
import geopy.distance as distance
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


In [None]:
# Setup phase: initialize pandarallel so that DataFrame.parallel_apply can be used later on.
pandarallel.initialize()

In [None]:
def load_veiculos(path, COD):
    """Load the vehicle position log of a single bus line.

    Parameters
    ----------
    path : str or path-like
        xz-compressed JSON-lines file. Each record is expected to provide the
        ``COD_LINHA``, ``LAT``, ``LON`` and ``DTHR`` fields read below.
    COD : str
        Line code to keep; compared for equality against ``COD_LINHA``.

    Returns
    -------
    pandas.DataFrame
        The rows of the requested line, with ``DTHR`` parsed from
        ``dd/mm/YYYY HH:MM:SS`` and localized to ``America/Sao_Paulo``, plus a
        ``coords`` column of ``(lat, lon)`` float tuples. The raw ``LAT`` and
        ``LON`` columns are dropped.
    """
    # The context manager closes the decompressor handle even when parsing
    # fails; pandas does not close handles it did not open itself.
    with lzma.open(path) as file:
        df = pd.read_json(file, lines=True)

    # Explicit copy so the column assignments below never write into a
    # filtered view of the full frame (avoids SettingWithCopyWarning).
    df = df[df["COD_LINHA"] == COD].copy()

    # Coordinates are stored as text with a decimal comma.
    lat = df["LAT"].map(lambda x: float(x.replace(',', '.')))
    lon = df["LON"].map(lambda x: float(x.replace(',', '.')))

    df["DTHR"] = pd.to_datetime(
        df["DTHR"], format="%d/%m/%Y %H:%M:%S").dt.tz_localize("America/Sao_Paulo")

    # zip builds the tuples without a row-wise apply and also yields an empty
    # column cleanly when no row matches COD.
    df["coords"] = list(zip(lat, lon))
    return df.drop(columns=["LAT", "LON"])

def load_shape(path, COD):
    """Load the route shapes of a single bus line.

    Parameters
    ----------
    path : str or path-like
        JSON file readable by ``pandas.read_json``. Records are expected to
        provide the ``COD``, ``SHP``, ``LAT`` and ``LON`` fields read below.
    COD : str
        Line code to keep; compared for equality against ``COD``.

    Returns
    -------
    pandas.DataFrame
        One row per ``(COD, SHP)`` pair with a ``coords`` column holding the
        list of ``(lat, lon)`` float tuples of that shape, in file order.
    """
    df = pd.read_json(path)

    # Explicit copy so the column assignment below never writes into a
    # filtered view of the full frame (avoids SettingWithCopyWarning).
    df = df[df["COD"] == COD].copy()

    # Coordinates are stored as text with a decimal comma.
    lat = df["LAT"].map(lambda x: float(x.replace(',', '.')))
    lon = df["LON"].map(lambda x: float(x.replace(',', '.')))

    # Build the tuples column-wise instead of through a row-wise apply.
    df["coords"] = list(zip(lat, lon))

    return df.groupby(['COD', 'SHP'])['coords'].apply(list).reset_index()

In [None]:
# Both inputs are filtered down to the same bus line code.
LINE_CODE = "022"

dfVeiculos = load_veiculos(
    path="../../data/veiculos/2023_05_03_veiculos.json.xz",
    COD=LINE_CODE,
)
dfShape = load_shape(path="../../data/shape.json", COD=LINE_CODE)

In [None]:
def findNearestPoint(vehicle, shape):
    """Snap every vehicle reading to the closest route-shape point.

    Parameters
    ----------
    vehicle : pandas.DataFrame
        Must have a ``coords`` column of ``(lat, lon)`` pairs. It is modified
        in place: the ``nearest`` and ``idx`` columns are added.
    shape : pandas.DataFrame
        Must have a ``coords`` column whose entries are lists of
        ``(lat, lon)`` pairs. Shapes may have different numbers of points;
        all points are pooled in row order.

    Returns
    -------
    pandas.DataFrame
        The same ``vehicle`` object, where ``nearest`` is the ``[lat, lon]``
        of the closest pooled shape point and ``idx`` is that point's position
        in the pool divided by the pool size (a value in ``[0, 1)``).

    Raises
    ------
    ValueError
        If ``shape`` contributes no points at all.

    Notes
    -----
    The nearest neighbour is searched with a Euclidean metric directly on the
    (lat, lon) values, not with a geodesic distance.
    """
    vehicle_pts = np.asarray(list(vehicle.coords), dtype=float).reshape(-1, 2)

    # Flatten shape by shape: stacking the lists into one array first only
    # works when every shape has the same number of points.
    per_shape = [np.asarray(pts, dtype=float).reshape(-1, 2) for pts in shape.coords]
    if not per_shape or sum(len(pts) for pts in per_shape) == 0:
        raise ValueError("shape contains no coordinates to match against")
    shape_pts = np.concatenate(per_shape)

    # Only the neighbour index is needed; the tree's distance is discarded.
    _, idx = cKDTree(shape_pts).query(vehicle_pts, k=1)

    vehicle['nearest'] = shape_pts[idx].tolist()
    vehicle['idx'] = idx / len(shape_pts)

    return vehicle

# Add the 'nearest' (closest shape point) and 'idx' (its index / number of shape points) columns.
dfVeiculos = findNearestPoint(dfVeiculos, dfShape)


In [None]:
# Readings farther than this many meters from their snapped shape point are discarded.
MAX_SNAP_DISTANCE_M = 200

# geopy distance, in meters, between each reading and the shape point it was snapped to.
dfVeiculos["distances"] = dfVeiculos.parallel_apply(
    lambda x: distance.distance(x['coords'], x['nearest']).meters, axis=1)

# .copy() makes the filtered frame independent, so the column write below
# does not target a view of the unfiltered frame (SettingWithCopyWarning).
dfVeiculos = dfVeiculos[dfVeiculos["distances"] < MAX_SNAP_DISTANCE_M].copy()

# Replace the timestamp by an "HH:MM" label and use it as the index.
# NOTE(review): this drops the date and the seconds, so several readings can
# share one label and the cell cannot be re-run on its own output.
dfVeiculos["DTHR"] = dfVeiculos["DTHR"].dt.strftime("%H:%M")
dfVeiculos = dfVeiculos.set_index("DTHR")

# Trace of one vehicle; the x axis is inverted as in the original plot.
fig, ax = plt.subplots()
dfVeiculos.loc[dfVeiculos["VEIC"] == "BL605", "idx"].plot(ax=ax)
ax.invert_xaxis()
ax.set(
    title="Vehicle BL605",
    xlabel="DTHR (HH:MM)",
    ylabel="idx (nearest shape point index / number of shape points)",
)
plt.show()

In [None]:
# Rescale the 'idx' column to the [0, 1] range; the fitted scaler is kept so
# predictions can be mapped back later.
scaler = MinMaxScaler(feature_range=(0, 1))
dfVeiculos['idx'] = scaler.fit_transform(
    dfVeiculos['idx'].to_numpy().reshape(-1, 1)).ravel()

# Order the readings by 'DTHR' (time). A stable sort keeps the current relative
# order of readings that share the same DTHR label, so the result is deterministic.
dfVeiculos = dfVeiculos.sort_values(by='DTHR', kind='stable')

# Divide the data into 3 near-equal consecutive parts. The split is computed on
# row positions: np.array_split applied to a DataFrame goes through
# DataFrame.swapaxes, which pandas has deprecated.
# NOTE(review): the parts mix every vehicle in the frame — confirm that windows
# spanning readings of different vehicles are intended.
data_1, data_2, data_3 = (
    dfVeiculos.iloc[positions]
    for positions in np.array_split(np.arange(len(dfVeiculos)), 3)
)

# Function to create time-series dataset for LSTM
def create_dataset(dataset, look_back=1):
    """Turn a series into supervised (window, next value) pairs.

    Parameters
    ----------
    dataset : array-like
        Either a 1-D sequence of values or a 2-D array, in which case only
        column 0 is used.
    look_back : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, look_back)
        ``X[i]`` holds values ``i .. i + look_back - 1``.
    Y : numpy.ndarray, shape (n_windows,)
        ``Y[i]`` is the value right after window ``i``.

    ``n_windows`` is ``len(dataset) - look_back``, or 0 when the input is too
    short. ``X`` keeps its two dimensions in that case, so results from several
    series can still be concatenated.
    """
    values = np.asarray(dataset)
    # Callers pass a plain 1-D array (e.g. ``frame['idx'].values``); indexing
    # that with ``[..., 0]`` raises IndexError, so promote it to one column.
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    series = values[:, 0]

    n_windows = max(len(series) - look_back, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + look_back - 1].
    window_idx = np.arange(n_windows)[:, None] + np.arange(look_back)[None, :]
    X = series[window_idx]
    Y = series[look_back:look_back + n_windows].copy()
    return X, Y

# Number of past values fed to the network for each prediction
look_back = 3

# Window each part on its own, so that no window straddles two parts
(X1, y1), (X2, y2), (X3, y3) = (
    create_dataset(part['idx'].values, look_back)
    for part in (data_1, data_2, data_3)
)

# Stack the windows of all parts into a single sample set
X = np.concatenate((X1, X2, X3))
y = np.concatenate((y1, y2, y3))

# LSTM layers expect [samples, time steps, features]
X = X.reshape(X.shape[0], X.shape[1], 1)

# Hold out 20% of the windows for testing.
# NOTE(review): the split is shuffled, so overlapping windows can land on both
# sides; the network weights are not seeded either.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One LSTM layer followed by a single-unit regression head
model = Sequential([
    LSTM(50, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=2)

# Mean squared error on the held-out windows (still in scaled units)
loss = model.evaluate(X_test, y_test, verbose=0)

# Predictions for both subsets
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Undo the MinMax scaling on predictions and targets alike
y_train_pred, y_test_pred = (
    scaler.inverse_transform(pred) for pred in (y_train_pred, y_test_pred)
)
y_train, y_test = (
    scaler.inverse_transform(target.reshape(-1, 1)) for target in (y_train, y_test)
)

