In [122]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.nn.functional import normalize
from pandas import DataFrame
from datetime import datetime
import requests

In [None]:
# Load the `sql` IPython extension that provides the %sql magic used in this notebook.
%load_ext sql
# NOTE(review): presumably a workaround for a table-style incompatibility in the
# installed SQL magic - confirm it is still required.
%config SqlMagic.style = '_DEPRECATED_DEFAULT'
# Ask the magic to hand query results back as pandas objects (see the extension's
# `autopandas` option).
%config SqlMagic.autopandas = True
# Open the connection to the local `buses` PostgreSQL database.
# NOTE(review): the user name and password are hardcoded in this URL; consider
# reading the connection string from an environment variable instead.
%sql postgresql+psycopg://admin:admin@localhost:5432/buses

In [None]:
# Choose the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

In [125]:
def get_model_data_from_db(line_name: int, optional_direction: str | int):
    if isinstance(optional_direction, str): 
        res = %sql SELECT id FROM public.directions WHERE value = :optional_direction
        direction_id = res[0][0]
    else:
        direction_id = optional_direction
    records = %sql SELECT course_loid, day_course_loid, longitude, latitude, angle, reached_meters, order_in_course, last_ping_date FROM \
                public.positions WHERE optional_direction = :direction_id AND line_name = :line_name
    return records

In [126]:
def _seconds_since_midnight(timestamp_ms) -> int:
    """Return the local-time second of the day for an epoch timestamp given in milliseconds."""
    moment = datetime.fromtimestamp(float(timestamp_ms) / 1000)
    return moment.hour * 3600 + moment.minute * 60 + moment.second


def _fetch_scheduled_departures(course_loid, timeout: float) -> dict:
    """Download one course and map each stop's ``orderInCourse`` to its ``scheduledDepartureSec``."""
    response = requests.get(
        f"https://przystanki.bialystok.pl/portal/getRealCourse.json?courseId={int(course_loid)}",
        timeout=timeout,
    )
    response.raise_for_status()
    departures = {}
    for stop in response.json()["realCourse"]["stoppings"]:
        # Keep the first entry per stop order, matching a first-match lookup.
        departures.setdefault(stop["orderInCourse"], stop["scheduledDepartureSec"])
    return departures


def prepare_objects(
    data: DataFrame, device=None, request_timeout: float = 30.0
) -> list[tuple[torch.Tensor, torch.Tensor]]:
    """Turn position rows into ``(features, target)`` training pairs.

    For every row the "predicted" stop is the next stop
    (``order_in_course + 1``) when the same ``day_course_loid`` has at least
    one ping recorded for it, otherwise the row's own stop. The target is the
    local time of day (in seconds) of the earliest ping recorded at that stop.

    Feature order: longitude, latitude, angle, reached_meters,
    order_in_course, ping time of day (s), scheduled departure of the
    predicted stop (s), predicted stop order.

    Args:
        data: Frame with the columns ``course_loid``, ``day_course_loid``,
            ``longitude``, ``latitude``, ``angle``, ``reached_meters``,
            ``order_in_course`` and ``last_ping_date`` (epoch milliseconds).
        device: Device the tensors are created on. Defaults to ``"cuda"`` when
            available and ``"cpu"`` otherwise.
        request_timeout: Timeout in seconds for each course download.

    Returns:
        A list of ``(x, y)`` tuples where ``x`` has shape ``(8,)`` and ``y``
        has shape ``(1,)``. A list (rather than a one-shot ``zip``) can be
        iterated repeatedly and supports ``len()`` and indexing, which
        ``DataLoader`` needs. Empty input yields an empty list.

    Raises:
        IndexError: If the downloaded course has no stop matching the
            predicted stop order.
        requests.HTTPError: If a course download returns an error status.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if len(data) == 0:
        return []

    # Earliest ping per (day course, stop order), computed once instead of
    # re-filtering the whole frame for every row.
    first_ping_ms = (
        data.groupby(["day_course_loid", "order_in_course"])["last_ping_date"]
        .min()
        .to_dict()
    )

    departures_by_course = {}
    parsed_x = []
    parsed_y = []
    # itertuples keeps each column's own dtype (iterrows upcasts mixed rows).
    for counter, row in enumerate(data.itertuples(index=False), start=1):
        if counter % 10000 == 0:
            print(f"Parsed {counter}")

        # One HTTP request per distinct course; later rows reuse the result.
        if row.course_loid not in departures_by_course:
            departures_by_course[row.course_loid] = _fetch_scheduled_departures(
                row.course_loid, request_timeout
            )
        departures = departures_by_course[row.course_loid]

        next_stop = row.order_in_course + 1
        if (row.day_course_loid, next_stop) in first_ping_ms:
            predicted_stop_point = next_stop
        else:
            predicted_stop_point = row.order_in_course

        if predicted_stop_point not in departures:
            raise IndexError(
                f"Course {row.course_loid} has no stop with orderInCourse == {predicted_stop_point}"
            )

        arrived_time = _seconds_since_midnight(
            first_ping_ms[(row.day_course_loid, predicted_stop_point)]
        )
        parsed_x.append(
            [
                float(row.longitude),
                float(row.latitude),
                float(row.angle),
                float(row.reached_meters),
                float(row.order_in_course),
                float(_seconds_since_midnight(row.last_ping_date)),
                float(departures[predicted_stop_point]),
                float(predicted_stop_point),
            ]
        )
        parsed_y.append([float(arrived_time)])

    # NOTE(review): `normalize` with its defaults rescales every *row* to unit
    # L2 norm; confirm that per-sample (not per-feature) scaling is intended.
    features = normalize(torch.tensor(parsed_x, dtype=torch.float32, device=device))
    targets = torch.tensor(parsed_y, dtype=torch.float32, device=device)

    return list(zip(features, targets))

In [127]:
# Fully connected regressor: 8 input features narrowed step by step to 1 output.
net = nn.Sequential(
    # Input stage: dropout sits between the first linear layer and its activation.
    nn.Linear(in_features=8, out_features=256),
    nn.Dropout(p=0.2),
    nn.LeakyReLU(),
    # Hidden stages.
    nn.Linear(in_features=256, out_features=128),
    nn.LeakyReLU(),
    nn.Linear(in_features=128, out_features=64),
    nn.LeakyReLU(),
    nn.Linear(in_features=64, out_features=32),
    nn.ReLU(),
    # Output stage: a single unbounded value.
    nn.Linear(in_features=32, out_features=1),
)
# Move the parameters to the backend selected earlier in the notebook.
net = net.to(device)

In [128]:
def train(model, dataset, num_epochs=100, batch_size=32, learning_rate=0.001, device=None):
    """Fit ``model`` to ``dataset`` with Adam and mean-squared-error loss.

    Args:
        model: Module mapping a batch of inputs to a batch of predictions.
        dataset: Map-style dataset (supports ``len()`` and indexing) yielding
            ``(inputs, targets)`` pairs.
        num_epochs: Number of passes over the dataset.
        batch_size: Samples per optimisation step.
        learning_rate: Adam learning rate.
        device: Device batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has none).

    Returns:
        A list with the mean batch loss of every epoch.

    Raises:
        ValueError: If the dataset yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    criterion = nn.MSELoss()  # MSE because the target is a continuous value
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model.train()  # make sure dropout layers are active while fitting
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()  # reset gradients from the previous step
            outputs = model(inputs)  # forward pass
            # Give targets exactly the shape of the predictions. Comparing a
            # (B,) tensor with a (B, 1) tensor would broadcast to (B, B) and
            # silently optimise the wrong objective.
            loss = criterion(outputs, targets.reshape(outputs.shape))
            loss.backward()  # backward pass
            optimizer.step()  # weight update
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("dataset produced no batches; nothing to train on")

        # Report the epoch mean rather than whatever the last batch happened to be.
        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

    return epoch_losses

In [None]:
# Load the stored positions for line 100. The direction is given as an int, so
# it is used directly as the direction id (no label lookup).
data = get_model_data_from_db(100, 12282)

In [None]:
# Build (features, target) pairs from the loaded rows. This performs one HTTP
# request per distinct course_loid, so it can take a while.
prepared = prepare_objects(data)

In [None]:
# Materialise the pairs into a list so they can be indexed, measured with
# len() and iterated more than once.
data_zipped = list(prepared)

In [None]:
# Train on the materialised list: DataLoader needs a dataset with len() and
# indexing, and `prepared` has already been consumed by `list(prepared)`.
# The trailing ';' keeps the cell from echoing any return value.
train(net, data_zipped);