In [1]:
import logging
import sys

root = logging.getLogger()
root.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)

In [2]:
import sys
from scripts.path_findings import ch_builder

import numpy as np

sys.path.append('../scripts')
sys.path.append('../')

from scripts import graph_osm_loader
from scripts import pipeline, centroids_graph_builder, clustering
from scripts.path_findings import excraction_pfa, dijkstra_pfa
from scripts import utils

2025-01-24 20:19:48,553 - numexpr.utils - INFO - Note: NumExpr detected 32 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
2025-01-24 20:19:48,554 - numexpr.utils - INFO - NumExpr defaulting to 16 threads.


In [3]:
from torch.utils.data import DataLoader
import pytorch_lightning as pl

In [4]:
GRAPH_ID = 'R13470549'  # R13470549 R2555133 R3766483
# примеры id есть в graph_osm_loader.py
g = graph_osm_loader.get_graph(GRAPH_ID)
print(len(g.nodes), len(g.edges))

715 1038


In [5]:

cms_resolver = clustering.LouvainCommunityResolver(resolution=100)

t, cg = centroids_graph_builder.CentroidGraphBuilder().build_with_time(g, cms_resolver)

find centroids:   0%|          | 0/269 [00:00<?, ?it/s]

find edges:   0%|          | 0/269 [00:00<?, ?it/s]

In [20]:
import networkx as nx
points = utils.read_points(GRAPH_ID, g, num=10000)
data = []
for p1, p2 in points:
    path = nx.dijkstra_path_length(g, p1,p2,weight='length')
    c1,c2 = g.nodes()[p1]['cluster'], g.nodes()[p2]['cluster']
    vector1 = np.zeros(len(cg.g.nodes), dtype=np.float32)
    vector2 = np.zeros(len(cg.g.nodes), dtype=np.float32)
    vector1[c1]=1
    vector1[c2]=1
    data.append((vector1,np.array([path], dtype=np.float32)))

In [55]:
from scripts.path_findings.h_search_builder import MinClusterDistance
import torch
alg = MinClusterDistance(workers=20).build_astar(g, cms_resolver) 
a = alg.h.d_cluster

U,S,V = np.linalg.svd(a, full_matrices=False)


In [56]:
k = 2 
uk = U[:,:k]
sk = np.diag(S[:k])
vk = V[:k,:]

aa = uk @ sk @ vk
alg.h.d_cluster = aa

In [52]:
from scripts.path_findings.h_search_builder import MinClusterDistance
import torch
Q = 10000
a = torch.tensor(MinClusterDistance(workers=20).build_astar(g, cms_resolver).h.d_cluster, dtype=torch.float64)
x = torch.ones((len(a), Q),requires_grad=True, dtype=torch.float64)
y = torch.ones((Q, len(a)),requires_grad=True, dtype=torch.float64)
alpha = 0.00000001
b = 0.9
for i in range(15):
    aa = torch.mm(x,y)
    loss = torch.sum(aa-a)**2
    print(loss)
    loss.backward()
    with torch.no_grad():
        x -=x.grad * alpha
        y -=y.grad * alpha
    x.grad.data.zero_()
    y.grad.data.zero_()
    alpha*=b


tensor(1.8814e+17, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(1.5501e+31, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(2.0474e+72, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(3.0953e+195, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(inf, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(inf, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)
tensor(nan, dtype=torch.float64, grad_fn=<PowBackward0>)


In [22]:
from torch.utils.data import Dataset
import torch

class FacePointDataset(Dataset):
    def __init__(self):
        super(Dataset, self).__init__()
        self.data = data
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        f,g  = data[idx]
        return torch.from_numpy(f), torch.tensor(g) 

In [23]:
from torch import nn


class MyModel(nn.Sequential):
    def __init__(self, num_points=len(cg.g.nodes)):
        super().__init__()
        self.ln = nn.Linear(num_points, 2000)
        self.rl = nn.ReLU()
        self.ln1 = nn.Linear(2000, 1)
    # def forward(self, input):
    #     print(input.shape)
    #     input = self.ln

In [24]:


import torchmetrics


class MyModelTrainer(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = MyModel()
        self.metrics = torchmetrics.MeanSquaredError(squared=True)
        self.loss = nn.MSELoss()

    def training_step(self, batch, batch_idx):
        x, y_gt = batch
        y_pr = self.model(x)
        loss = self.loss(y_pr, y_gt)
        metrics = {"train_loss": loss}
        self.log_dict(metrics, prog_bar=True, on_step=True, on_epoch=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y_gt = batch
        y_pr = self.model(x)
        
        loss = self.metrics(y_pr, y_gt)
        metrics = {"valid_loss": loss}
        self.log_dict(metrics, prog_bar=True, on_step=True, on_epoch=True, logger=True)
        return metrics

    def configure_optimizers(self):
        """Define optimizers and LR schedulers."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3, weight_decay=5e-4)
        #optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-3, momentum=0.9)

        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
            optimizer,
            gamma=0.95
        )
        lr_dict = {
            # The scheduler instance
            "scheduler": lr_scheduler,
            # The unit of the scheduler's step size, could also be 'step'.
            # 'epoch' updates the scheduler on epoch end whereas 'step'
            # updates it after a optimizer update.
            "interval": "epoch",
            # How many epochs/steps should pass between calls to
            # `scheduler.step()`. 1 corresponds to updating the learning
            # rate after every epoch/step.
            "frequency": 1,
            # Metric to to monitor for schedulers like `ReduceLROnPlateau`
            "monitor": "train_loss",
        }

        return [optimizer], [lr_dict]


In [25]:
DEVICE = 'cuda'  if torch.cuda.is_available() else 'cpu'
DEVICE

'cuda'

In [26]:


def train():
    dataset = FacePointDataset()
    
    train, valid = torch.utils.data.random_split(dataset, [0.9, 0.1])
    
    dl_train = DataLoader(train, batch_size=30, shuffle=True, num_workers=4, persistent_workers=True)
    dl_valid = DataLoader(valid, batch_size=10, shuffle=False, num_workers=4, persistent_workers=True)
    
    model = MyModelTrainer()
    
    trainer = pl.Trainer(
            accelerator=DEVICE,
            devices=1,
            max_epochs=20,
            log_every_n_steps=5)
    trainer.fit(model, dl_train, dl_valid)
    
    torch.save(model.model.state_dict(), '../data/models/model.ckpt')
    
    return model.model

In [27]:
m = train()

2025-01-24 18:53:19,948 - pytorch_lightning.utilities.rank_zero - INFO - GPU available: True (cuda), used: True
2025-01-24 18:53:19,949 - pytorch_lightning.utilities.rank_zero - INFO - TPU available: False, using: 0 TPU cores
2025-01-24 18:53:19,949 - pytorch_lightning.utilities.rank_zero - INFO - HPU available: False, using: 0 HPUs
2025-01-24 18:53:19,951 - pytorch_lightning.accelerators.cuda - INFO - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2025-01-24 18:53:19,966 - pytorch_lightning.callbacks.model_summary - INFO - 
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MyModel          | 542 K  | train
1 | metrics | MeanSquaredError | 0      | train
2 | loss    | MSELoss          | 0      | train
-----------------------------------------------------
542 K     Trainable params
0         Non-trainable params
542 K     Total params
2.168     Total estimated model params size (MB)
6         Modules in train mode
0         Mo

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [31]:
x = m(torch.from_numpy(data[90][0]).unsqueeze(0)).detach().numpy()

In [32]:
x

array([[4007.9956]], dtype=float32)

In [211]:
args = np.argwhere(x >= 0.5)

In [212]:
args

array([[  0,   0],
       [  0,   1],
       [  0,   2],
       [  0,   3],
       [  0,   7],
       [  0,   8],
       [  0,   9],
       [  0,  10],
       [  0,  11],
       [  0,  15],
       [  0,  16],
       [  0,  27],
       [  0,  51],
       [  0,  69],
       [  0,  94],
       [  0,  95],
       [  0,  96],
       [  0,  97],
       [  0,  98],
       [  0, 102],
       [  0, 103],
       [  0, 176],
       [  0, 178],
       [  0, 180],
       [  0, 183],
       [  0, 184],
       [  0, 191],
       [  0, 192],
       [  0, 193],
       [  0, 194],
       [  0, 195],
       [  0, 221],
       [  0, 225],
       [  0, 226],
       [  0, 232],
       [  0, 233],
       [  0, 234],
       [  0, 237],
       [  0, 238],
       [  0, 248],
       [  0, 256],
       [  0, 257],
       [  0, 261],
       [  0, 263],
       [  0, 266]])

In [213]:
len(args), sum(data[0][1])

(45, np.float32(9.0))

In [214]:
from scripts.path_findings import dijkstra_pfa
d = dijkstra_pfa.Dijkstra(g)
d.find_path_cls(points[0][0],points[0][1], cms = set([int(a[0]) for a in args]))

KeyError: 5306954570

In [33]:
nx.dijkstra_path_length(g,points[90][0], points[90][1], weight='length')

np.float64(5075.117000000001)