In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import sys
sys.path.append('../.') 
from src.graph.create_osmnx_graph import OSMnxGraph
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
import networkx as nx
%reload_ext autoreload
from src.baseline_models.GCN.supervised_node_classification import SupervisedNodeClassificationGNN
from src.baseline_models.GCN.gcn import GCNModel
import torch
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
from src.baseline_models.GCN.GraphData import GraphData
import warnings
warnings.filterwarnings('ignore')
from shapely.geometry import Point

# Wczytanie danych

In [2]:
# Load the accident records and keep only the rows reported for Warsaw,
# dropping the `uczestnicy` column.
df_accidents = pd.read_csv("../data/wypadki-pl/accidents.csv")
df_accidents = df_accidents[df_accidents['mie_nazwa'] == 'Warszawa'].drop(columns='uczestnicy')

# Build one point geometry per accident from its GPS coordinate columns; the
# raw coordinate columns are redundant once the geometry column exists.
# NOTE(review): no CRS is assigned to this GeoDataFrame - confirm whether
# OSMnxGraph expects one.
geometry = [Point(xy) for xy in zip(df_accidents['wsp_gps_x'], df_accidents['wsp_gps_y'])]
gdf_accidents = gpd.GeoDataFrame(df_accidents, geometry=geometry).drop(columns=['wsp_gps_x', 'wsp_gps_y'])

# The graph-building cells below pass the event layer as `gdf_events`; bind the
# name here so the notebook runs top to bottom on a fresh kernel.
gdf_events = gdf_accidents

# Download the unsimplified drivable street network of Warsaw and split it
# into node and edge GeoDataFrames.
G = ox.graph.graph_from_place("Warsaw, Poland", network_type="drive", simplify=False)
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
gdf_nodes

In [3]:
# Display the edge GeoDataFrame extracted from the street graph.
gdf_edges

## Informacje o przetwarzaniu cech

### Krawędzie

Preprocessing obejmował:
* lanes - uzupełnienie braków wartością domyślną równą średniej z kolumny
* maxspeed - uzupełnienie braków wartością domyślną równą średniej z kolumny
* width - uzupełnienie braków wartością domyślną równą średniej z kolumny
* pozostałe kolumny - uzupełnienie braków wartością "unspecified"


### Wierzchołki

Preprocessing obejmował rozbicie wartości z kolumny "highway" za pomocą CountVectorizera. Kolumnę "street_count" pozostawiono bez zmian jako wartość numeryczną.


W obu przypadkach (krawędzie i wierzchołki) usunięto kolumnę "ref".


# Agregacja do wierzchołków

## 1. Tworzenie grafu i statystyki

In [4]:
# Build the graph helper from the event layer and the OSM node/edge tables.
graph_embedder = OSMnxGraph(gdf_events, gdf_nodes, gdf_edges)

# Create the graph data object with events aggregated per node using the mean;
# target normalisation is switched off (normalize_y=False).
graph_data = graph_embedder.create_graph(element_type='node', aggregation_method='mean', normalize_y=False)
graph_data

In [5]:
# Show the statistics reported by the graph helper for the node-level graph.
graph_embedder.show_statistics()

In [6]:
# Feature count of the graph object; used below as the GCN input dimension.
graph_data.num_features

In [7]:
# Number of distinct target values; used below as the number of classes.
len(graph_data.y.unique())

Poniżej znajduje się wywołanie metody do pozyskiwania cech wierzchołków.

In [8]:
# Fetch the node attribute table; its index is reused below as the identifier
# column when preparing the train/val/test split.
features = graph_embedder.get_node_attrs()
features

## 2. Uczenie i testowanie modelu - Supervised node classification

In [9]:
# Seed Python, NumPy and PyTorch before the model is created so that weight
# initialisation and training are reproducible; 42 matches the random_state
# used for the data split.
pl.seed_everything(42)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model dimensions: input size and class count come from the graph object,
# hidden and embedding sizes are fixed hyperparameters.
num_features = graph_data.num_features
hidden_dim = 256
out_dim = 128
num_classes = len(graph_data.y.unique())

# GCN encoder producing `out_dim`-sized embeddings.
gnn = GCNModel(in_dim=num_features, hidden_dim=hidden_dim, out_dim=out_dim)

# Supervised classification wrapper around the encoder.
model = SupervisedNodeClassificationGNN(gnn=gnn, emb_dim=out_dim, num_classes=num_classes, lr=0.0001)

In [10]:

# One row per graph element: `node` holds the identifier taken from the
# feature table index, `label` the target value from `graph_data.y`.
# The frame keeps its default RangeIndex, so the index of a row is the
# position of that element in `graph_data.y`.
nodes_labels = {'node': features.index.to_numpy(), 'label': graph_data.y.cpu().numpy()}
df_to_split = pd.DataFrame(nodes_labels)

# Stratified split: 80 % train, then the held-out 20 % is halved into
# validation and test.
df_train, df_holdout = train_test_split(df_to_split, test_size=0.2, random_state=42, stratify=df_to_split['label'])
df_val, df_test = train_test_split(df_holdout, test_size=0.5, random_state=42, stratify=df_holdout['label'])

train_nodes = df_train['node']
val_nodes = df_val['node']
test_nodes = df_test['node']

# Boolean masks over element positions. Membership is decided by the row
# position preserved in each subset's index, not by the identifier stored in
# the `node` column.
num_elements = len(graph_data.y)
train_mask = torch.zeros(num_elements, dtype=torch.bool)
val_mask = torch.zeros(num_elements, dtype=torch.bool)
test_mask = torch.zeros(num_elements, dtype=torch.bool)

train_mask[torch.as_tensor(df_train.index.to_numpy(), dtype=torch.long)] = True
val_mask[torch.as_tensor(df_val.index.to_numpy(), dtype=torch.long)] = True
test_mask[torch.as_tensor(df_test.index.to_numpy(), dtype=torch.long)] = True

# Every element must belong to exactly one subset.
assert bool((train_mask.int() + val_mask.int() + test_mask.int()).eq(1).all()), \
    "train/val/test masks must partition all graph elements"

graph_data.train_mask = train_mask
graph_data.val_mask = val_mask
graph_data.test_mask = test_mask

In [None]:

# Train for at most 50 epochs; the data module is given a one-element list
# holding the single graph.
trainer = pl.Trainer(max_epochs=50)
datamodule = GraphData([graph_data])
trainer.fit(model=model, datamodule=datamodule)

In [12]:
# Run the test loop; the returned metrics are displayed as the cell output.
trainer.test(model=model, datamodule=datamodule, verbose=False)

In [13]:
# Re-run the test loop and pick the weighted AUC from the first result dict.
test_metrics = trainer.test(model=model, datamodule=datamodule, verbose=False)[0]
test_auc = test_metrics["test/auc_weighted"]

# The first prediction batch unpacks into three values, named here as
# embeddings (z), targets (y) and predictions (y_pred).
first_prediction = trainer.predict(model=model, datamodule=datamodule)[0]
z, y, y_pred = first_prediction

print(f'AUC test = {test_auc * 100.:.2f}[%]')

In [14]:
# Display the predictions returned by trainer.predict.
y_pred

In [15]:
# Print the target values of the elements selected by the test mask.
print(y[test_mask].numpy())

# Agregacja do krawędzi

## 1. Tworzenie grafu i statystyki

In [41]:
# Rebuild the graph helper from the same event layer and OSM node/edge tables.
graph_embedder = OSMnxGraph(gdf_events, gdf_nodes, gdf_edges)

# Create the graph data object for the edge-level aggregation.
# NOTE(review): the node-level section calls create_graph with
# element_type='node', while this call uses aggregation_type='edge' - confirm
# which keyword the current OSMnxGraph.create_graph accepts.
graph_data = graph_embedder.create_graph(aggregation_type='edge')
graph_data

In [42]:
# Show the statistics reported by the graph helper for the edge-level graph.
graph_embedder.show_statistics()

In [43]:
# Fetch the edge attribute table; its index is reused below as the identifier
# column when preparing the train/val/test split.
features = graph_embedder.get_edge_attrs()

## 2. Uczenie i testowanie modelu - Supervised node classification

In [None]:

# Seed Python, NumPy and PyTorch before the model is created so that weight
# initialisation and training are reproducible; 42 matches the random_state
# used for the data split.
pl.seed_everything(42)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model dimensions: input size and class count come from the graph object,
# hidden and embedding sizes are fixed hyperparameters.
num_features = graph_data.num_features
hidden_dim = 256
out_dim = 128
num_classes = len(graph_data.y.unique())

# GCN encoder wrapped in the supervised classification module.
gnn = GCNModel(in_dim=num_features, hidden_dim=hidden_dim, out_dim=out_dim)

model = SupervisedNodeClassificationGNN(gnn=gnn, emb_dim=out_dim, num_classes=num_classes, lr=0.0001)

# One row per graph element: `node` holds the identifier taken from the
# feature table index, `label` the target value from `graph_data.y`.
# The frame keeps its default RangeIndex, so the index of a row is the
# position of that element in `graph_data.y`.
nodes_labels = {'node': features.index.to_numpy(), 'label': graph_data.y.cpu().numpy()}
df_to_split = pd.DataFrame(nodes_labels)

# Stratified split: 80 % train, then the held-out 20 % is halved into
# validation and test.
df_train, df_holdout = train_test_split(df_to_split, test_size=0.2, random_state=42, stratify=df_to_split['label'])
df_val, df_test = train_test_split(df_holdout, test_size=0.5, random_state=42, stratify=df_holdout['label'])

train_nodes = df_train['node']
val_nodes = df_val['node']
test_nodes = df_test['node']

# Boolean masks over element positions. Membership is decided by the row
# position preserved in each subset's index, not by the identifier stored in
# the `node` column.
num_elements = len(graph_data.y)
train_mask = torch.zeros(num_elements, dtype=torch.bool)
val_mask = torch.zeros(num_elements, dtype=torch.bool)
test_mask = torch.zeros(num_elements, dtype=torch.bool)

train_mask[torch.as_tensor(df_train.index.to_numpy(), dtype=torch.long)] = True
val_mask[torch.as_tensor(df_val.index.to_numpy(), dtype=torch.long)] = True
test_mask[torch.as_tensor(df_test.index.to_numpy(), dtype=torch.long)] = True

# Every element must belong to exactly one subset.
assert bool((train_mask.int() + val_mask.int() + test_mask.int()).eq(1).all()), \
    "train/val/test masks must partition all graph elements"

graph_data.train_mask = train_mask
graph_data.val_mask = val_mask
graph_data.test_mask = test_mask

# Train for at most 50 epochs; the data module is given a one-element list
# holding the single graph.
trainer = pl.Trainer(max_epochs=50)
datamodule = GraphData([graph_data])
trainer.fit(model=model, datamodule=datamodule)

In [45]:
# Run the test loop; the returned metrics are displayed as the cell output.
trainer.test(model=model, datamodule=datamodule, verbose=False)

In [46]:
# Re-run the test loop and pick the weighted AUC from the first result dict.
test_results = trainer.test(model=model, datamodule=datamodule, verbose=False)
test_auc = test_results[0]["test/auc_weighted"]

# The first prediction batch unpacks into three values, named here as
# embeddings (z), targets (y) and predictions (y_pred).
prediction_batches = trainer.predict(model=model, datamodule=datamodule)
z, y, y_pred = prediction_batches[0]

print(f'AUC test = {test_auc * 100.:.2f}[%]')

In [47]:
# Display the predictions returned by trainer.predict.
y_pred

In [48]:
# Largest value among the predictions.
y_pred.max()