In [None]:
%env CUDA_VISIBLE_DEVICES=0

import numpy as np
import networkx as nx
import pandas as pd
import cudf
import cugraph
import time
from cuxfilter.charts.datashader.custom_extensions.graph_assets import calc_connected_edges

# import plotly.express as px
# import plotly.graph_objects as go

# Layout settings read by get_connected_dots_for_graph when it calls cugraph.layout.force_atlas2.
ITERATIONS=500  # passed as max_iter
THETA=1.0  # passed as barnes_hut_theta
OPTIMIZE=True  # passed as barnes_hut_optimize

import holoviews as hv

from colorcet import fire

from holoviews.operation.datashader import datashade, dynspread
from holoviews.operation import decimate

from dask.distributed import Client

# Dask distributed client created with default arguments.
# NOTE(review): presumably this starts a local cluster — confirm for this environment.
client = Client()
# Load the bokeh and matplotlib HoloViews backends for notebook display.
hv.notebook_extension('bokeh', 'matplotlib')
# Attributes set on the imported operation classes (presumably acting as defaults for later calls).
decimate.max_samples=20000
dynspread.threshold=0.01
datashade.cmap=fire[40:]  # the `fire` palette without its first 40 entries
# NOTE(review): `sz` is not referenced anywhere else in the visible notebook.
sz = dict(width=150,height=150)
# Plot options for RGB elements; IPython line magic, must stay on a line of its own.
%opts RGB [xaxis=None yaxis=None show_grid=False bgcolor="black"]


In [None]:
def get_connected_dots_for_graph(edges):
    """Lay out a graph with ForceAtlas2 and return its edges as drawable segments.

    Parameters
    ----------
    edges : array-like
        Edge list convertible to a two-column ``pandas.DataFrame``; the first
        column is used as the source node and the second as the target node.

    Returns
    -------
    The object produced by ``calc_connected_edges`` for the computed layout
    (the notebook passes it straight to ``hv.Curve``).

    Notes
    -----
    Reads the module-level ``ITERATIONS``, ``OPTIMIZE`` and ``THETA`` settings
    and prints the time spent inside ``cugraph.layout.force_atlas2``.
    """
    edgelist = pd.DataFrame(edges, columns=["source", "target"])

    graph = cugraph.Graph()
    graph.from_pandas_edgelist(edgelist, source="source", destination="target", renumber=True)

    # perf_counter is a monotonic clock meant for measuring intervals.
    start = time.perf_counter()
    pos_gdf = cugraph.layout.force_atlas2(
        graph,
        max_iter=ITERATIONS,
        pos_list=None,
        outbound_attraction_distribution=True,
        lin_log_mode=False,
        edge_weight_influence=1.0,
        jitter_tolerance=1.0,
        barnes_hut_optimize=OPTIMIZE,
        barnes_hut_theta=THETA,
        scaling_ratio=2.0,
        strong_gravity_mode=False,
        gravity=1.0,
        verbose=False,
        callback=None,
    )
    elapsed = time.perf_counter() - start
    print("Cugraph time for obtaining layout: " + str(elapsed))

    # Join the node positions with the edge list so every edge becomes a line segment.
    return calc_connected_edges(
        pos_gdf,
        cudf.from_dataframe(edgelist, allow_copy=True),
        node_x="x",
        node_y="y",
        node_x_dtype="float32",
        node_y_dtype="float32",
        node_id="vertex",
        edge_source="source",
        edge_target="target",
        edge_aggregate_col=None,
        edge_render_type="direct",
    )


## Weather ERA5

In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code — only load archives from a trusted source.
era5_dataset = np.load("../../data/era5_usa.npz", allow_pickle=True)
edges = era5_dataset["edges"]  # edge list stored in the archive

In [None]:
# Out-degree distribution: how many source nodes have each number of outgoing edges.
(
    pd.DataFrame(data=edges, columns=["source", "target"])
    .groupby("source")["target"]
    .count()
    .value_counts()
)

In [None]:
list(era5_dataset.keys())


In [None]:
era5_dataset["spatial_node_features"]

In [None]:
# Split the last axis of the spatial node features into two coordinate arrays.
# NOTE(review): index 0 = latitude and index 1 = longitude is assumed here — confirm
# against the feature names stored with the dataset.
latitudes = era5_dataset["spatial_node_features"][..., 0]
longitudes = era5_dataset["spatial_node_features"][..., 1]


In [None]:
latitudes

In [None]:
longitudes

In [None]:
# Number of distinct latitude values. np.unique flattens its input, so this does not rely
# on the array having a leading axis of length exactly 1.
num_latitude_values = len(np.unique(latitudes))
num_latitude_values


In [None]:
# Number of distinct longitude values. np.unique flattens its input, so this does not rely
# on the array having a leading axis of length exactly 1.
num_longitude_values = len(np.unique(longitudes))
num_longitude_values

In [None]:
def create_edge_list(m, n):
    """Build the directed edge list of an ``m`` x ``n`` grid graph.

    Nodes are numbered row by row: the node in row ``i`` and column ``j`` has
    id ``i * n + j``. Every node is linked to its horizontal, vertical and
    diagonal neighbours, and each link is emitted in both directions.

    Parameters
    ----------
    m : int
        Number of grid rows.
    n : int
        Number of grid columns.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(2, num_edges)``; row 0 holds one endpoint of
        each directed edge and row 1 the other. A grid without edges yields
        shape ``(2, 0)``.
    """
    edge_list = []

    for i in range(m):
        for j in range(n):
            # Row-major id: the stride between consecutive rows is the column count n.
            node = i * n + j

            # Connect to the right neighbor and to both right-hand diagonals
            if j < n - 1:
                right_neighbor = node + 1
                edge_list.append((node, right_neighbor))
                edge_list.append((right_neighbor, node))
                if i < m - 1:
                    right_bottom_neighbor = node + n + 1
                    edge_list.append((right_bottom_neighbor, node))
                    edge_list.append((node, right_bottom_neighbor))
                if i > 0:
                    right_top_neighbor = node - n + 1
                    edge_list.append((right_top_neighbor, node))
                    edge_list.append((node, right_top_neighbor))

            # Connect to the bottom neighbor
            if i < m - 1:
                bottom_neighbor = node + n
                edge_list.append((node, bottom_neighbor))
                edge_list.append((bottom_neighbor, node))

    # reshape(-1, 2) keeps the result two-dimensional even when there are no edges.
    return np.array(edge_list, dtype=np.int64).reshape(-1, 2).T

In [None]:
%%opts RGB [tools=["hover"] width=1200 height=1200]
# Lay out and rasterize the edge list that is stored in the ERA5 archive.
connected = get_connected_dots_for_graph(edges)
r_direct = hv.Curve(connected, label="Direct")
datashade(r_direct)

In [None]:
%%opts RGB [tools=["hover"] width=1200 height=1200]
# create_edge_list returns a (2, num_edges) array; transpose it to one (source, target) row per edge.
era5_correct_edges = create_edge_list(m=num_latitude_values, n=num_longitude_values).T

# Same layout and rasterization as for the stored edges, now on the regenerated grid edges.
connected = get_connected_dots_for_graph(era5_correct_edges)
r_direct = hv.Curve(connected, label="Direct")
datashade(r_direct)

In [None]:
# %%opts RGB [tools=["hover"] width=1200 height=1200]
# edges_2 = create_edge_list(5, 4).T

# connected = get_connected_dots_for_graph(edges_2)
# r_direct = hv.Curve(connected, label="Direct")
# datashade(r_direct)

In [None]:
list(era5_dataset.keys())

In [None]:
era5_targets = era5_dataset["targets"]

In [None]:
# Targets together with a flag telling whether any of them is NaN.
era5_targets, np.isnan(era5_targets).any()

In [None]:
era5_temporal_features = era5_dataset["temporal_node_features"]

In [None]:
# Temporal features, their NaN flag and their stored names.
era5_temporal_features, np.isnan(era5_temporal_features).any(), era5_dataset["temporal_node_feature_names"]

In [None]:
era5_spatiotemporal_features = era5_dataset["spatiotemporal_node_features"]

In [None]:
era5_spatiotemporal_features, era5_dataset["spatiotemporal_node_feature_names"]

In [None]:
# Running product of the array dimensions (element counts of the leading sub-shapes).
np.cumprod(era5_spatiotemporal_features.shape)

In [None]:
# A feature counts as empty when it is NaN at every position of the first two axes.
# Testing with .all() avoids hard-coding the product of those two axis lengths.
empty_features_mask = np.isnan(era5_spatiotemporal_features).all(axis=(0, 1))
empty_features_mask

In [None]:
# Drop the features flagged by empty_features_mask.
era5_spatiotemporal_features_only_nonempty = era5_spatiotemporal_features[..., ~empty_features_mask]
# NOTE(review): despite the "_mask" suffix this holds per-feature NaN counts, not booleans.
nonempty_features_with_nan_mask = np.isnan(era5_spatiotemporal_features_only_nonempty).sum((0, 1))
nonempty_features_with_nan_mask


In [None]:
# For the first remaining feature that still contains NaNs: flag, per position of the second-to-last
# axis, whether a NaN occurs anywhere along axis 0.
np.isnan(era5_spatiotemporal_features_only_nonempty[..., nonempty_features_with_nan_mask > 0][..., 0]).sum(0) > 0

In [None]:
# Split the stored spatiotemporal feature names using empty_features_mask.
era5_spatiotemporal_features_names = era5_dataset["spatiotemporal_node_feature_names"]
era5_spatiotemporal_features_names_nonempty = era5_spatiotemporal_features_names[~empty_features_mask]

# Holds the names (not the values) of the features flagged as empty.
era5_spatiotemporal_features_empty = era5_spatiotemporal_features_names[empty_features_mask]

In [None]:
era5_spatiotemporal_features_empty

In [None]:
# Remove the empty spatiotemporal features from the numeric feature names. Filtering keeps the
# stored order; a plain set difference would return the names in arbitrary order.
empty_feature_names = set(era5_spatiotemporal_features_empty.tolist())
num_features_names_new = np.array(
    [name for name in era5_dataset["num_feature_names"].tolist() if name not in empty_feature_names]
)
num_features_names_new

In [None]:
era5_dataset["bin_feature_names"]

In [None]:
era5_dataset["cat_feature_names"]

### Summary
- Corrected graph structure
- Removed completely empty columns

In [None]:
# era5_dataset = dict(**era5_dataset)
# era5_dataset["edges"] = era5_correct_edges
# np.savez_compressed(file="../../data/era5_usa.npz", **era5_dataset)

# City roads

In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code — only load archives from a trusted source.
city_roads_dataset = np.load("../../data/traffic_jams.npz",allow_pickle=True)
print(list(city_roads_dataset.keys()))
edges_roads = city_roads_dataset["edges"]

In [None]:
# Display the loaded archive object. Stray non-ASCII characters that had been fused to the
# variable name (and made the cell raise NameError) are removed.
city_roads_dataset

In [None]:
# %%opts RGB [tools=["hover"] width=1200 height=1200]

# connected = get_connected_dots_for_graph(edges_roads)
# r_direct = hv.Curve(connected, label="Direct")
# datashade(r_direct)

In [None]:
# NOTE(review): stray shell escape with no command after it.
!

In [None]:
# NOTE(review): hard-coded absolute home-directory path.
!ls /home/fvelikon/projects/dgl-spt/notebooks/fvelikon/data_from_yt_jams

In [None]:
# Export the targets array. It is read straight from the archive because the
# `cuty_roads_targets` variable is only assigned in a later cell, so referencing it here
# fails when the notebook is run top to bottom.
# NOTE(review): hard-coded absolute output path.
np.save("/home/fvelikon/projects/dgl-spt/notebooks/fvelikon/data_from_yt_jams/time_series_component.npy", city_roads_dataset["targets"])

In [None]:
import os

import torch


# "~" is not expanded when a file is opened by path, so expand it before calling torch.load.
# NOTE(review): torch.load unpickles the file — only load files from a trusted source.
targets_torch = torch.load(os.path.expanduser("~/projects/dgl-spt/notebooks/fvelikon/data_from_yt_jams/time_series_component.pt"))
targets_torch

In [None]:
# NOTE(review): "cuty" looks like a typo for "city"; the name is used consistently in the cells below.
cuty_roads_targets = city_roads_dataset["targets"]

In [None]:
np.isnan(cuty_roads_targets).any()

In [None]:
cuty_roads_targets

In [None]:
cuty_roads_targets.shape

In [None]:
cuty_roads_targets

In [None]:
# Largest non-NaN target value.
cuty_roads_targets[~np.isnan(cuty_roads_targets)].reshape(-1).max()

In [None]:
# Smallest strictly positive, non-NaN target value.
cuty_roads_targets[np.logical_and(~np.isnan(cuty_roads_targets), cuty_roads_targets > 0)].reshape(-1).min()

In [None]:
cuty_roads_targets.shape

In [None]:
np.isnan(cuty_roads_targets).shape

In [None]:
# Largest NaN count obtained when summing along axis 1.
np.isnan(cuty_roads_targets).sum(1).max()

In [None]:
# Largest value in the edge array (presumably the highest node id — confirm).
edges_roads.max()

In [None]:
cuty_roads_targets, np.isnan(cuty_roads_targets).any()

In [None]:
# Strictly positive targets as a flat array; NaN > 0 is False, so NaNs are dropped as well.
city_targets = cuty_roads_targets[cuty_roads_targets > 0].reshape(-1)



In [None]:
cuty_roads_targets.shape

In [None]:
# Largest NaN count obtained when summing along axis 0.
np.isnan(cuty_roads_targets).sum(0).max()

In [None]:
cuty_roads_temporal_features = city_roads_dataset["temporal_node_features"]

In [None]:
# Temporal features, their NaN flag and their stored names.
cuty_roads_temporal_features, np.isnan(cuty_roads_temporal_features).any(), city_roads_dataset["temporal_node_feature_names"]

In [None]:
cuty_roads_spatiotemporal_features = city_roads_dataset["spatiotemporal_node_features"]

In [None]:
cuty_roads_spatiotemporal_features, city_roads_dataset["spatiotemporal_node_feature_names"]

In [None]:
np.isnan(cuty_roads_spatiotemporal_features).any()

In [None]:
spatial_features = city_roads_dataset["spatial_node_features"]



In [None]:
# Spatial features, their NaN flag and their stored names.
spatial_features, np.isnan(spatial_features).any(), city_roads_dataset["spatial_node_feature_names"]

# Browser games

In [None]:
# %%opts RGB [tools=["hover"] width=1200 height=1200]

# r_direct = hv.Curve(get_connected_dots_for_graph(np.load("../../data/ts_browser_games_v1.npz")["edges"]), label="Direct")
# datashade(r_direct)

In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code — only load archives from a trusted source.
browser_games_data = np.load("../../data/ts_browser_games_v1.npz", allow_pickle=True)

print(list(browser_games_data.keys()))

In [None]:
browser_games_targets = browser_games_data["targets"]

In [None]:
# Targets together with a flag telling whether any of them is NaN.
browser_games_targets, np.isnan(browser_games_targets).any()

In [None]:
browser_games_targets.shape

In [None]:
browser_games_temporal_features = browser_games_data["temporal_node_features"]

In [None]:
# Temporal features, their NaN flag and their stored names.
browser_games_temporal_features, np.isnan(browser_games_temporal_features).any(), browser_games_data["temporal_node_feature_names"]

In [None]:
browser_games_spatiotemporal_features = browser_games_data["spatiotemporal_node_features"]

In [None]:
browser_games_spatiotemporal_features, browser_games_data["spatiotemporal_node_feature_names"]

In [None]:
# Scratch arithmetic. NOTE(review): the operands look like values copied from a displayed
# output (possibly the cumprod in the next cell) — confirm, or replace with an expression.
560494080 / 46707840

In [None]:
# Running product of the array dimensions.
np.cumprod(browser_games_spatiotemporal_features.shape)

In [None]:
# NaN count per index of the last axis, summed over the first two axes.
np.isnan(browser_games_spatiotemporal_features).sum((0, 1))

In [None]:
# The next twelve cells repeat one expression for last-axis indices 0..11:
# the NaN count along axis 0 for that index.
np.isnan(browser_games_spatiotemporal_features)[..., 0].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 1].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 2].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 3].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 4].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 5].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 6].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 7].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 8].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 9].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 10].sum(0)

In [None]:
np.isnan(browser_games_spatiotemporal_features)[..., 11].sum(0)

In [None]:
browser_games_spatiotemporal_features.shape

In [None]:
# NOTE(review): import placed mid-notebook; this cell must run before any later use of `px`.
import plotly.express as px

# Scatter of the NaN counts along axis 0 for last-axis index 11.
px.scatter(np.isnan(browser_games_spatiotemporal_features)[..., 11].sum(0), title="# of NaNs for each node at EACH timestamp (values across different timestamps are equal)")

In [None]:
# Scatter of the NaN counts along axis 1 for last-axis index 11.
px.scatter(np.isnan(browser_games_spatiotemporal_features)[..., 11].sum(1), title="# of NaNs for each timestamp for EACH node (values across different nodes are equal)")

In [None]:

# Check that every feature has the same NaN counts along axis 0.
# np.isnan runs once over the whole array instead of once per feature.
nan_counts_axis0 = np.isnan(browser_games_spatiotemporal_features).sum(0)
# Counts are integers, so compare them exactly against the first feature; this covers
# every index of the last axis rather than a hard-coded number of features.
all_equal = bool((nan_counts_axis0 == nan_counts_axis0[..., :1]).all())
print(all_equal)

In [None]:

# Check that every feature has the same NaN counts along axis 1.
# np.isnan runs once over the whole array instead of once per feature.
nan_counts_axis1 = np.isnan(browser_games_spatiotemporal_features).sum(1)
# Counts are integers, so compare them exactly against the first feature; this covers
# every index of the last axis rather than a hard-coded number of features.
all_equal = bool((nan_counts_axis1 == nan_counts_axis1[..., :1]).all())
print(all_equal)

In [None]:
# Spatiotemporal feature names that are also listed as binary features.
set(browser_games_data["spatiotemporal_node_feature_names"].tolist()) & set(browser_games_data["bin_feature_names"].tolist())

In [None]:
# Spatiotemporal feature names that are also listed as categorical features.
set(browser_games_data["spatiotemporal_node_feature_names"].tolist()) & set(browser_games_data["cat_feature_names"].tolist())

In [None]:
# Numeric feature names that appear in none of the three node-feature name lists.
set(browser_games_data["num_feature_names"].tolist()) - (set(browser_games_data["spatiotemporal_node_feature_names"].tolist()) | set(browser_games_data["spatial_node_feature_names"].tolist()) | set(browser_games_data["temporal_node_feature_names"].tolist()))

In [None]:
browser_games_data["bin_feature_names"]

In [None]:
browser_games_data["cat_feature_names"]

In [None]:
browser_games_spatial_features = browser_games_data["spatial_node_features"]


In [None]:
# Spatial features, their NaN flag and their stored names.
browser_games_spatial_features, np.isnan(browser_games_spatial_features).any(), browser_games_data["spatial_node_feature_names"]

In [None]:
np.cumprod(browser_games_spatial_features.shape), browser_games_spatial_features.shape

In [None]:
# NaN count per index of the last axis, summed over the first two axes.
np.isnan(browser_games_spatial_features).sum((0, 1))

# PemsBay

In [None]:
%%opts RGB [tools=["hover"] width=1200 height=1200]

# Lay out and rasterize the edge list stored in the pems_bay.npz archive.
r_direct = hv.Curve(get_connected_dots_for_graph(np.load("../../data/pems_bay.npz")["edges"]), label="Direct")
datashade(r_direct)

# metr-LA

In [None]:
%%opts RGB [tools=["hover"] width=1200 height=1200]

# Lay out and rasterize the edge list stored in the metr_la.npz archive.
r_direct = hv.Curve(get_connected_dots_for_graph(np.load("../../data/metr_la.npz")["edges"]), label="Direct")
datashade(r_direct)

# Large_ST

In [None]:
%%opts RGB [tools=["hover"] width=1200 height=1200]

# Lay out and rasterize the edge list stored in the largest.npz archive.
r_direct = hv.Curve(get_connected_dots_for_graph(np.load("../../data/largest.npz")["edges"]), label="Direct")
datashade(r_direct)