In [None]:
from utils.data_preprocess_utils import get_config
from easydict import EasyDict
from utils.dataset import DenmarkDataset

In [None]:
# Build the dataset wrapper from the YAML configuration file.
config_path = './cfg/dataset_cfg.yaml'
cfg = EasyDict(get_config(config_path))
dataset = DenmarkDataset(cfg)

# Fetch one single day and one multi-week date range from the dataset.
# NOTE(review): `_get_day_data` is a private-looking method; confirm there is
# no public accessor that should be used instead.
single_day = '2019-01-01'
range_start, range_end = '2019-01-01', '2019-03-10'
day_data = dataset._get_day_data(single_day)
range_data = dataset.get_data_by_range(range_start, range_end)

In [50]:
# Pick the aggregated 'Total' entry of the single-day data.
# NOTE(review): the keys printed below come from `range_data`, but the tensor
# is taken from `day_data` — confirm the single-day tensor is the intended one
# for the analyses that follow.
total_VTF = day_data['Total']

available_keys = range_data.keys()
print(available_keys)
print(total_VTF.shape)

dict_keys(['Sailing', 'Pleasure', 'Cargo', 'Fishing', 'Passenger', 'Tanker', 'Tug', 'Other', 'Total'])
(4, 103, 4)


In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd
# Load the waypoint hull polygons, give each one an integer label
# (0 .. number_of_hulls - 1) and store its centroid re-projected to EPSG:4326.
gdf = gpd.read_file("/root/GIS/waypoints/convex_hull.geojson") # waypoint hulls
gdf['label'] = gdf.reset_index().index
gdf['centroid'] = gdf['geometry'].centroid.to_crs(crs='EPSG:4326')
N = gdf['centroid'].shape[0]

# The .npy file stores a pickled dict, hence allow_pickle=True and .item().
# NOTE(review): unpickling executes arbitrary code — only load trusted files.
data = np.load('/root/GIS/ais_flow/2019-02-03.npy', allow_pickle=True).item()
data_total = data['Total']

# One column per node index 0..N (N + 1 columns), one row per leading index
# 0..3 of `data_total`. The frame is built in a single constructor call instead
# of being grown column-by-column with a full copy on every iteration.
# presumably the leading axis of `data_total` is time and has 4 entries — TODO confirm
df_total = pd.DataFrame(
    {i: [data_total[v0][i] for v0 in range(4)] for i in range(N + 1)}
)

df_total

In [None]:
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acf, pacf

# Series taken at index 3 on the second axis and index 1 on the last axis.
# NOTE(review): the variable name says "node1" but index 3 is selected — confirm.
time_series_node1_flow = total_VTF[:, 3, 1]

# Clamp the requested lag count to what the series can support: acf cannot go
# beyond n_obs - 1 lags, and statsmodels' pacf rejects lags beyond half of the
# sample size (a fixed nlags=50 fails on short series such as a single day).
requested_lags = 50
n_obs = len(time_series_node1_flow)
acf_lags = min(requested_lags, n_obs - 1)
pacf_lags = min(requested_lags, n_obs // 2 - 1)
if pacf_lags < 1:
    raise ValueError(
        f"Series of length {n_obs} is too short to compute ACF/PACF; "
        "use a longer time range."
    )

acf_values = acf(time_series_node1_flow, nlags=acf_lags)
pacf_values = pacf(time_series_node1_flow, nlags=pacf_lags)

# Left panel: autocorrelation; right panel: partial autocorrelation.
fig, (ax_acf, ax_pacf) = plt.subplots(1, 2, figsize=(14, 5))

ax_acf.stem(acf_values)
ax_acf.set_title('Autocorrelation Function (ACF)')
ax_acf.set_xlabel('Lag')
ax_acf.set_ylabel('Autocorrelation')

ax_pacf.stem(pacf_values)
ax_pacf.set_title('Partial Autocorrelation Function (PACF)')
ax_pacf.set_xlabel('Lag')
ax_pacf.set_ylabel('Partial Autocorrelation')

plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorly as tl
from tensorly.decomposition import non_negative_parafac
from scipy.stats import pearsonr

# Seed NumPy's global RNG before running the decomposition.
np.random.seed(42)

# Work on the slice at index 1 of the last axis of the 'Total' tensor.
# presumably this channel is the inbound flow (per the variable name) — TODO confirm
tensor_data = total_VTF
flow_in = tensor_data[:, :, 1]

# Non-negative PARAFAC decomposition of the 2-D slice into `rank` components.
# NOTE(review): if the first axis only has 4 entries (as the printed shape of
# total_VTF suggests), rank=5 exceeds it — confirm this is intended.
rank = 5
weights, factors = non_negative_parafac(tl.tensor(flow_in), rank=rank, n_iter_max=200)

# Extract the decomposed factors: one factor matrix per axis of `flow_in`.
factor_time, factor_nodes = factors

# Plot both factor matrices side by side, one line per component.
panels = [
    (factor_time, 'Factor: Time'),
    (factor_nodes, 'Factor: Nodes'),
]
plt.figure(figsize=(12, 4))
for position, (factor_matrix, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(factor_matrix)
    plt.title(panel_title)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
print(factor_nodes.shape)

# Indices of the k nodes with the largest loading on component 1
# (np.argsort is ascending, so the last k entries are the largest).
dim1 = factor_nodes[:, 1]
k = 10
topk_indices = np.argsort(dim1)[-k:]

# Reload the waypoint hulls, label them 0 .. number_of_hulls - 1 and compute
# their centroids in EPSG:4326 (lon/lat) for plotting.
# NOTE(review): this path differs from the '/root/GIS/waypoints/...' path used
# earlier in the notebook — confirm both point at the same file.
gdf = gpd.read_file('/root/autodl-tmp/waypoints/convex_hull.geojson')
gdf['label'] = gdf.reset_index().index
gdf['centroid'] = gdf['geometry'].centroid.to_crs(crs='EPSG:4326')

m = folium.Map(location=[55.6761, 12.5683], zoom_start=10)
for index in topk_indices:
    # Node index 0 is skipped.
    # presumably it does not correspond to any waypoint hull — TODO confirm
    if index == 0:
        continue
    # NOTE(review): labels run 0 .. number_of_hulls - 1, so mapping a node
    # index to `index + 1` may be off (possibly should be `index - 1`);
    # the original offset is kept here — confirm against the node numbering.
    label = index + 1
    matches = gdf.loc[gdf['label'] == label, 'centroid']
    if matches.empty:
        # Explicitly report labels without a hull instead of failing inside folium.
        print(f'No hull with label {label}; skipping')
        continue
    # Take the single shapely Point so folium receives plain floats rather
    # than one-element pandas Series.
    centroid = matches.iloc[0]
    folium.CircleMarker([centroid.y, centroid.x]).add_to(m)
m