In [1]:
# Install into the running kernel's environment (%pip, not !pip) and pin the
# versions so the recorded outputs (torch 2.4.0) stay reproducible.
%pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0



In [2]:
# Print the installed torch version. This runs in a separate `python`
# subprocess launched from the shell, not inside the notebook kernel.
!python -c "import torch; print(torch.__version__)"

2.4.0


In [3]:
# Use the %pip magic so the package lands in the kernel's own environment.
%pip install torch_geometric==2.2.0



In [4]:
# Use %pip (kernel environment) and pin to the version this notebook was
# recorded with (0.54.0, as printed by the version-report cell).
%pip install torch_geometric_temporal==0.54.0



In [1]:
import torch
import torch_geometric
import torch_geometric_temporal

# Report the library versions visible to this kernel, one line per component.

# PyTorch itself
print(f"PyTorch version: {torch.__version__}")

# CUDA toolkit version PyTorch reports (printed as-is, whatever its value)
print(f"CUDA version: {torch.version.cuda}")

# PyTorch Geometric
print(f"PyTorch Geometric version: {torch_geometric.__version__}")

# PyTorch Geometric Temporal
print(f"PyTorch Geometric Temporal version: {torch_geometric_temporal.__version__}")

PyTorch version: 2.4.0
CUDA version: None
PyTorch Geometric version: 2.2.0
PyTorch Geometric Temporal version: 0.54.0


In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric_temporal.nn.recurrent import GConvLSTM
from torch_geometric_temporal.signal import temporal_signal_split
from torch_geometric_temporal.dataset import METRLADatasetLoader
from torch_geometric_temporal.nn.recurrent import GConvGRU
from torch_geometric_temporal.signal import StaticGraphTemporalSignal


from sklearn.metrics.pairwise import cosine_similarity

In [25]:
# Read the full evaluation sales table from disk.
df = pd.read_csv('m5-forecasting-accuracy/sales_train_evaluation.csv')

# Keep only the rows whose cat_id is exactly "HOBBIES".
df_hobbies = df.loc[df['cat_id'].eq('HOBBIES')]

In [26]:
# Gather every day column (name starts with "d_"), preserving column order.
date_columns = list(filter(lambda name: name.startswith('d_'), df_hobbies.columns))
# Total number of day columns available.
total_days = len(date_columns)
# Names of the trailing 365 day columns.
last_365_days_columns = date_columns[-365:]
# Sales restricted to those day columns.
sales_data_last_365 = df_hobbies.loc[:, last_365_days_columns]

In [50]:
# Display the sales table restricted to the last 365 day columns.
sales_data_last_365

Unnamed: 0,d_1577,d_1578,d_1579,d_1580,d_1581,d_1582,d_1583,d_1584,d_1585,d_1586,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
0,0,0,0,0,0,0,0,0,0,0,...,2,4,0,0,0,0,3,3,0,1
1,0,1,0,0,0,0,0,0,1,0,...,0,1,2,1,1,0,0,0,0,0
2,0,1,0,1,0,0,2,0,0,0,...,1,0,2,0,0,0,2,3,0,1
3,2,2,2,2,2,1,0,13,0,0,...,1,1,0,4,0,1,3,0,2,6
4,3,1,0,0,0,0,2,1,1,0,...,0,0,0,2,1,0,0,2,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28001,0,0,1,0,0,0,2,0,0,0,...,0,0,0,0,1,0,0,1,0,0
28002,0,0,0,0,0,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28003,0,0,0,1,0,0,0,0,0,1,...,0,2,0,0,0,1,2,0,0,0
28004,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,2,0,0,1,0


In [28]:
# One-step-ahead setup: each day's sales is the feature for the following day's label.
n_day_columns = sales_data_last_365.shape[1]
# Every day column except the last one (kept as a DataFrame).
node_features = sales_data_last_365.iloc[:, : n_day_columns - 1]
# Every day column except the first one, as a NumPy array.
labels = sales_data_last_365.iloc[:, 1:].to_numpy()

In [29]:
# Pairwise cosine similarity between the rows of node_features.
cosine_sim_matrix = cosine_similarity(node_features)

# Similarity cut-off: two nodes are linked only when their similarity is strictly above it.
threshold = 0.5
# Dense float adjacency matrix: 1.0 where similarity > threshold, else 0.0.
adj_matrix = np.where(cosine_sim_matrix > threshold, 1.0, 0.0)
# Sparse edge list for PyTorch Geometric: row 0 holds source indices, row 1 target indices.
edge_index = torch.tensor(np.stack(np.nonzero(adj_matrix)), dtype=torch.long)

In [30]:
# Inspect the edge list tensor built from the thresholded similarity matrix.
print(edge_index)

tensor([[   0,    0,    0,  ..., 5647, 5648, 5649],
        [   0,   82,  121,  ..., 5647, 5648, 5649]])


In [31]:
from torch_geometric.data import Data

# Convert the feature table and the label array to float32 tensors.
node_features_tensor = torch.tensor(node_features.to_numpy(), dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.float32)

# Bundle node features, edge list and labels into a PyTorch Geometric Data object.
data = Data(
    x=node_features_tensor,
    edge_index=edge_index,
    y=labels_tensor,
)

In [32]:
# Sanity-check the tensor shapes.
print(f"Node features shape: {node_features_tensor.shape}")
print(f"Labels shape: {labels_tensor.shape}")
print(f"Edge index shape: {edge_index.shape}")

Node features shape: torch.Size([5650, 364])
Labels shape: torch.Size([5650, 364])
Edge index shape: torch.Size([2, 308386])


In [33]:
# Pair features with labels along a new trailing axis -> 3-D array, e.g. (5650, 364, 2).
data = np.stack([node_features, labels], axis=-1)

# Confirm the resulting shape.
print(f"Combined data shape: {data.shape}")

Combined data shape: (5650, 364, 2)


In [34]:
# Swap the two axes of the features and of the labels, e.g. (5650, 364) -> (364, 5650).
node_features_transposed = node_features.transpose()
labels_transposed = labels.transpose()

In [35]:
# Confirm the shapes after transposition.
print(f"Node features shape: {node_features_transposed.shape}")
print(f"Labels shape: {labels_transposed.shape}")

Node features shape: (364, 5650)
Labels shape: (364, 5650)


In [36]:
# Stack the transposed features and labels along a new last axis.
# With the shapes printed above this yields (364, 5650, 2): 364 time steps, 5650 items,
# and per entry the pair (feature, label).
combined_data = np.stack((node_features_transposed, labels_transposed), axis=-1)
# `combined_data` was previously printed without ever being defined in the notebook
# (NameError on a fresh kernel). Define it here and keep `data` bound to the same
# array so cells that use either name keep working.
data = combined_data
print("Combined data shape:", combined_data.shape)

Combined data shape: (364, 5650, 2)


In [37]:
# Write the stacked array to 'gcn_data.npz' under the key 'data'. The next cell loads
# this file, so the export must actually run for Restart & Run All to work on a clean
# checkout. An existing file is left untouched, as before.
import os

if not os.path.exists('gcn_data.npz'):
    np.savez('gcn_data.npz', data=data)

In [47]:
# Open the .npz archive in a context manager so the underlying file handle is
# released as soon as the array has been read (np.load keeps it open otherwise).
with np.load('gcn_data.npz') as npzfile:
    # List the names of all arrays stored in the archive.
    print("Available arrays in the .npz file:", npzfile.files)

    # Read the array stored under the key 'data'; it is fully loaded into memory
    # here, so it remains usable after the archive is closed.
    data = npzfile['data']

# Flatten everything after the first axis so each first-axis slice becomes one CSV row.
flattened_data = data.reshape(data.shape[0], -1)

# Write the 2-D array as comma-separated text.
np.savetxt('gcn_data.csv', flattened_data, delimiter=',')

Available arrays in the .npz file: ['data']
