import warnings

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from adj_matrix_generate import *
from dgi import DGI
from dgi_multi import *
from signal_to_nodes import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
warnings.filterwarnings("ignore")
# %% Preprocessing WISDM
df = pd.read_csv('./WISDM.csv')
df.dropna(inplace=True)
scaler = StandardScaler()
df[['x-axis', 'y-axis', 'z-axis']] = scaler.fit_transform(df[['x-axis', 'y-axis', 'z-axis']])
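# Optional sanity check (not in the original script): confirm the per-axis
# standardization produced roughly zero mean and unit standard deviation.
print(df[['x-axis', 'y-axis', 'z-axis']].describe().loc[['mean', 'std']])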
# %% Creating Nodes
window_size = 100 # WISDM
overlap = 0.5
train_ratio = 0.8
# Segment the time series into overlapping windows
nodes, nodes_labels, labels_index = signal_to_nodes_WISDM(df, window_size, overlap)
n, m, p = nodes.shape
nodes = nodes.reshape(n, m * p)  # flatten each window (m timesteps x p axes) into one node feature vector
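# Optional sanity check (assumes signal_to_nodes_WISDM returns torch tensors,
# as the .cpu() calls below imply): report node matrix shape and class counts.
print(f"nodes: {tuple(nodes.shape)}")
print(torch.unique(nodes_labels, return_counts=True))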
# %% Graph Construction
g = build_dgl_graph(nodes.cpu(), nodes_labels.cpu(), method='cosine', param=80).to(device)
# Split nodes into train/test sets with generate_masks
train_mask, test_mask = generate_masks(nodes, nodes_labels, train_ratio)
# Attach the train/test masks to the graph
g.train_mask = train_mask.to(device)
g.test_mask = test_mask.to(device)
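# Optional sanity check on the constructed graph (g is a DGLGraph, per
# build_dgl_graph above) and on the train/test split sizes.
print(f"graph: {g.number_of_nodes()} nodes, {g.number_of_edges()} edges, "
      f"{int(train_mask.sum())} train / {int(test_mask.sum())} test")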
features = nodes
labels = nodes_labels
# %% Hyperparameters
# Multi_DGI
n_layers = 2
dropout = 0.2
lr = 0.001
epochs = 500
patience = 20
n_hidden = 64
activation = nn.PReLU(n_hidden)
# %% Initialize the DGI model
in_feats = features.shape[1]
model = DGI(g, in_feats, n_hidden, n_layers, activation, dropout).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
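# Note (per the DGI paper, Velickovic et al. 2019): DGI trains unsupervised by
# contrasting node embeddings from the real graph against embeddings from a
# corrupted (feature-shuffled) graph; the loss returned by model(features)
# below is that binary discriminator loss.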
# %% Train the Multi_DGI model
best_loss = float('inf')
best_epoch = 0
counter = 0
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    loss = model(features)
    loss.backward()
    optimizer.step()
    # Checkpoint the best (lowest-loss) model and stop early if no improvement
    if loss.item() < best_loss:
        best_loss = loss.item()
        best_epoch = epoch
        counter = 0
        torch.save(model.state_dict(), '/home/hjf/experiment_muilt_dgi/best_dgi.pkl')
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping! Epoch: {epoch}")
            break
    if epoch % 10 == 0:
        print(f"Epoch [{epoch}/{epochs}], Loss: {loss.item()}")
# %% Extract node representations
print("Loading the best model")
model.load_state_dict(torch.load('/home/hjf/experiment_muilt_dgi/best_dgi.pkl'))
embeds = model.encoder(features, corrupt=False).detach()
# Concatenate raw features and DGI embeddings to obtain the combined feature matrix
features_emb = torch.cat((features, embeds), 1)
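# Shape check (optional): features and embeddings are concatenated along dim 1,
# so features_emb has in_feats + n_hidden columns per node.
print(f"features {tuple(features.shape)} + embeds {tuple(embeds.shape)} -> {tuple(features_emb.shape)}")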
# %% Random Forest classifier
model_rf = RandomForestClassifier(random_state=42, n_estimators=50, max_features=50)
# Train the downstream classifier
model_rf.fit(features_emb[train_mask].cpu().data.numpy(), labels[train_mask].cpu().data.numpy())
y_preds = model_rf.predict(features_emb[test_mask].cpu().data.numpy())
# Evaluate the classifier
prec, rec, f1, num = precision_recall_fscore_support(labels[test_mask].cpu().data.numpy(), y_preds,
                                                     average='macro')
acc = accuracy_score(labels[test_mask].cpu().data.numpy(), y_preds)
print("All features + DGI embeddings: Random Forest classifier")
print("Macro precision: %.3f\nRecall: %.3f\nF1 score: %.3f\nAccuracy: %.3f" % (prec, rec, f1, acc))
# %% MLP classifier
model_mlp = MLPClassifier(hidden_layer_sizes=(in_feats // 4, in_feats // 8), activation='relu', max_iter=1000,
                          random_state=42)
# Train the downstream classifier
model_mlp.fit(features_emb[train_mask].cpu().data.numpy(), labels[train_mask].cpu().data.numpy())
y_preds = model_mlp.predict(features_emb[test_mask].cpu().data.numpy())
# Evaluate the classifier
prec, rec, f1, num = precision_recall_fscore_support(labels[test_mask].cpu().data.numpy(), y_preds,
                                                     average='macro')
acc = accuracy_score(labels[test_mask].cpu().data.numpy(), y_preds)
print("All features + DGI embeddings: MLP classifier")
print("Macro precision: %.3f\nRecall: %.3f\nF1 score: %.3f\nAccuracy: %.3f" % (prec, rec, f1, acc))
# %% Support Vector Machine (SVM) classifier
model_svm = SVC(random_state=42)
model_svm.fit(features_emb[train_mask].cpu().data.numpy(), labels[train_mask].cpu().data.numpy())
y_preds_svm = model_svm.predict(features_emb[test_mask].cpu().data.numpy())
prec_svm, rec_svm, f1_svm, num_svm = precision_recall_fscore_support(labels[test_mask].cpu().data.numpy(), y_preds_svm,
                                                                     average='macro')
acc_svm = accuracy_score(labels[test_mask].cpu().data.numpy(), y_preds_svm)
print("All features + DGI embeddings: SVM classifier")
print("Macro precision: %.3f\nRecall: %.3f\nF1 score: %.3f\nAccuracy: %.3f" % (prec_svm, rec_svm, f1_svm, acc_svm))
# %% Logistic Regression classifier
model_lr = LogisticRegression(max_iter=500, random_state=42)
model_lr.fit(features_emb[train_mask].cpu().data.numpy(), labels[train_mask].cpu().data.numpy())
y_preds_lr = model_lr.predict(features_emb[test_mask].cpu().data.numpy())
prec_lr, rec_lr, f1_lr, num_lr = precision_recall_fscore_support(labels[test_mask].cpu().data.numpy(), y_preds_lr,
                                                                 average='macro')
acc_lr = accuracy_score(labels[test_mask].cpu().data.numpy(), y_preds_lr)
print("All features + DGI embeddings: Logistic Regression classifier")
print("Macro precision: %.3f\nRecall: %.3f\nF1 score: %.3f\nAccuracy: %.3f" % (prec_lr, rec_lr, f1_lr, acc_lr))