In [None]:
from tensorflow import keras
import stellargraph as sg
from stellargraph import StellarGraph
from stellargraph.data import EdgeSplitter
from stellargraph.mapper import GraphSAGELinkGenerator, GraphSAGENodeGenerator
from stellargraph.layer import GraphSAGE, link_classification
from stellargraph.data import UniformRandomWalk
from stellargraph.data import UnsupervisedSampler
from sklearn.model_selection import train_test_split

In [None]:
# Load the pickled NetworkX graph for this run and wrap it as a StellarGraph,
# taking each node's feature vector from its "vec" attribute.
# NOTE(review): read_gpickle unpickles the file, so only load graphs produced
# by a trusted pipeline.
graph_path = f"result/inter_network_{filename_param}.gpickle"
G = nx.read_gpickle(graph_path)
g = StellarGraph.from_networkx(G, node_features="vec")

In [None]:
# --- Training hyperparameters (batch_size / num_samples are reused by later cells) ---
batch_size = 50
epochs = 4
num_samples = [10, 5]  # passed to the generators; presumably neighbours sampled per hop -- confirm

# --- Random-walk settings for the unsupervised sampler ---
number_of_walks = 1
length = 5
nodes = list(g.nodes())

# Walks start from every node of the graph.
# NOTE(review): no seed is passed here, so samples differ between runs.
unsupervised_samples = UnsupervisedSampler(
    g,
    nodes=nodes,
    length=length,
    number_of_walks=number_of_walks,
)

# Link generator feeding node pairs to GraphSAGE; weighted=True is forwarded
# to the generator as in the node generator used later for inference.
generator = GraphSAGELinkGenerator(g, batch_size, num_samples, weighted=True)
train_gen = generator.flow(unsupervised_samples)

In [None]:
# Two GraphSAGE layers of width 50; the last entry is the embedding size.
layer_sizes = [50, 50]
graphsage = GraphSAGE(
    layer_sizes=layer_sizes, generator=generator, bias=True, dropout=0.0, normalize="l2"
)

# Build the model and expose input and output sockets of graphsage, for node pair inputs:
x_inp, x_out = graphsage.in_out_tensors()

# Link-prediction head: combines the two node embeddings of a pair with the
# "ip" edge-embedding method and squashes the result with a sigmoid.
prediction = link_classification(
    output_dim=1, output_act="sigmoid", edge_embedding_method="ip"
)(x_out)

model = keras.Model(inputs=x_inp, outputs=prediction)

# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras optimizers.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.binary_crossentropy,
    metrics=[keras.metrics.binary_accuracy],
)

In [None]:
# Train the link-prediction model on the sampled node pairs; the returned
# History object is kept in `history`.
history = model.fit(
    train_gen,
    epochs=epochs,
    shuffle=True,
    workers=4,
    use_multiprocessing=False,
    verbose=1,
)

In [None]:
# Reuse the trained GraphSAGE tensors to build a single-node embedding model:
# take the first output tensor and every second input tensor starting at 0
# (presumably the "source" side of each node pair -- confirm).
x_out_src = x_out[0]
x_inp_src = x_inp[0::2]
embedding_model = keras.Model(inputs=x_inp_src, outputs=x_out_src)

In [None]:
# Feed every node of the original NetworkX graph through a node generator that
# uses the same batch size and sampling sizes as training.
node_ids = list(G.nodes)
node_generator = GraphSAGENodeGenerator(g, batch_size, num_samples, weighted=True)
node_gen = node_generator.flow(node_ids)

In [None]:
# Compute the embeddings for all nodes served by `node_gen`.
node_embeddings = embedding_model.predict(
    node_gen,
    verbose=1,
    workers=4,
)

In [None]:
# Class label per zone: integer category code of the `zone1` column.
node_subject = gdf_zoning.zone1.astype("category").cat.codes

X = node_embeddings

# Bound unconditionally: the plotting cell reads `transform.__name__` for its
# title, which previously raised NameError whenever the embeddings were
# already <= 2-D and the reduction branch was skipped.
transform = TSNE  # PCA

if X.shape[1] > 2:
    # Reduce the embeddings to two dimensions for plotting.
    trans = transform(n_components=2)
    emb_transformed = pd.DataFrame(trans.fit_transform(X), index=node_ids)
else:
    # Already plottable as-is; a frame built from an array has integer column
    # labels 0, 1, ... so no renaming is required.
    emb_transformed = pd.DataFrame(X, index=node_ids)

# Assignment aligns on index labels.
# NOTE(review): assumes gdf_zoning is indexed by the same ids as `node_ids`;
# rows without a match get NaN labels -- confirm.
emb_transformed["lab"] = node_subject

In [None]:
# Scatter plot of the 2-D embedding, coloured by the integer class label.
alpha = 0.7

fig, ax = plt.subplots(figsize=(7, 7))
scatter = ax.scatter(
    emb_transformed[0],
    emb_transformed[1],
    c=emb_transformed["lab"],
    cmap="coolwarm",
    alpha=alpha,
)
# Optional axis styling, currently disabled:
# ax.set(aspect="equal", xlabel="$X_1$", ylabel="$X_2$")

# One legend entry per distinct colour value in the scatter.
legend_handles, legend_labels = scatter.legend_elements()
legend1 = ax.legend(legend_handles, legend_labels, loc="lower left", title="Classes")
ax.add_artist(legend1)

title_text = "{} visualization of GraphSAGE embeddings".format(transform.__name__)
plt.title(title_text)

plt.show()

In [None]:
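* weight 取两位小数，以加快计算过程 (round weights to two decimal places to speed up computation)
* word2vec + 全连接图 (word2vec + fully connected graph)
* 无节点属性的 embedding (embedding without node attributes)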

In [None]:
# Integer class code per row of df_zoningembed. `.cat.codes` returns an
# unnamed Series, and DataFrame.merge refuses to merge an unnamed Series, so
# the codes are named "zone1" (the column the classification cell reads).
zone_codes = df_zoningembed.zone1.astype("category").cat.codes.rename("zone1")

# Join embeddings and labels on the index.
# NOTE(review): the embedding frame has a positional 0..n-1 index, so this
# assumes df_zoningembed is indexed 0..n-1 in the same order as the nodes fed
# to the embedding model -- confirm.
df_gnnvec = pd.DataFrame(node_embeddings).merge(
    zone_codes, left_index=True, right_index=True
)

# Use string column labels ("0", "1", ..., "zone1") so the in-memory frame
# matches what reading the CSV back would give and what later cells select.
df_gnnvec.columns = df_gnnvec.columns.map(str)

df_gnnvec.to_csv("result/gnn_vec.csv", index=False)

In [None]:
# Features are every column except the label. Selecting them this way avoids
# hard-coding the embedding width (50) and works whether the column labels are
# integers (frame built in memory) or strings (frame read back from CSV).
X = df_gnnvec.drop(columns="zone1")
y = df_gnnvec.zone1.astype("category").cat.codes

# 70/30 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

classifier = RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

# Per-class accuracy: correctly predicted count (diagonal) divided by the row
# total of the confusion matrix for each class.
matrix = confusion_matrix(y_test, y_pred)
acc = matrix.diagonal() / matrix.sum(axis=1)
print(acc)
print(accuracy_score(y_test, y_pred))
print(cohen_kappa_score(y_test, y_pred))