In [None]:
import os
import pickle
import sys

import numpy as np
import pandas as pd
from IPython.display import display, HTML
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from stellargraph import StellarGraph
from stellargraph import datasets
from stellargraph.layer import GCN
from stellargraph.layer import GCNSupervisedGraphClassification
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.mapper import PaddedGraphGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the pickled sample data set.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load files from a trusted source.
# The `with` block guarantees the handle is closed even if unpickling fails.
with open("../data/sample_data/hotel_reservation_only_bottlenecks_sample.pkl", "rb") as filehandler:
    dat = pickle.load(filehandler)
# Quick sanity check of what was loaded (later cells treat it as a DataFrame).
print(type(dat))
print(dat.shape)

In [None]:
# Shuffle the loaded rows (frac=1 samples every row). No seed is set, so the
# order differs between runs. Assumes `dat` is a pandas DataFrame with a
# 'label' column -- the calls below rely on that.
df = dat.sample(frac=1)
# Feature-only frame: the shuffled rows with the 'label' column removed.
df_without_labels = df.drop(columns=['label'])
df.columns  # bare expression; not the last line of the cell, so nothing is displayed
# Rows whose label equals -1.
label_is_minus_one = df['label'] == -1
df_check = df[label_is_minus_one]

In [None]:
# Fixed edge list (source -> target) over node ids "0".."5"; the same topology
# is reused for every graph built later in the notebook.
edge_pairs = [("0", "1"), ("0", "4"), ("1", "2"), ("1", "3"), ("4", "5")]
graph_edges = pd.DataFrame(edge_pairs, columns=["source", "target"])
graph_edges

In [None]:
# One string node id per feature column ("0", "1", ...): each column of the
# label-free frame becomes one node of the graph.
node_indices = list(map(str, range(df_without_labels.shape[1])))

In [None]:
def _sample_tenth(group):
    """Return a random 10% of the rows of one label group."""
    return group.sample(frac=0.1)


# Down-sample to 10% of the rows within each label value, so the label
# proportions of `df` are roughly preserved. No seed is set.
df_sample = df.groupby('label', group_keys=False).apply(_sample_tenth)

In [None]:
# Show how many sampled rows fall under each label value.
df_sample.loc[:, 'label'].value_counts()

In [None]:
# Build one StellarGraph per sampled row: every graph shares the edge list in
# `graph_edges`; the row's non-label values become the single node feature "x",
# one value per node id in `node_indices`.
graphs = []
graph_labels = []
count = 0
for count, (_, row) in enumerate(df_sample.iterrows(), start=1):
    feature_values = row.drop(labels=['label']).tolist()
    node_frame = pd.DataFrame({"x": feature_values}, index=node_indices)
    graphs.append(StellarGraph(node_frame, graph_edges))
    graph_labels.append(row['label'])
    # Progress marker every 10,000 graphs.
    if count % 10000 == 0:
        print(count)

In [None]:
# Keep a plain-list copy of the labels, then rebind `graph_labels` to a pandas
# Series so it can be counted and stratified on in later cells.
graph_label_lists = list(graph_labels)
graph_labels = pd.Series(graph_label_lists)

In [None]:
# Per-label graph counts, rendered as a one-column DataFrame.
pd.DataFrame(graph_labels.value_counts())

In [None]:
# 80/20 train/test split of the graphs and their labels, stratified on the
# label so both parts keep the same class proportions. No random_state is set.
_split = train_test_split(graphs, graph_labels, test_size=0.2, stratify=graph_labels)
graphs_train, graphs_test, graph_labels_train, graph_labels_test = _split

In [None]:
# Batch generator over the training graphs. PaddedGraphGenerator is imported in
# the notebook's import cell (it was previously missing, which raised NameError
# on a fresh kernel).
generator = PaddedGraphGenerator(graphs=graphs_train)
# Training flow: the positions 0..len(graphs_train)-1 index into the generator's
# own graph list; targets are passed in the same order. Shuffled every epoch.
train_ilocs = list(range(len(graphs_train)))
gen = generator.flow(train_ilocs, graph_labels_train, shuffle=True)

In [None]:
# Three graph-convolution layers of 32 units each, all with ReLU activations.
layer_sizes = [32, 32, 32]
# `generator` is a PaddedGraphGenerator (one whole graph per sample), so the
# graph-level model is required here: the node-level `GCN` layer only works with
# full-batch / cluster node generators. GCNSupervisedGraphClassification pools
# the node embeddings into a single vector per graph.
gcn = GCNSupervisedGraphClassification(
    layer_sizes=layer_sizes,
    activations=["relu"] * len(layer_sizes),
    generator=generator,
    dropout=0.5,
)

In [None]:
# Expose the model's input and output tensors, then add the prediction head.
x_in, x_out = gcn.in_out_tensors()
# Single sigmoid unit: the model is compiled with binary_crossentropy, which
# needs a probability in [0, 1]. The previous tanh head emits values in [-1, 1],
# which makes the loss undefined for negative outputs.
# NOTE(review): binary_crossentropy also expects targets encoded as 0/1 --
# confirm the label encoding (an earlier cell filters on label == -1).
x_out = Dense(units=1, activation="sigmoid")(x_out)

In [None]:
# Wrap the input/output tensors in a Keras model and compile it for binary
# classification. Adam and binary_crossentropy come from the notebook's import
# cell (they were previously used without being imported).
model = Model(inputs=x_in, outputs=x_out)
model.compile(
    optimizer=Adam(learning_rate=0.005),
    loss=binary_crossentropy,
    # The short name "acc" is what makes the validation metric "val_acc",
    # which the EarlyStopping callback monitors.
    metrics=["acc"],
)


In [None]:
# Hold out a stratified 20% of the training graphs for validation. Validating
# on the training flow itself (as before) makes val_acc track training accuracy,
# so early stopping and restore_best_weights would select for overfitting.
train_targets = np.asarray(graph_labels_train)
fit_ilocs, val_ilocs = train_test_split(
    np.arange(len(graphs_train)), test_size=0.2, stratify=train_targets
)
# Both flows index into the graphs held by `generator` (the training graphs);
# targets are sliced with the same positions so they stay aligned.
fit_gen = generator.flow(fit_ilocs.tolist(), train_targets[fit_ilocs], shuffle=True)
val_gen = generator.flow(val_ilocs.tolist(), train_targets[val_ilocs])

# Stop when validation accuracy has not improved for 10 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(monitor="val_acc", patience=10, restore_best_weights=True)
history = model.fit(
    fit_gen, validation_data=val_gen, epochs=20, verbose=1, callbacks=[early_stop]
)


In [None]:
# Evaluate the model on the held-out test graphs.
# `generator` only holds the training graphs, so flowing test positions through
# it would score training graphs against test labels. Build a dedicated
# generator over the test graphs instead.
test_generator = PaddedGraphGenerator(graphs=graphs_test)
# No shuffle argument is passed, so predictions can be compared with
# graph_labels_test in order.
test_gen = test_generator.flow(list(range(len(graphs_test))), graph_labels_test)
test_metrics = model.evaluate(test_gen)
print("\nTest Set Metrics:")
for name, val in zip(model.metrics_names, test_metrics):
    print("\t{}: {:0.4f}".format(name, val))

# Classification report: threshold the predicted scores at 0.5 and flatten the
# (n, 1) prediction array to 1-D so it lines up with the label vector.
test_predictions = model.predict(test_gen)
test_predictions_class = np.where(test_predictions > 0.5, 1, 0).ravel()
print(classification_report(graph_labels_test, test_predictions_class))