In [170]:
import pandas as pd
import numpy as np
import scipy
import time
import random
import sys
# Import T. Kipf's GCN implementation
# https://github.com/tkipf/keras-gcn
sys.path.append('../keras-gcn/')
from keras.layers import Input, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.utils import to_categorical

from kegra.layers.graph import GraphConvolution
from kegra.utils import *
%matplotlib inline

In [180]:
# Adjacency table, with its columns rearranged into sorted label order.
# NOTE(review): read_pickle can execute arbitrary code -- only load trusted files.
A = pd.read_pickle('adjacency_small.pkl')
A = A.loc[:, sorted(A.columns)]
# History table stored under the 'hist' key; later cells select one column of it.
X = pd.read_hdf('history_small.hdf', key='hist')

In [181]:
# Fixed student column (a random pick would be np.random.choice(X.columns)).
student = 142954.0
X_k_ = X[student]
# Drop entries equal to -100 (presumably a "no answer" sentinel -- confirm),
# and remember the index labels of what is left.
X_k = X_k_[X_k_ != -100]
ex_k = X_k.index
c = 15
# Require strictly more than c + 1 remaining entries.
assert len(X_k) > c + 1

In [None]:
def one_hot(x):
    """Encode a binary value as a two-element one-hot list.

    Parameters
    ----------
    x : number
        Value to encode; it must compare equal to 0 or to 1.

    Returns
    -------
    list of int
        ``[1, 0]`` when ``x == 0`` and ``[0, 1]`` when ``x == 1``.

    Raises
    ------
    ValueError
        If ``x`` is neither 0 nor 1. Falling through and returning ``None``
        would let an invalid label slip silently into a label array.
    """
    if x == 0:
        return [1, 0]
    if x == 1:
        return [0, 1]
    raise ValueError("one_hot expects 0 or 1, got {!r}".format(x))

In [239]:
n_samples = 15
# Sample n_samples labels for training, then one held-out label from the rest.
idx_train = random.sample(list(ex_k), n_samples)
idx_test = random.sample([k for k in ex_k if k not in idx_train], 1)
# Node order used everywhere below: training nodes first, held-out node last.
idx_full = np.hstack((idx_train, idx_test))
X_train = X_k.loc[idx_full].values
# One-hot target for EVERY node. The fit call weights the loss with a mask that
# selects the first n_samples rows, so those rows must carry real labels:
# with all-zero targets the categorical cross-entropy is identically zero and
# nothing is learned. The last row keeps the held-out node's label for
# evaluation only.
# NOTE(review): X_train still contains the held-out node's own value as its
# input feature, so the target is visible to the model -- confirm whether that
# feature should be masked before training.
y_train = np.array([one_hot(v) for v in X_train], dtype=float)
# Restrict the adjacency to the sampled nodes with rows AND columns in idx_full
# order, so that row i of A_train, X_train and y_train all describe the same node.
A_train = A.loc[idx_full, idx_full]
A_train = scipy.sparse.csr_matrix(A_train.values)

In [234]:
# Display the three shapes side by side to check they share the same first dimension.
A_train.shape, X_train.shape, y_train.shape

((16, 16), (16,), (16, 2))

In [241]:
# Boolean mask of length n_samples + 1: True for the first n_samples positions,
# False for the final one.
train_mask = np.arange(n_samples + 1) < n_samples

In [243]:
# Normalize X_train so its entries sum to 1, then make it a single-column matrix.
feature_total = X_train.sum()
# Guard the division: when every sampled value is 0 the sum is 0, and dividing
# would fill the features with NaN. In that case the zeros are kept as they are.
if feature_total != 0:
    X_train = X_train / feature_total
X_train = X_train.reshape((len(X_train), 1))

In [244]:
# preprocess_adj is not defined in this notebook; presumably it comes from the
# kegra.utils star import. The second positional argument presumably selects
# symmetric normalization -- confirm against kegra.
A_ = preprocess_adj(A_train, True)
# Passed as the second positional argument to both GraphConvolution layers.
support = 1
# Inputs handed to model.fit / model.predict: features first, then the adjacency.
graph = [X_train, A_]
# Sparse placeholder input for the adjacency; concatenated onto each layer's inputs.
G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True)]

In [249]:
# Dimensions are taken from the prepared arrays.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]

X_in = Input(shape=(n_features,))

# First block: dropout on the raw features, then a 16-unit graph convolution
# with ReLU activation and an L2 penalty on its weights.
inputs_dropped = Dropout(0.5)(X_in)
hidden = GraphConvolution(16, support, activation='relu', W_regularizer=l2(5e-4))([inputs_dropped] + G)

# Second block: dropout on the hidden representation, then a softmax graph
# convolution with one unit per label column.
H = Dropout(0.5)(hidden)
Y = GraphConvolution(n_classes, support, activation='softmax')([H] + G)

# Assemble and compile the model.
model = Model(inputs=[X_in] + G, outputs=Y)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

In [250]:
# Train on the whole graph at once: batch_size equals the number of adjacency
# rows, and shuffle=False keeps the rows in their original order.
# sample_weight=train_mask weights each row's loss, so rows whose mask entry is
# False contribute nothing.
# NOTE(review): epochs=1 with a single full-size batch is one pass over the
# data -- likely too little training; confirm whether this call was meant to sit
# inside an epoch loop.
h = model.fit(graph, y_train, sample_weight=train_mask,
          batch_size=A_train.shape[0], epochs=1, shuffle=False, verbose=0)

In [251]:
# Predict for every row of the graph in a single batch (same inputs as used for fitting).
preds = model.predict(graph, batch_size=A_train.shape[0])

In [252]:
# Show the softmax outputs: one row per node, one column per label class.
preds

array([[ 0.50037676,  0.49962321],
       [ 0.50036758,  0.49963245],
       [ 0.50037301,  0.49962702],
       [ 0.50037009,  0.49962997],
       [ 0.50035399,  0.49964598],
       [ 0.50037205,  0.49962798],
       [ 0.50036699,  0.49963301],
       [ 0.50036854,  0.49963152],
       [ 0.50036907,  0.49963102],
       [ 0.50036788,  0.49963212],
       [ 0.50037193,  0.49962813],
       [ 0.50036961,  0.49963048],
       [ 0.50036794,  0.49963206],
       [ 0.50036377,  0.4996362 ],
       [ 0.50034058,  0.49965945],
       [ 0.50034106,  0.49965897]], dtype=float32)