In [25]:
import time
import numpy as np
import tensorflow as tf

from models import GAT
from utils import process

checkpt_file = 'pre_trained/cora/mod_cora.ckpt'

dataset = 'cora'

# training params
batch_size = 1
nb_epochs = 20
patience = 100
lr = 0.005  # learning rate
l2_coef = 0.0005  # weight decay
hid_units = [8] # numbers of hidden units per each attention head in each layer
n_heads = [8, 1] # additional entry for the output layer
residual = False
nonlinearity = tf.nn.elu
model = GAT

print('Dataset: ' + dataset)
print('----- Opt. hyperparams -----')
print('lr: ' + str(lr))
print('l2_coef: ' + str(l2_coef))
print('----- Archi. hyperparams -----')
print('nb. layers: ' + str(len(hid_units)))
print('nb. units per layer: ' + str(hid_units))
print('nb. attention heads: ' + str(n_heads))
print('residual: ' + str(residual))
print('nonlinearity: ' + str(nonlinearity))
print('model: ' + str(model))



Dataset: cora
----- Opt. hyperparams -----
lr: 0.005
l2_coef: 0.0005
----- Archi. hyperparams -----
nb. layers: 1
nb. units per layer: [8]
nb. attention heads: [8, 1]
residual: False
nonlinearity: <function elu at 0x7f59fcbcaae8>
model: <class 'models.gat.GAT'>


In [26]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import nltk
import re
import scipy
from nltk.stem import WordNetLemmatizer

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output

# Any results you write to the current directory are saved as output.

In [48]:
import json
train = json.loads(open('train.json').read())
ttest = pd.read_json('test.json')
test = json.loads(open('test.json').read())

In [32]:
from random import shuffle
def get_random_data(all_data, size=2000):
    data = all_data.copy()
    shuffle(data)
    return data[:size]
def get_test_data(all_data, i=0, size=2000):
    return all_data[i*size:(i+1)*size]

In [43]:
index = 0
cindex = 0
all_ingredients = dict()
all_cuisines = dict()
rev = dict()
for data in train + test:
    for x in data['ingredients']:
        if x not in all_ingredients:
            all_ingredients[x] = index
            index += 1
    if data.get('cuisine') and data['cuisine'] not in all_cuisines:
        all_cuisines[data['cuisine']] = cindex
        rev[cindex] = data['cuisine']
        cindex += 1
            
print(len(all_ingredients))
print(len(all_cuisines))

7137
20


In [50]:
iter_count = 5
test_cuisines=[]
for iter in range(iter_count):
    nb_nodes = 2000
    
    data = get_random_data(train, size=nb_nodes)
    test_data = get_test_data(test, iter, size=nb_nodes)
    print(len(data))
    print(len(test_data))
    data += test_data
    
    nb_nodes *= 2
    
    adj = []
    for i in range(nb_nodes):
        row = []
        for j in range(nb_nodes):
            row.append(1 if data[i].get('cuisine') == data[j].get('cuisine') else 0)
        adj.append(row)
    adj = scipy.sparse.csr_matrix(adj)
    
    features = []
    for i in range(nb_nodes):
        row = [0] * len(all_ingredients)
        for x in data[i]['ingredients']:
            row[all_ingredients[x]] = 1
        features.append(row)
    features = np.matrixlib.defmatrix.matrix(features)
    
    y_train = []
    y_val = []
    for i in range(nb_nodes):
        row = [0] * len(all_cuisines)
        if data[i].get('cuisine'):
            row[all_cuisines[data[i]['cuisine']]] = 1
        y_train.append(row)
        
        vrow = [0] * len(all_cuisines)
        y_val.append(vrow)
        
    y_train = np.matrixlib.defmatrix.matrix(y_train)
    y_val = np.matrixlib.defmatrix.matrix(y_val)
    y_test = np.matrixlib.defmatrix.matrix(y_val)
    train_mask = [i < nb_nodes / 2 for i in range(nb_nodes)]
    train_mask = np.matrixlib.defmatrix.matrix(train_mask)
    val_mask = np.matrixlib.defmatrix.matrix([False] * nb_nodes)
    test_mask = np.matrixlib.defmatrix.matrix([False] * nb_nodes)
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]
    
    

    #features = features[np.newaxis]
    #y_train = y_train[np.newaxis]
    #y_val = y_val[np.newaxis]
    #y_test = y_test[np.newaxis]
    #train_mask = train_mask[np.newaxis]
    #val_mask = val_mask[np.newaxis]
    #test_mask = test_mask[np.newaxis]
    adj = adj.todense()
    
    adj = adj[np.newaxis]
    biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)
    
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())
    
        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                    attn_drop, ffd_drop,
                                    bias_mat=bias_in,
                                    hid_units=hid_units, n_heads=n_heads,
                                    residual=residual, activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
    
        train_op = model.training(loss, lr, l2_coef)
    
        saver = tf.train.Saver()
    
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    
        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0
    
        with tf.Session() as sess:
            sess.run(init_op)
    
            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0
    
            for epoch in range(nb_epochs):
                tr_step = 0
                tr_size = features.shape[0]
    
                while tr_step * batch_size < tr_size:
                    _, loss_value_tr, acc_tr = sess.run([train_op, loss, accuracy],
                        feed_dict={
                            ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size],
                            bias_in: biases[tr_step*batch_size:(tr_step+1)*batch_size],
                            lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size],
                            msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size],
                            is_train: True,
                            attn_drop: 0.6, ffd_drop: 0.6})
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1
    
                vl_step = 0
                vl_size = features.shape[0]
    
                while vl_step * batch_size < vl_size:
                    loss_value_vl, acc_vl = sess.run([loss, accuracy],
                        feed_dict={
                            ftr_in: features[vl_step*batch_size:(vl_step+1)*batch_size],
                            bias_in: biases[vl_step*batch_size:(vl_step+1)*batch_size],
                            lbl_in: y_val[vl_step*batch_size:(vl_step+1)*batch_size],
                            msk_in: val_mask[vl_step*batch_size:(vl_step+1)*batch_size],
                            is_train: False,
                            attn_drop: 0.0, ffd_drop: 0.0})
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1
    
                print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                        (train_loss_avg/tr_step, train_acc_avg/tr_step,
                        val_loss_avg/vl_step, val_acc_avg/vl_step))
    
                if val_acc_avg/vl_step >= vacc_mx or val_loss_avg/vl_step <= vlss_mn:
                    if val_acc_avg/vl_step >= vacc_mx and val_loss_avg/vl_step <= vlss_mn:
                        vacc_early_model = val_acc_avg/vl_step
                        vlss_early_model = val_loss_avg/vl_step
                        saver.save(sess, checkpt_file)
                    vacc_mx = np.max((val_acc_avg/vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg/vl_step, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model)
                        break
    
                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0
    
            saver.restore(sess, checkpt_file)
    
            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0
    
            while ts_step * batch_size < ts_size:
                loss_value_ts, acc_ts = sess.run([loss, accuracy],
                    feed_dict={
                        ftr_in: features[ts_step*batch_size:(ts_step+1)*batch_size],
                        bias_in: biases[ts_step*batch_size:(ts_step+1)*batch_size],
                        lbl_in: y_test[ts_step*batch_size:(ts_step+1)*batch_size],
                        msk_in: test_mask[ts_step*batch_size:(ts_step+1)*batch_size],
                        is_train: False,
                        attn_drop: 0.0, ffd_drop: 0.0})
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1
    
            print('Test loss:', ts_loss/ts_step, '; Test accuracy:', ts_acc/ts_step)
    
            sess.close()
    
    fc=np.squeeze(np.asarray(y_test))
    for i in range(nb_nodes//2,nb_nodes):
        for j in range(20):
            if fc[i][j]:
                test_cuisines.append(rev[j])
            
test_cuisines=np.array(test_cuisines)
ttest['cuisine']=test_cuisines
ttest[['id' , 'cuisine' ]].to_csv("submission.csv", index=False)
ttest[['id','cuisine']].head()

20
20
20
20
20
20


20
20
20
20


ValueError: Length of values does not match length of index