# The implementation of Text GCN in the paper:
## Liang Yao, Chengsheng Mao, Yuan Luo. "Graph Convolutional Networks for Text Classification." In 33rd AAAI Conference on Artificial Intelligence (AAAI-19), 7370-7377

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import re, os
from sklearn.preprocessing import LabelEncoder
from sklearn import model_selection
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Reading the Data

In [None]:
PATH= '../input/bbc-full-text-document-classification/bbc-fulltext (document classification)/bbc/' 

def readFile(full_file_name):
    """Return the full text of a file decoded as UTF-8.

    Bytes that cannot be decoded are dropped (``errors='ignore'``).

    Args:
        full_file_name: Path of the text file to read.

    Returns:
        The file content as a single string with its original line breaks.
    """
    with open(full_file_name, 'r', encoding="utf-8", errors='ignore') as f:
        # read() keeps the file's own line breaks. Joining readlines() with
        # '\n' doubled every newline because each line already ends with one.
        return f.read()
# Walks PATH and turns every '.txt' file into a {'category', 'text'} record;
# the category is the name of the folder that directly contains the file.
def getData():
    """Return a list of ``{'category': ..., 'text': ...}`` dicts found under PATH."""
    return [
        {'category': os.path.basename(folder),
         'text': readFile(os.path.join(folder, name))}
        for folder, _subdirs, names in os.walk(PATH)
        for name in names
        if os.path.splitext(name)[1] == '.txt'
    ]

data_frame = pd.DataFrame(getData())
print(data_frame.head(-5))

# Remove and clean Docs/Words

In [None]:
# Cleans 'text' of any unnecessary content.
def clean_str(string):
    """Tokenize and normalize a raw text string.

    Original taken from
    https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py

    Steps: replace every character outside ``A-Za-z0-9(),!?'`` and the
    backtick with a space, split common English clitics ('s, 've, n't, 're,
    'd, 'll) off their host word, put spaces around ``, ! ( ) ?``, collapse
    runs of whitespace, then strip and lower-case.

    Args:
        string: The raw text.

    Returns:
        The cleaned, lower-cased, space-separated text.
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    # The replacement text must not contain a backslash: " \( " is an invalid
    # string escape and also left a literal backslash in front of the token.
    string = re.sub(r"\(", " ( ", string)
    string = re.sub(r"\)", " ) ", string)
    string = re.sub(r"\?", " ? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()

In [None]:
from nltk.corpus import stopwords
import nltk
from nltk.wsd import lesk
from nltk.corpus import wordnet as wn

stoplist = set(stopwords.words("english"))

## Remove Words

In [None]:
# Collect the raw text of every document: ONE list entry per '.txt' file.
# Later cells look documents up by data_frame row number, so this list has to
# contain whole documents (not individual lines).
# NOTE(review): this assumes os.walk(PATH) yields files in the same order here
# as in getData() -- confirm on the target filesystem.
doc_content_list = []

for root, dirs, files in os.walk(PATH):
    for f in files:
        if os.path.splitext(f)[1] == '.txt':
            # A distinct handle name avoids shadowing the loop variable `f`;
            # the encoding is explicit so decoding does not depend on the locale.
            with open(os.path.join(root, f), 'r', encoding="utf-8", errors='replace') as fh:
                # Join the non-empty lines of the file into a single document.
                doc_content_list.append(
                    ' '.join(line.strip() for line in fh if line.strip()))

#print(len(doc_content_list))
print(doc_content_list[2])
    

In [None]:
# Corpus-wide token counts; used by the next cell to drop rare words.
word_freq = {}

for raw_doc in doc_content_list:
    for token in clean_str(raw_doc).split():
        word_freq[token] = word_freq.get(token, 0) + 1

print(len(word_freq))
import itertools
# Peek at the first five (word, count) pairs in insertion order.
out = dict(itertools.islice(word_freq.items(), 5))
print(str(out))

### BBC clean text

In [None]:
# Drop stop words and rare words (corpus frequency below 5) from each entry.
# NOTE(review): entries that end up empty are skipped, which shifts the
# position of every later entry relative to doc_content_list -- confirm that
# the positional lookups into clean_docs further down tolerate this.
clean_docs = []
for raw_doc in doc_content_list:
    kept_tokens = [
        token
        for token in clean_str(raw_doc).split()
        if token not in stoplist and word_freq[token] >= 5
    ]
    doc_str = ' '.join(kept_tokens).strip()
    if doc_str:
        clean_docs.append(doc_str)
print(clean_docs[328])
clean_corpus_str = '\n'.join(clean_docs)

## Split data into Train and Test

In [None]:
# train_test_split returns its outputs grouped per input array:
# (X_train, X_test, y_train, y_test). Unpack in that order so that each name
# really holds what it says (previously train_y held the test texts and
# test_x held the training labels).
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    data_frame.text, data_frame.category, test_size=0.20)

data_frame.category[0]

## Create a list of lists with the details of all documents. Each inner list has the document number, the split ("train" or "test"), and the category

In [None]:
# Only the row labels of the split are used below: each label identifies one
# document (one row of data_frame).
trainx = pd.DataFrame(train_x)
testy = pd.DataFrame(test_y)


doc_name_list = []
doc_train_list = []
doc_test_list = []
train_ids = []
test_ids = []

# Each metadata entry is [document id, 'train' | 'test', category].
for doc in trainx.index:
    # Label-based lookup by column name (the chained positional form
    # .iloc[doc][0] relies on deprecated integer access on a Series).
    category = data_frame.loc[doc, 'category']
    meta = [doc, 'train', category]
    doc_name_list.append(meta)
    doc_train_list.append(meta)
    train_ids.append(doc)

for doc in testy.index:
    category = data_frame.loc[doc, 'category']
    meta = [doc, 'test', category]
    doc_name_list.append(meta)
    doc_test_list.append(meta)
    test_ids.append(doc)


#print(len(doc_name_list))
#print(len(doc_train_list))
#print(len(doc_test_list))


#shuffle
import random
random.shuffle(doc_train_list)
random.shuffle(doc_test_list)
random.shuffle(train_ids)
random.shuffle(test_ids)

test_ids_str = '\n'.join(str(index) for index in test_ids)
train_ids_str = '\n'.join(str(index) for index in train_ids)
# All training documents first, then all test documents.
ids = train_ids + test_ids

# doc_name_list is ordered by split position, NOT by document id, so
# doc_name_list[doc_id] would return the metadata (and label) of a different
# document. Look the metadata up by id to keep it paired with the text.
doc_meta_by_id = {meta[0]: meta for meta in doc_name_list}

shuffle_doc_name_list = [doc_meta_by_id[doc_id] for doc_id in ids]
shuffle_doc_words_list = [clean_docs[int(doc_id)] for doc_id in ids]

shuffle_doc_name_str = '\n'.join(
    f"{doc_id}\t{split}\t{label}" for doc_id, split, label in shuffle_doc_name_list)
shuffle_doc_words_str = '\n'.join(shuffle_doc_words_list)



## Build Vocab

In [None]:
# Vocabulary and token counts over the (shuffled) cleaned documents.
word_freq = {}
word_set = set()

for doc_words in shuffle_doc_words_list:
    for token in doc_words.split():
        word_set.add(token)
        word_freq[token] = word_freq.get(token, 0) + 1

# The vocabulary order is the iteration order of the set.
vocab = list(word_set)
vocab_size = len(vocab)
print(vocab_size)

x = list(word_set)

l = list(word_freq.items())
print(l[0:10])
    



## Build Word-Document list

In [None]:
# Maps each word to the positions (in shuffle_doc_words_list) of the
# documents that contain it; a document is listed once per word.
word_doc_list = {}

for doc_index, doc_words in enumerate(shuffle_doc_words_list):
    # dict.fromkeys removes repeats while keeping first-occurrence order.
    for word in dict.fromkeys(doc_words.split()):
        word_doc_list.setdefault(word, []).append(doc_index)

#print(len(word_doc_list))
#print(len(appeared))



l = list(word_doc_list.items())
print(l[2])

## Build Word-Document frequency dictionary

In [None]:
# Document frequency: in how many documents each word occurs.
word_doc_freq = {word: len(doc_ids) for word, doc_ids in word_doc_list.items()}

l = list(word_doc_freq.items())
print(l[2])

## Build Word ID Map

In [None]:
# Word -> position of the word in `vocab`.
word_id_map = {word: index for index, word in enumerate(vocab)}

vocab_str = '\n'.join(vocab)

l = list(word_id_map.items())
print(l[0:10])

## TF-IDF Document-Word edges

In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer

# One text per vocabulary word: all of its WordNet synset definitions joined
# by spaces, or the placeholder '<PAD>' when WordNet has none.
definitions = []
for word in vocab:
    glosses = [synset.definition() for synset in wn.synsets(clean_str(word.strip()))]
    definitions.append(' '.join(glosses) or '<PAD>')

# TF-IDF over the definition texts, limited to 1000 features.
tfidf_vec = TfidfVectorizer(max_features=1000)
tfidf_matrix = tfidf_vec.fit_transform(definitions)
tfidf_matrix_array = tfidf_matrix.toarray()

## Word Vectors

In [None]:

# Serialize each word with its TF-IDF row as "word v0 v1 ... vN".
word_vectors = [
    word + ' ' + ' '.join(str(value) for value in tfidf_matrix_array[index])
    for index, word in enumerate(vocab)
]
print(len(word_vectors))

## Word2Vec

In [None]:

# Parse the "word v0 v1 ..." strings back into a vocabulary list, a list of
# float vectors, and a word -> vector map. Entries with fewer than two
# values after the word are skipped.
word_vector_map = {}
vocab = []
embd = []

for entry in word_vectors:
    fields = entry.strip().split(' ')
    if len(fields) <= 2:
        continue
    token = fields[0]
    vector = [float(value) for value in fields[1:]]
    vocab.append(token)
    embd.append(vector)
    word_vector_map[token] = vector

print(vocab[0], embd[0], len(word_vectors))

## Build Category Label list

In [None]:
# Distinct category labels (third field of every metadata entry).
label_set = {doc_meta[2] for doc_meta in shuffle_doc_name_list}
# Sort so that the label -> one-hot index mapping is the same on every run;
# the iteration order of a set of strings changes between interpreter runs.
label_list = sorted(label_set)

label_list_str = '\n'.join(label_list)
print(label_list_str)

## Build training variables, we select only 90% of the training set

In [None]:
# Hold out 10% of the training documents for validation; the first
# real_train_size shuffled documents are the ones actually trained on.
train_size = len(train_ids)
val_size = int(0.1 * train_size)
real_train_size = train_size - val_size  # - int(0.5 * train_size)
# different training rates

real_train_doc_names = shuffle_doc_name_list[:real_train_size]
List2 = [f"{doc}\t{split}\t{label}" for doc, split, label in real_train_doc_names]
real_train_doc_names_str = '\n'.join(List2)

print(shuffle_doc_words_list[2])


## Build Graph
#### (This part will take a long time)

In [None]:
import time
start = time.process_time()
word_embeddings_dim = len(embd[0])

# Coordinate lists for the sparse training feature matrix: one row per
# training document holding the mean of its word vectors.
row_x = []
col_x = []
data_x = []
for i in range(real_train_size):
    doc_vec = np.zeros(word_embeddings_dim)
    words = shuffle_doc_words_list[i].split()
    doc_len = len(words)
    for word in words:
        if word in word_vector_map:
            # Accumulate the word vectors. The previous
            # `doc_vec = doc_vec = np.array(...)` overwrote the sum, so only
            # the last known word of a document contributed.
            doc_vec = doc_vec + np.array(word_vector_map[word])
    for j in range(word_embeddings_dim):
        row_x.append(i)
        col_x.append(j)
        data_x.append(doc_vec[j]/doc_len)


print("--- %s seconds ---" % (time.process_time() - start))

## Create the x (feature) and y (one-hot label) matrices for the training documents

In [None]:
import scipy.sparse as sp
# Sparse feature matrix of the documents that are actually trained on.
x = sp.csr_matrix((data_x, (row_x, col_x)), shape=(real_train_size, word_embeddings_dim))

# y: one one-hot label row per training document.
num_labels = len(label_list)
y = []
for row_index in range(real_train_size):
    one_hot = [0] * num_labels
    one_hot[label_list.index(shuffle_doc_name_list[row_index][2])] = 1
    y.append(one_hot)
y = np.array(y)
print(y[2])

## Feature Vectors of test docs, no initial features

In [None]:
test_size = len(test_ids)

# Coordinate lists for the sparse test feature matrix; test documents sit
# after the train_size training documents in shuffle_doc_words_list.
row_test_x = []
col_test_x = []
data_test_x = []

for i in range(test_size):
    words = shuffle_doc_words_list[i + train_size].split()
    doc_len = len(words)
    doc_vec = np.array([0.0] * word_embeddings_dim)
    for word in words:
        known_vector = word_vector_map.get(word)
        if known_vector is not None:
            doc_vec = doc_vec + np.array(known_vector)

    # One dense row: every column of document i, averaged over its length.
    row_test_x.extend([i] * word_embeddings_dim)
    col_test_x.extend(range(word_embeddings_dim))
    data_test_x.extend(doc_vec[j] / doc_len for j in range(word_embeddings_dim))
print(len(data_test_x))

In [None]:
# Sparse feature matrix and one-hot labels of the test documents.
test_x = sp.csr_matrix(
    (data_test_x, (row_test_x, col_test_x)),
    shape=(test_size, word_embeddings_dim),
)
num_labels = len(label_list)
test_y = []
for offset in range(test_size):
    one_hot = [0] * num_labels
    one_hot[label_list.index(shuffle_doc_name_list[offset + train_size][2])] = 1
    test_y.append(one_hot)
test_y = np.array(test_y)
print(test_y[0])

## Create feature vectors of both labeled and unlabeled training instances,(a superset of x)
### This part will take longer

In [None]:
start2 = time.process_time()

# Word feature rows: small random values, replaced by the known vector
# wherever the word has one.
word_vectors = np.random.uniform(-0.01, 0.01,
                                 (vocab_size, word_embeddings_dim))
for index, word in enumerate(vocab):
    if word in word_vector_map:
        word_vectors[index] = word_vector_map[word]

row_allx = []
col_allx = []
data_allx = []

# Rows 0 .. train_size-1: mean word vector of every training document
# (validation documents included).
for i in range(train_size):
    words = shuffle_doc_words_list[i].split()
    doc_len = len(words)
    doc_vec = np.array([0.0] * word_embeddings_dim)
    for word in words:
        known_vector = word_vector_map.get(word)
        if known_vector is not None:
            doc_vec = doc_vec + np.array(known_vector)
    row_allx.extend([int(i)] * word_embeddings_dim)
    col_allx.extend(range(word_embeddings_dim))
    data_allx.extend(doc_vec[j]/doc_len for j in range(word_embeddings_dim))

# Rows train_size .. train_size+vocab_size-1: one row per vocabulary word.
for i in range(vocab_size):
    row_allx.extend([int(i+train_size)] * word_embeddings_dim)
    col_allx.extend(range(word_embeddings_dim))
    data_allx.extend(word_vectors.item((i, j)) for j in range(word_embeddings_dim))



allx = sp.csr_matrix(
    (data_allx, (row_allx, col_allx)), shape=(train_size + vocab_size, word_embeddings_dim))

print("--- %s seconds ---" % (time.process_time() - start2))

In [None]:
# Labels matching the rows of allx: a one-hot row per training document,
# followed by an all-zero row per vocabulary word.
num_labels = len(label_list)
ally = []
for row_index in range(train_size):
    one_hot = [0] * num_labels
    one_hot[label_list.index(shuffle_doc_name_list[row_index][2])] = 1
    ally.append(one_hot)

ally.extend([0] * num_labels for _ in range(vocab_size))

ally = np.array(ally)
print(x.shape, y.shape, test_x.shape, test_y.shape, allx.shape, ally.shape)

## Word co-occurrence with context windows
### This will take some time

In [None]:
start3 = time.process_time()
window_size = 20
windows = []

# Slide a fixed-size window over every document; a document no longer than
# the window contributes itself as a single window.
for doc_words in shuffle_doc_words_list:
    words = doc_words.split()
    if len(words) <= window_size:
        windows.append(words)
        continue
    windows.extend(
        words[begin: begin + window_size]
        for begin in range(len(words) - window_size + 1)
    )

# Number of windows each word occurs in (counted once per window).
word_window_freq = {}
for window in windows:
    for token in dict.fromkeys(window):
        word_window_freq[token] = word_window_freq.get(token, 0) + 1

# Co-occurrence counts, keyed by "id_a,id_b" and stored in both orders.
word_pair_count = {}
for window in windows:
    window_ids = [word_id_map[token] for token in window]
    for i in range(1, len(window_ids)):
        for j in range(i):
            word_i_id = window_ids[i]
            word_j_id = window_ids[j]
            if word_i_id == word_j_id:
                continue
            for word_pair_str in (str(word_i_id) + ',' + str(word_j_id),
                                  str(word_j_id) + ',' + str(word_i_id)):
                word_pair_count[word_pair_str] = word_pair_count.get(word_pair_str, 0) + 1

print(len(word_pair_count))
print("--- %s seconds ---" % (time.process_time() - start3))

## Pointwise mutual information (PMI) as weights

In [None]:
from math import log

# Coordinate lists of the graph's adjacency matrix; word nodes are offset by
# train_size because the training documents occupy the first rows.
row = []
col = []
weight = []

num_window = len(windows)

for pair_key, count in word_pair_count.items():
    left_id, right_id = pair_key.split(',')
    i = int(left_id)
    j = int(right_id)
    word_freq_i = word_window_freq[vocab[i]]
    word_freq_j = word_window_freq[vocab[j]]
    pmi = log((1.0 * count / num_window) /
              (1.0 * word_freq_i * word_freq_j/(num_window * num_window)))
    # Only word pairs with positive PMI become edges.
    if pmi > 0:
        row.append(train_size + i)
        col.append(train_size + j)
        weight.append(pmi)

## word vector cosine similarity as weights
### This may take up to 91.9 minutes!

In [None]:
'''from scipy.spatial.distance import cosine
start4 = time.process_time()


for i in range(vocab_size):
    for j in range(vocab_size):
        if vocab[i] in word_vector_map and vocab[j] in word_vector_map:
            vector_i = np.array(word_vector_map[vocab[i]])
            vector_j = np.array(word_vector_map[vocab[j]])
            similarity = 1.0 - cosine(vector_i, vector_j)
            if similarity > 0.9:
                #print(vocab[i], vocab[j], similarity)
                row.append(train_size + i)
                col.append(train_size + j)
                weight.append(similarity)
print("--- %s seconds ---" % (time.process_time() - start4))''' 

## Doc Word Frequency

In [None]:
# Term frequency per (document, word) pair, keyed by "doc_id,word_id".
doc_word_freq = {}

for doc_id, doc_words in enumerate(shuffle_doc_words_list):
    for word in doc_words.split():
        doc_word_str = str(doc_id) + ',' + str(word_id_map[word])
        doc_word_freq[doc_word_str] = doc_word_freq.get(doc_word_str, 0) + 1

# Document-word edges weighted by TF-IDF. Test documents are placed after
# the word nodes, hence the vocab_size offset on their row index.
for i, doc_words in enumerate(shuffle_doc_words_list):
    doc_row = i if i < train_size else i + vocab_size
    for word in dict.fromkeys(doc_words.split()):
        j = word_id_map[word]
        freq = doc_word_freq[str(i) + ',' + str(j)]
        idf = log(1.0 * len(shuffle_doc_words_list) /
                  word_doc_freq[vocab[j]])
        row.append(doc_row)
        col.append(train_size + j)
        weight.append(freq * idf)

## Build Nodes and Adjacency Matrix with the proper weights

In [None]:
# One graph node per training document, vocabulary word and test document.
node_size = train_size + vocab_size + test_size
# Sparse adjacency matrix assembled from the (row, col, weight) edge lists
# filled in by the PMI and TF-IDF cells.
adj = sp.csr_matrix(
    (weight, (row, col)), shape=(node_size, node_size))

# Train

In [None]:
import torch
import torch.nn as nn

# Fixed seed so NumPy and PyTorch draw the same random numbers on every run.
# (A random seed used to be drawn here and was overwritten on the next line.)
seed = 2019
np.random.seed(seed)
torch.manual_seed(seed)

## Docstring for CONFIG

In [None]:
class CONFIG:
    """Container for the run's hyper-parameters, set as instance attributes."""

    def __init__(self):
        super().__init__()

        settings = {
            'dataset': 'bbc',
            'model': 'gcn',          # 'gcn', 'gcn_cheby', 'dense'
            'learning_rate': 0.02,   # Initial learning rate.
            'epochs': 200,           # Number of epochs to train.
            'hidden1': 200,          # Number of units in hidden layer 1.
            'dropout': 0.5,          # Dropout rate (1 - keep probability).
            'weight_decay': 0.,      # Weight for L2 loss on embedding matrix.
            'early_stopping': 10,    # Tolerance for early stopping (# of epochs).
            'max_degree': 3,         # Maximum Chebyshev polynomial degree.
        }
        for name, value in settings.items():
            setattr(self, name, value)

## Multilayer Perceptron

In [None]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Dropout -> Linear.

    Args:
        input_dim: Size of each input feature vector.
        dropout_rate: Dropout probability applied after the activation.
        num_classes: Size of the output layer.
    """

    def __init__(self, input_dim, dropout_rate=0., num_classes=10):
        super().__init__()

        hidden_units = 200
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the output-layer values (no softmax) for input ``x``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(hidden)

## Graph Convolutional Network\Convolution

In [None]:
class GraphConvolution(nn.Module):
    """Graph convolution layer: ``act(sum_i support[i] @ (x @ W_i) + b)``.

    Args:
        input_dim: Number of rows of every weight matrix ``W_i``.
        output_dim: Number of columns of every weight matrix ``W_i``.
        support: Sequence of 2-D tensors (one per weight matrix) that are
            multiplied with the projected features.
        act_func: Optional callable applied to the summed output.
        featureless: If True the input ``x`` is not used and ``W_i`` itself
            is multiplied with the support (as if ``x`` were the identity).
        dropout_rate: Dropout probability applied to ``x``.
        bias: If True a learnable bias of shape ``(1, output_dim)`` is added.
    """

    def __init__( self, input_dim, \
                        output_dim, \
                        support, \
                        act_func = None, \
                        featureless = False, \
                        dropout_rate = 0., \
                        bias=False):
        super(GraphConvolution, self).__init__()
        self.support = support
        self.featureless = featureless

        # One weight matrix per support matrix, registered as W0, W1, ...
        for i in range(len(self.support)):
            setattr(self, 'W{}'.format(i), nn.Parameter(torch.randn(input_dim, output_dim)))

        if bias:
            self.b = nn.Parameter(torch.zeros(1, output_dim))
        else:
            # Keep the attribute defined so forward() can test it.
            self.register_parameter('b', None)

        self.act_func = act_func
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Apply the layer to ``x``; the result is also kept in ``self.embedding``.

        Raises:
            ValueError: If the layer was built with an empty ``support``.
        """
        x = self.dropout(x)

        out = None
        for i, adjacency in enumerate(self.support):
            weight_i = getattr(self, 'W{}'.format(i))
            pre_sup = weight_i if self.featureless else x.mm(weight_i)
            term = adjacency.mm(pre_sup)
            out = term if out is None else out + term

        if out is None:
            raise ValueError("GraphConvolution needs at least one support matrix")

        # The bias used to be created but never added to the output.
        if self.b is not None:
            out = out + self.b

        if self.act_func is not None:
            out = self.act_func(out)

        self.embedding = out
        return out


class GCN(nn.Module):
    """Two stacked GraphConvolution layers.

    The first layer is featureless (it ignores its input and uses its weight
    matrix directly), so ``input_dim`` has to match the number of columns of
    the support matrices.

    Args:
        input_dim: Number of rows of the first layer's weight matrix.
        support: Sequence of support tensors shared by both layers.
        dropout_rate: Dropout probability used in both layers.
        num_classes: Output size of the second layer.
        hidden_dim: Output size of the first layer (defaults to the
            previously hard-coded 200).
    """

    def __init__( self, input_dim, \
                        support,\
                        dropout_rate=0., \
                        num_classes=10, \
                        hidden_dim=200):
        super(GCN, self).__init__()

        # GraphConvolution
        self.layer1 = GraphConvolution(input_dim, hidden_dim, support, act_func=nn.ReLU(), featureless=True, dropout_rate=dropout_rate)
        self.layer2 = GraphConvolution(hidden_dim, num_classes, support, dropout_rate=dropout_rate)

    def forward(self, x):
        """Return the second layer's output for input ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        return out

## Load Data/Corpus

In [None]:
#def load_data(x, y, tx, ty, allx, ally):
# Stack train/word rows (allx, ally) on top of the test rows so that features
# and labels follow the node order of the adjacency matrix.
features = sp.vstack((allx, test_x)).tolil()
labels = np.vstack((ally, test_y))

# Validation documents are the training documents that are not part of x.
val_size = train_size - x.shape[0]
test_size = test_x.shape[0]

num_real_train = len(y)
idx_train = range(num_real_train)
idx_val = range(num_real_train, num_real_train + val_size)
idx_test = range(allx.shape[0], allx.shape[0] + test_size)


## Create Sample Mask

In [None]:
def sample_mask(idx, l):
    """Return a boolean mask of length ``l`` that is True at the positions ``idx``.

    Args:
        idx: Integer positions to mark (any index NumPy accepts, e.g. a
            list or a ``range``).
        l: Length of the mask.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``bool``.
    """
    # The builtin `bool` replaces the `np.bool` alias removed in NumPy 1.24.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask

In [None]:
# Boolean node masks for the three splits.
num_nodes = labels.shape[0]
train_mask = sample_mask(idx_train, num_nodes)
val_mask = sample_mask(idx_val, num_nodes)
test_mask = sample_mask(idx_test, num_nodes)

# Per-split label matrices: zero everywhere except the rows of that split.
y_train, y_val, y_test = (np.zeros(labels.shape) for _ in range(3))
for split_labels, split_mask in ((y_train, train_mask),
                                 (y_val, val_mask),
                                 (y_test, test_mask)):
    split_labels[split_mask, :] = labels[split_mask, :]

# Wherever the transposed entry is larger, take it instead of the original
# entry, which makes the matrix symmetric.
transpose_is_larger = adj.T > adj
adj = adj + adj.T.multiply(transpose_is_larger) - adj.multiply(transpose_is_larger)

## Pre-processing

In [None]:
def preprocess_features(features):
    """Row-normalize a sparse feature matrix and return it as a dense array.

    Every row is divided by its sum; rows whose sum is zero are left as zeros.

    Args:
        features: A SciPy sparse matrix.

    Returns:
        The row-normalized matrix as a dense ``numpy.ndarray``.
    """
    # Cast to float so integer matrices can take a negative power.
    rowsum = np.asarray(features.sum(1), dtype=np.float64)
    # Zero rows yield inf here; they are reset to 0 right below.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    # return sparse_to_tuple(features)
    # toarray() replaces the `.A` shortcut removed from recent SciPy versions.
    return features.toarray()

def normalize_adj(adj):
    """Scale an adjacency matrix by the inverse square root of its row sums.

    With ``D`` the diagonal matrix of row sums of ``adj``, the result is
    ``(adj @ D^-1/2).T @ D^-1/2`` in COO format. Rows that sum to zero get a
    scaling factor of 0.
    """
    adj = sp.coo_matrix(adj)
    degree = np.array(adj.sum(1))
    scale = np.power(degree, -0.5).flatten()
    scale[np.isinf(scale)] = 0.
    scaling = sp.diags(scale)
    right_scaled = adj.dot(scaling)
    return right_scaled.transpose().dot(scaling).tocoo()

def preprocess_adj(adj):
    """Add self-loops to ``adj``, normalize it and return a dense array.

    Args:
        adj: Square SciPy sparse adjacency matrix.

    Returns:
        ``normalize_adj(adj + I)`` as a dense ``numpy.ndarray``.
    """
    adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0]))
    # return sparse_to_tuple(adj_normalized)
    # toarray() replaces the `.A` shortcut removed from recent SciPy versions.
    return adj_normalized.toarray()


## Processing

In [None]:
# The node features are replaced by an identity matrix with one row per
# node, then row-normalized and made dense.
features = preprocess_features(sp.identity(features.shape[0]))

# A single support matrix: the normalized adjacency with self-loops.
support = [preprocess_adj(adj)]
num_supports = 1
model_func = GCN

## Define Placeholders

In [None]:
# Tensor versions of the NumPy inputs.
t_features = torch.from_numpy(features)
t_y_train, t_y_val, t_y_test = (
    torch.from_numpy(split_labels) for split_labels in (y_train, y_val, y_test)
)
t_train_mask = torch.from_numpy(train_mask.astype(np.float32))
# Column vector of the training mask, repeated once per class.
tm_train_mask = t_train_mask.unsqueeze(0).transpose(1, 0).repeat(1, y_train.shape[1])

t_support = [torch.Tensor(matrix) for matrix in support]

model = model_func(input_dim=features.shape[0], support=t_support, num_classes=y_train.shape[1])


## Loss and Optimizer

In [None]:
# Hyper-parameters for this run.
cfg=CONFIG()
# Cross-entropy over class indices.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters; only cfg.learning_rate is passed here
# (cfg.weight_decay is not handed to the optimizer).
optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learning_rate)


## Define Model and Evaluation Function

In [None]:
def evaluate(features, labels, mask):
    """Evaluate the global ``model`` on the nodes selected by ``mask``.

    Args:
        features: Input tensor passed to ``model``.
        labels: One-hot label tensor with one row per node.
        mask: Boolean NumPy array marking the nodes to evaluate.

    Returns:
        A tuple ``(loss, accuracy, predictions, labels, duration)``: the loss
        as a NumPy value, the accuracy over the masked nodes, the predicted
        class index of every node, ``labels`` as a NumPy array, and the
        elapsed wall-clock time in seconds.
    """
    t_test = time.time()
    model.eval()
    with torch.no_grad():
        logits = model(features)
        t_mask = torch.from_numpy(np.array(mask*1., dtype=np.float32))
        targets = torch.max(labels, 1)[1]
        # Average the loss over the masked nodes only. Multiplying the logits
        # by the mask still averaged over ALL nodes, each unmasked node adding
        # a constant term to the reported loss.
        selected = t_mask > 0
        loss = criterion(logits[selected], targets[selected])
        pred = torch.max(logits, 1)[1]
        acc = ((pred == targets).float() * t_mask).sum().item() / t_mask.sum().item()

    return loss.numpy(), acc, pred.numpy(), labels.numpy(), (time.time() - t_test)


In [None]:
import datetime
def print_log(msg='', end='\n'):
    """Print ``msg`` with a ``[Y/M/D hh:mm:ss]`` prefix on every line.

    Args:
        msg: A string (split on newlines, each line gets its own prefix) or
            any other object (printed via ``str`` on a single line).
        end: Terminator of the LAST printed line; earlier lines always end
            with a newline.
    """
    now = datetime.datetime.now()
    t = str(now.year) + '/' + str(now.month) + '/' + str(now.day) + ' ' \
      + str(now.hour).zfill(2) + ':' + str(now.minute).zfill(2) + ':' + str(now.second).zfill(2)

    lines = msg.split('\n') if isinstance(msg, str) else [msg]

    # Detect the last line by position. Comparing values applied `end` to
    # every line equal to the last one and raised for array-like messages.
    last_index = len(lines) - 1
    for index, line in enumerate(lines):
        print('[' + t + '] ' + str(line), end=end if index == last_index else '\n')

## Train Model

In [None]:
val_losses = []
# Class index of every node and the rows that belong to the training split.
train_targets = torch.max(t_y_train, 1)[1]
train_rows = t_train_mask > 0

for epoch in range(cfg.epochs):

    t = time.time()

    # evaluate() switches the model to eval mode; switch back before every
    # training step so dropout is active while training.
    model.train()

    # Forward pass
    logits = model(t_features)
    # Average the loss over the training nodes only (masking the logits still
    # averaged over all nodes).
    loss = criterion(logits[train_rows], train_targets[train_rows])
    acc = ((torch.max(logits, 1)[1] == train_targets).float() * t_train_mask).sum().item() / t_train_mask.sum().item()

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Validation
    val_loss, val_acc, pred, labels, duration = evaluate(t_features, t_y_val, val_mask)
    val_losses.append(val_loss)

    print_log("Epoch: {:.0f}, train_loss= {:.5f}, train_acc= {:.5f}, val_loss= {:.5f}, val_acc= {:.5f}, time= {:.5f}"\
                .format(epoch + 1, loss.item(), acc, float(val_loss), val_acc, time.time() - t))

    # Stop once the latest validation loss exceeds the mean of the previous
    # cfg.early_stopping validation losses.
    if epoch > cfg.early_stopping and val_losses[-1] > np.mean(val_losses[-(cfg.early_stopping+1):-1]):
        print_log("Early stopping...")
        break


print_log("Optimization Finished!")

## Testing

In [None]:
from sklearn import metrics
test_loss, test_acc, pred, labels, test_duration = evaluate(t_features, t_y_test, test_mask)
print_log("Test set results: \n\t loss= {:.5f}, accuracy= {:.5f}, time= {:.5f}".format(test_loss, test_acc, test_duration))

# Keep predictions and true classes of the test nodes only.
test_rows = [i for i in range(len(test_mask)) if test_mask[i]]
test_pred = [pred[i] for i in test_rows]
test_labels = [np.argmax(labels[i]) for i in test_rows]


print_log("Test Precision, Recall and F1-Score...")
print_log(metrics.classification_report(test_labels, test_pred, digits=4))
for average in ('macro', 'micro'):
    print_log(average.capitalize() + " average Test Precision, Recall and F1-Score...")
    print_log(metrics.precision_recall_fscore_support(test_labels, test_pred, average=average))


## Doc and Word Embeddings

In [None]:
# Output of the first graph convolution from the most recent forward pass,
# split by node type: training docs, then words, then test docs.
tmp = model.layer1.embedding.numpy()
first_test_row = adj.shape[0] - test_size
train_doc_embeddings = tmp[:train_size]  # include val docs
word_embeddings = tmp[train_size: first_test_row]
test_doc_embeddings = tmp[first_test_row:]

print_log('Embeddings:')
for caption, block in (('\rWord_embeddings:', word_embeddings),
                       ('\rTrain_doc_embeddings:', train_doc_embeddings),
                       ('\rTest_doc_embeddings:', test_doc_embeddings)):
    print_log(caption + str(len(block)))
print_log('\rWord_embeddings:')
print(word_embeddings)

In [None]:
# "word v0 v1 ..." line for every vocabulary word.
word_vectors = [
    vocab[i].strip() + ' ' + ' '.join([str(value) for value in word_embeddings[i]])
    for i in range(vocab_size)
]

word_embeddings_str = '\n'.join(word_vectors)
#print(word_vectors[0])

# "doc_<n> v0 v1 ..." line for every document: training documents first,
# then test documents, numbered consecutively.
doc_vectors = []
doc_id = 0
for embeddings, count in ((train_doc_embeddings, train_size),
                          (test_doc_embeddings, test_size)):
    for i in range(count):
        doc_vector_str = ' '.join([str(value) for value in embeddings[i]])
        doc_vectors.append('doc_' + str(doc_id) + ' ' + doc_vector_str)
        doc_id += 1

doc_embeddings_str = '\n'.join(doc_vectors)