<a href="https://colab.research.google.com/github/Rick-Feng-u/Deep-Learning-Models-for-Structured-Data/blob/main/Training_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pickle
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sys
sys.path.append('/content/drive/MyDrive/Honour_Thesis')

In [None]:
from Util import sequence

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class Encoder(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call.

    Args:
        input_size: Number of rows in the embedding table.
        hidden_size: Width of both the embedding vectors and the GRU state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed one token and advance the GRU by a single step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous GRU state of shape (1, 1, hidden_size).

        Returns:
            The ``(output, hidden)`` tuple produced by the GRU for this step.
        """
        # Reshape the embedding to (1, 1, hidden_size): one step, one sequence.
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def init_hidden(self):
        """Return an all-zero (1, 1, hidden_size) state on the weights' device."""
        # Follow the module's own parameters rather than a notebook-level
        # global, so the state can never end up on a different device than
        # the weights it is combined with.
        return torch.zeros(1, 1, self.hidden_size,
                           device=self.embedding.weight.device)

In [None]:
class Decoder(nn.Module):
    """Plain GRU decoder (no attention) that emits log-probabilities per step.

    Args:
        hidden_size: Width of both the embedding vectors and the GRU state.
        output_size: Number of rows in the embedding table and number of
            output classes.
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous GRU state of shape (1, 1, hidden_size).

        Returns:
            Tuple ``(log_probs, hidden)`` where ``log_probs`` has shape
            (1, output_size).
        """
        # Reshape the embedding to (1, 1, hidden_size): one step, one sequence.
        embedded = self.embedding(input).view(1, 1, -1)
        activated = F.relu(embedded)
        gru_out, hidden = self.gru(activated, hidden)
        # gru_out[0] drops the leading step dimension before the projection.
        log_probs = self.softmax(self.out(gru_out[0]))
        return log_probs, hidden

    def init_hidden(self):
        """Return an all-zero (1, 1, hidden_size) state on the weights' device."""
        # Follow the module's own parameters rather than a notebook-level
        # global, so the state always matches the device of the weights.
        return torch.zeros(1, 1, self.hidden_size,
                           device=self.embedding.weight.device)

In [None]:
class attention_decoder(nn.Module):
    """GRU decoder that attends over a fixed-length buffer of encoder outputs.

    Args:
        hidden_size: Width of the embeddings and of the GRU state.
        output_size: Number of rows in the embedding table and number of
            output classes.
        max_length: Number of encoder-output rows the attention layer scores.
        dropout_p: Dropout probability applied to the embedded input token.
    """

    def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one attention-weighted decoding step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous GRU state of shape (1, 1, hidden_size).
            encoder_outputs: Tensor of shape (max_length, hidden_size); the
                batched matrix product below requires exactly these sizes.

        Returns:
            Tuple ``(log_probs, hidden, attn_weights)`` with shapes
            (1, output_size), (1, 1, hidden_size) and (1, max_length).
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Score every encoder position from the current input embedding and
        # the previous hidden state, then normalise the scores to weights.
        attn_scores = self.attn(torch.cat((embedded[0], hidden[0]), 1))
        attn_weights = F.softmax(attn_scores, dim=1)

        # (1, 1, max_length) x (1, max_length, hidden) -> (1, 1, hidden):
        # the attention-weighted sum of the encoder outputs.
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        # Merge the embedding with the attended context back to hidden_size.
        combined = torch.cat((embedded[0], attn_applied[0]), 1)
        gru_input = F.relu(self.attn_combine(combined).unsqueeze(0))

        output, hidden = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def init_hidden(self):
        """Return an all-zero (1, 1, hidden_size) state on the weights' device."""
        # Follow the module's own parameters rather than a notebook-level
        # global, so the state always matches the device of the weights.
        return torch.zeros(1, 1, self.hidden_size,
                           device=self.embedding.weight.device)

In [None]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,
          max_length):
    """Run one teacher-forced optimisation step on a single sequence pair.

    Args:
        input_tensor: Input token indices, read one row at a time along dim 0.
        target_tensor: Target token indices, read one row at a time along dim 0.
        encoder: Module called as ``encoder(token, hidden)``; must expose
            ``init_hidden()`` and ``hidden_size``.
        decoder: Module called as ``decoder(token, hidden, encoder_outputs)``
            and returning ``(output, hidden, attention)``.
        encoder_optimizer: Optimizer over the encoder parameters.
        decoder_optimizer: Optimizer over the decoder parameters.
        criterion: Loss called as ``criterion(decoder_output, target_token)``.
        max_length: Number of rows in the encoder-output buffer.

    Returns:
        The summed loss divided by the number of target tokens.

    Raises:
        ValueError: If ``target_tensor`` is empty, or the input has more
            tokens than ``max_length`` rows available in the buffer.
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Fail early with a clear message instead of an opaque AttributeError
    # (backward() on the int 0) or IndexError further down.
    if target_length == 0:
        raise ValueError("target_tensor must contain at least one token")
    if input_length > max_length:
        raise ValueError(
            "input sequence length {} exceeds max_length {}".format(
                input_length, max_length))

    # Allocate helper tensors next to the data instead of relying on a
    # notebook-level global device.
    work_device = input_tensor.device

    encoder_hidden = encoder.init_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Rows past input_length stay zero.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=work_device)

    for position in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[position], encoder_hidden)
        encoder_outputs[position] = encoder_output[0, 0]

    # Index 0 is fed as the very first decoder input.
    decoder_input = torch.tensor([[0]], device=work_device)

    # The decoder starts from the encoder's final state.
    decoder_hidden = encoder_hidden

    loss = 0
    for step in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[step])
        # Teacher forcing: the next input is the ground-truth token.
        decoder_input = target_tensor[step]

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
def show_plot(points):
    """Plot a sequence of values with y-axis ticks every 0.2.

    Args:
        points: Sequence of numbers to plot against their index.
    """
    # plt.subplots() creates the figure itself; calling plt.figure() first
    # would leave an extra, empty figure open on every call.
    fig, ax = plt.subplots()
    # This locator puts ticks at regular intervals.
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)
    plt.show()

In [None]:
def train_iters(training_pairs, max_length, encoder, decoder, print_every=1000, plot_every=100, learning_rate=0.01):
    """Make one pass over ``training_pairs``, training on each pair in turn.

    Args:
        training_pairs: Sequence whose entries hold a pair at index 0; the
            pair's index 0 is the input index sequence and index 1 is the
            target index sequence.
        max_length: Passed through to ``train`` as the encoder buffer length.
        encoder: Encoder module to optimise.
        decoder: Decoder module to optimise.
        print_every: Print the running mean loss after this many pairs.
        plot_every: Record a mean-loss point for the plot after this many pairs.
        learning_rate: Learning rate of both SGD optimizers.

    The recorded mean losses are plotted with ``show_plot`` at the end.
    Pairs left over after the last full window are not reported.
    """
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    # Count from 1 so a report fires only after a full window of pairs has
    # been accumulated; counting from 0 would report after the very first
    # pair and divide a single loss by the whole window size.
    for seen, entry in enumerate(training_pairs, start=1):
        training_pair = entry[0]

        # Build new lists instead of appending in place: mutating the
        # caller's sequences would add another EOS every time the same data
        # is trained on again.
        input_idx_seq = list(training_pair[0]) + [1]  # EOS
        target_idx_seq = list(training_pair[1]) + [1]

        input_tensor = torch.tensor(input_idx_seq, dtype=torch.long, device=device).view(-1, 1)
        target_tensor = torch.tensor(target_idx_seq, dtype=torch.long, device=device).view(-1, 1)

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion, max_length)
        print_loss_total += loss
        plot_loss_total += loss

        if seen % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print("loss average  " + str(print_loss_avg))

        if seen % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    show_plot(plot_losses)

# **Wiki pairs**

In [None]:
# Collect the Wiki training pairs from their pickle shards, then load the
# input/output sequence-class objects.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
train_pairs = []

for shard_path in ('wiki_seq_1.pkl', 'wiki_seq_2.pkl', 'wiki_seq_3.pkl',
                   'wiki_seq_4.pkl', 'wiki_seq_5.pkl', 'wiki_seq_6.pkl',
                   'wiki_seq_9.pkl'):
    with open(shard_path, 'rb') as f:
        pair = pickle.load(f)
    train_pairs.extend(pair)

with open('wiki_input_seq_class.pkl', 'rb') as f:
    input_seq_class = pickle.load(f)

with open('wiki_output_seq_class.pkl', 'rb') as f:
    out_seq_class = pickle.load(f)

In [None]:
# Peek at index 0 of one training entry to check what was loaded.
print(train_pairs[1000][0])

In [None]:
# Largest len(entry[0]) over the training entries, plus 2 spare slots.
# NOTE(review): train_iters treats entry[0] as the (input, target) pair, so
# this may be measuring the pair rather than the input sequence - confirm.
list_len = [len(entry[0]) for entry in train_pairs]
train_max = 2 + max(list_len)

In [None]:
# Collect the Wiki test pairs from their pickle shards.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
test_pairs = []

for shard_path in ('wiki_seq_7.pkl', 'wiki_seq_8.pkl'):
    with open(shard_path, 'rb') as f:
        pair = pickle.load(f)
    test_pairs.extend(pair)

In [None]:
# Same length statistic for the test entries; the larger of the two values
# is stored on the input sequence class and printed.
# NOTE(review): train_iters treats entry[0] as the (input, target) pair, so
# this may be measuring the pair rather than the input sequence - confirm.
list_len = [len(entry[0]) for entry in test_pairs]
test_max = 2 + max(list_len)
input_seq_class.highest_length = max(train_max, test_max)
print(input_seq_class.highest_length)

In [None]:
# Restore previously saved encoder/decoder objects from the working directory.
# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
# runtime (and vice versa) instead of failing during deserialisation.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects pickled whole-module checkpoints; if these
# files hold full modules, pass weights_only=False there - confirm.
encoder_ = torch.load('encoder.pth', map_location=device)
decoder_ = torch.load('decoder.pth', map_location=device)

# **DBLP pairs**

In [None]:
# Collect the DBLP pairs from their pickle shards, then load the
# input/output sequence-class objects.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
pairs = []

for shard_path in ('dblp_seq_1.pkl', 'dblp_seq_2.pkl', 'dblp_seq_3.pkl'):
    with open(shard_path, 'rb') as f:
        pair = pickle.load(f)
    pairs.extend(pair)

with open('dblp_input_seq_class.pkl', 'rb') as f:
    input_seq_class = pickle.load(f)

with open('dblp_output_seq_class.pkl', 'rb') as f:
    out_seq_class = pickle.load(f)

In [None]:
# Inspect the `element` attribute of the output sequence class.
print(out_seq_class.element)

In [None]:
# Count how many DBLP entries carry each label code 2..9, where the label is
# the first element of the pair's second sequence. Other codes are ignored.
label_tally = dict.fromkeys(range(2, 10), 0)
for pair in pairs:
    label = pair[0][1][0]
    if label in label_tally:
        label_tally[label] += 1

# Unpack in code order: 2, 3, 4, 5, 6, 7, 8, 9.
article, book, processing, inprocessding, www, master, colloc, phd = (
    label_tally[code] for code in range(2, 10))

print("a: " +str(article) + " b: "+str(book)+" p: "+str(processing)+" i: "+str(inprocessding)+" w: "+str(www)+" m: "+str(master)+" c: "+str(colloc)+" phd: "+str(phd))

In [None]:
# Hold out a fixed number of entries per label code as the test split; every
# other entry stays in the training split.
test_pairs = []
num_art = 2000
num_book = 800
num_pro = 500
num_inpro = 2000
num_www = 800
num_inco = 500
num_phd = 500

# Entries still to be held out per label code. Code 7 has no quota, so none
# of its entries are moved to the test split. The num_* names above keep
# their configured values; the countdown happens in this dict.
remaining_quota = {
    2: num_art,
    3: num_book,
    4: num_pro,
    5: num_inpro,
    6: num_www,
    8: num_inco,
    9: num_phd,
}

# Build both splits in a single pass. Calling pairs.remove() while iterating
# over `pairs` shifts the later items down, so the loop silently skips the
# entry that follows every removal (and each remove() is a linear scan).
kept_pairs = []
for pair in pairs:
    label = pair[0][1][0]
    if remaining_quota.get(label, 0) > 0:
        test_pairs.append(pair)
        remaining_quota[label] -= 1
    else:
        kept_pairs.append(pair)

# As before, `pairs` ends up holding only the training entries and
# `train_pairs` is the same list object.
pairs[:] = kept_pairs
train_pairs = pairs
print(test_pairs)


In [None]:
# Reserve two extra positions on top of the stored maximum length.
# NOTE: not idempotent - every re-run of this cell adds another 2.
input_seq_class.highest_length += 2

# **Training Code**

In [None]:
# Build a fresh encoder/attention-decoder pair, train it with one pass over
# train_pairs, and save both modules (whole objects) to the mounted Drive folder.
hidden_size = 256
# NOTE(review): `epoch` is not referenced anywhere in the visible code;
# train_iters makes a single pass over the data.
epoch = 2
in_size = input_seq_class.size_of_index
out_size= out_seq_class.size_of_index
encoder_ = Encoder(in_size, hidden_size).to(device)
decoder_ = attention_decoder(hidden_size, out_size, input_seq_class.highest_length).to(device)
# Alternative decoder without attention (note: train() always passes
# encoder_outputs as a third argument, which Decoder.forward does not accept).
#decoder_ = Decoder(hidden_size, out_size).to(device)
train_iters(train_pairs, input_seq_class.highest_length, encoder_, decoder_)
torch.save(encoder_, 'drive/MyDrive/Honour_Thesis/encoder.pth')
torch.save(decoder_, 'drive/MyDrive/Honour_Thesis/decoder.pth')

In [None]:
from google.colab import files
# Download the checkpoints from the same relative paths the training cell
# saved them to; a bare "encoder.pth" would point at the working directory,
# which is not where torch.save() wrote the files.
files.download('drive/MyDrive/Honour_Thesis/encoder.pth')
files.download('drive/MyDrive/Honour_Thesis/decoder.pth')

# **Evaluation Code**

In [None]:
def evaluate(testing_seq, encoder, decoder, output_class, max_length):
    """Greedily decode one input sequence with an encoder/decoder pair.

    Args:
        testing_seq: Tensor of input token indices, read one row at a time
            along dim 0.
        encoder: Module called as ``encoder(token, hidden)``; must expose
            ``init_hidden()`` and ``hidden_size``.
        decoder: Module called as ``decoder(token, hidden, encoder_outputs)``
            and returning ``(output, hidden, attention)``.
        output_class: Unused; kept so existing call sites keep working.
        max_length: Number of rows in the encoder-output buffer and the
            maximum number of decoding steps.

    Returns:
        Tuple ``(decoded_words, attentions)``: the predicted token indices
        (the stop token 1 is not included) and one attention row per
        decoding step that was executed.

    Raises:
        ValueError: If the input has more tokens than ``max_length``.
    """
    input_tensor = testing_seq
    input_length = input_tensor.size(0)
    if input_length > max_length:
        raise ValueError(
            "input sequence length {} exceeds max_length {}".format(
                input_length, max_length))

    # Allocate helper tensors next to the data instead of relying on a
    # notebook-level global device.
    work_device = input_tensor.device

    # Decode in evaluation mode so stochastic layers such as dropout are
    # switched off, then restore whatever mode the modules were in.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            encoder_hidden = encoder.init_hidden()
            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=work_device)

            for position in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[position],
                                                         encoder_hidden)
                encoder_outputs[position] = encoder_output[0, 0]

            decoder_input = torch.tensor([[0]], device=work_device)  # SOS
            decoder_hidden = encoder_hidden

            decoded_words = []
            # Kept on the default device, as before.
            decoder_attentions = torch.zeros(max_length, max_length)

            # Track the executed steps explicitly rather than reading the
            # loop variable after the loop, which is undefined when
            # max_length is 0.
            steps_taken = 0
            for step in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[step] = decoder_attention
                steps_taken = step + 1

                best_index = decoder_output.topk(1)[1]
                if best_index.item() == 1:  # stop token
                    break
                decoded_words.append(best_index.item())

                # Feed the prediction back in as the next input.
                decoder_input = best_index.squeeze().detach()

            return decoded_words, decoder_attentions[:steps_taken]
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

In [None]:
# Inspect the `element` attribute of the output sequence class.
print(out_seq_class.element)

In [None]:
def evaluate_iter(pairs, encoder, decoder, out_seq_class, input_seq_class):
    """Decode every entry of ``pairs`` and report exact-match accuracy.

    Args:
        pairs: Sequence whose entries hold a pair at index 0; the pair's
            index 0 is the input index sequence and index 1 the true target.
        encoder: Encoder module used for decoding.
        decoder: Decoder module used for decoding.
        out_seq_class: Passed through to ``evaluate``.
        input_seq_class: Supplies ``highest_length`` as the maximum length.

    Returns:
        Fraction of entries whose decoded sequence equals the true target,
        or 0.0 when ``pairs`` is empty.
        NOTE(review): assumes the stored targets do not already contain the
        stop token that ``evaluate`` strips - confirm.
    """
    total_correct_prediction = 0
    total_test_pairs = len(pairs)
    for entry in pairs:
        pair = entry[0]
        input_tensor = torch.tensor(pair[0], dtype=torch.long, device=device).view(-1, 1)
        # Use the models that were passed in, not the notebook globals.
        target, attentions = evaluate(input_tensor, encoder, decoder, out_seq_class, input_seq_class.highest_length)

        true_target = pair[1]

        print(true_target)

        if target == list(true_target):
            total_correct_prediction += 1

    if total_test_pairs == 0:
        return 0.0
    return total_correct_prediction / total_test_pairs

In [None]:
def evaluate_iter(pairs, encoder, decoder, out_seq_class, input_seq_class, sample_index=100):
    """Decode a single entry of ``pairs`` as a spot check.

    This definition reuses the name of the earlier ``evaluate_iter`` and
    therefore replaces it once this cell has run.

    Args:
        pairs: Sequence whose entries hold a pair at index 0; the pair's
            index 0 is the input index sequence.
        encoder: Encoder module used for decoding.
        decoder: Decoder module used for decoding.
        out_seq_class: Passed through to ``evaluate``.
        input_seq_class: Supplies ``highest_length`` as the maximum length.
        sample_index: Position in ``pairs`` of the entry to decode.

    Returns:
        The ``(decoded_words, attentions)`` tuple produced by ``evaluate``.
    """
    pair = pairs[sample_index][0]
    input_tensor = torch.tensor(pair[0], dtype=torch.long, device=device).view(-1, 1)
    # Use the models that were passed in, not the notebook globals.
    target, attentions = evaluate(input_tensor, encoder, decoder, out_seq_class, input_seq_class.highest_length)
    return target, attentions

In [None]:
# Run the most recently defined evaluate_iter on the test pairs.
evaluate_iter(test_pairs, encoder_, decoder_, out_seq_class, input_seq_class)

# **Attention graph**

In [None]:
def showAttention(input_sentence, output_words, attentions):
    """Draw an attention matrix as a heat map with token labels.

    Args:
        input_sentence: Either a space-separated string or an iterable of
            tokens; labels the columns, followed by ``<EOS>``.
        output_words: Iterable of decoded tokens; labels the rows.
        attentions: 2-D tensor supporting ``.numpy()``.
    """
    # Accept index sequences as well as plain strings, and make every label
    # a string so integer tokens can be displayed.
    if isinstance(input_sentence, str):
        input_tokens = input_sentence.split(' ')
    else:
        input_tokens = [str(token) for token in input_sentence]
    row_labels = [str(word) for word in output_words]

    weights = attentions.numpy()

    # Set up figure with colorbar
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(weights, cmap='bone')
    fig.colorbar(cax)

    # Never label more rows/columns than the matrix has, so the ticks cannot
    # stretch the view beyond the heat map.
    n_rows, n_cols = weights.shape
    column_labels = (input_tokens + ['<EOS>'])[:n_cols]
    row_labels = row_labels[:n_rows]

    # Pin the tick positions first and only then attach the labels; labels
    # set on unpinned ticks depend on where the locator happens to place them.
    ax.set_xticks(list(range(len(column_labels))))
    ax.set_xticklabels(column_labels, rotation=90)
    ax.set_yticks(list(range(len(row_labels))))
    ax.set_yticklabels(row_labels)

    plt.show()


def evaluateAndShowAttention(input_sentence):
    """Decode one input sequence with the notebook's models and plot attention.

    Uses the notebook-level ``encoder_``, ``decoder_``, ``out_seq_class`` and
    ``input_seq_class`` objects.

    Args:
        input_sentence: Sequence of input token indices.
            NOTE(review): assumed to have the same form as the ``pair[0]``
            sequences used by ``evaluate_iter`` - confirm.
    """
    input_tensor = torch.tensor(input_sentence, dtype=torch.long, device=device).view(-1, 1)
    # evaluate expects (sequence tensor, encoder, decoder, output class,
    # max length), and the decoder must be the trained instance rather than
    # the attention_decoder class itself.
    output_words, attentions = evaluate(
        input_tensor, encoder_, decoder_, out_seq_class, input_seq_class.highest_length)

    # showAttention splits its first argument on spaces and needs string
    # labels, so render the integer tokens as text first.
    input_text = ' '.join(str(token) for token in input_sentence)
    output_labels = [str(word) for word in output_words]

    print('input =', input_sentence)
    print('output =', ' '.join(output_labels))
    showAttention(input_text, output_labels, attentions)

In [None]:
# evaluateAndShowAttention requires an input sequence; use the input
# sequence of the first test entry as the example.
evaluateAndShowAttention(test_pairs[0][0][0])