In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import math as mt
import random 
import copy as cp

In [None]:
from google.colab import drive

# Attach Google Drive under /content/drive; force_remount re-attaches it even
# when a previous mount is still present in this runtime.
drive.mount(
    '/content/drive',
    force_remount=True,
)

Mounted at /content/drive


In [None]:
# Read the training TSV and keep only essay sets 1 and 2, set 1 rows first.
data = pd.read_csv("/content/drive/MyDrive/Project/train.tsv", sep='\t')

# Concatenate the per-set slices in one call. Seeding the result with an empty
# placeholder frame and growing it in a loop is deprecated in pandas and
# silently upcasts the column dtypes.
mod_data = pd.concat([data.loc[data["EssaySet"] == i] for i in range(1, 3)])

# Column vectors so they can be joined side by side below.
scores1 = mod_data['Score1'].to_numpy()
scores2 = mod_data['Score2'].to_numpy()
Eset = mod_data['EssaySet'].to_numpy().reshape(-1, 1)

# Target score is the mean of the two recorded scores.
scores = ((scores1 + scores2) / 2).reshape(-1, 1)

# labels[:, 0] is the averaged score, labels[:, 1] is the essay set id.
labels = np.concatenate((scores, Eset), axis=1)

In [None]:
# ======= Load the saved input vectors ======= #
inputs_all = np.load('/content/drive/MyDrive/Project/all_vecFile_12 _final.npy')
print(inputs_all.shape)

from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# Hold out 20% of the samples (fixed random_state) and convert every part of
# the split to a float tensor in one pass.
X_train, X_test, Y_train, Y_test = (
    torch.Tensor(part)
    for part in train_test_split(inputs_all, labels, test_size=0.2, random_state=33)
)

train = TensorDataset(X_train, Y_train)
test = TensorDataset(X_test, Y_test)

# Both loaders use batches of 20 and keep the dataset order.
train_loader, test_loader = (
    DataLoader(dataset, batch_size=20, shuffle=False)
    for dataset in (train, test)
)

(2950, 42, 300)


In [None]:
# Installs the package that provides the `RevGrad` layer imported from
# `pytorch_revgrad` further down.
# NOTE(review): this installs whatever the repository's default branch holds at
# run time; consider pinning a tag or commit so re-runs are reproducible.
!pip install git+https://github.com/janfreyberg/pytorch-revgrad.git

In [None]:
import math
import torch
import torch.nn as nn
import torch.utils
import torch.utils.data
from torch.autograd import Variable
import matplotlib.pyplot as plt 
from pytorch_revgrad import RevGrad

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
EPS = torch.finfo(torch.float).eps # numerical logs

# Hidden vectors saved from the local VRNNs. They are multiplied with the
# model's hidden state inside VRNN.forward, so they must live on the same
# device as the model; leaving them on the CPU raises a device-mismatch error
# as soon as the model runs on CUDA.
hidden_locals = np.load("/content/drive/MyDrive/Project/hiddens_final.npy")
hidden_locals = torch.from_numpy(hidden_locals).to(device)


class VRNN(nn.Module):
    """Variational RNN with an LSTM recurrence plus two heads on the final state.

    At every time step the model encodes the input and the previous hidden
    state into a latent Gaussian, decodes the latent back to input space and
    advances the LSTM. After the last step the top-layer hidden state feeds a
    class head (``classifier``) and a domain head (``discriminator``).

    Sub-module names and ``nn.Sequential`` layouts are kept exactly as before
    so previously saved ``state_dict`` files still load.
    """

    def __init__(self, x_dim, h_dim, z_dim, n_layers, output_dims, domains ,  bias=False):
        """Build all sub-networks.

        Args:
            x_dim: size of each input vector.
            h_dim: size of the hidden state and of the feature layers.
            z_dim: size of the latent variable.
            n_layers: number of stacked LSTM layers.
            output_dims: number of classes produced by ``classifier``.
            domains: number of domains produced by ``discriminator``.
            bias: whether the LSTM uses bias terms.
        """
        super(VRNN, self).__init__()

        self.x_dim = x_dim
        self.h_dim = h_dim
        self.z_dim = z_dim
        self.n_layers = n_layers
        self.domains = domains
        self.output_dims = output_dims

        # feature-extracting transformations
        self.phi_x = nn.Sequential(
            nn.Linear(x_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, h_dim),
            nn.ReLU())
        self.phi_z = nn.Sequential(
            nn.Linear(z_dim, h_dim),
            nn.ReLU())

        # encoder: q(z_t | x_t, h_{t-1})
        self.enc = nn.Sequential(
            nn.Linear(h_dim + h_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, h_dim),
            nn.ReLU())
        self.enc_mean = nn.Linear(h_dim, z_dim)
        self.enc_std = nn.Sequential(
            nn.Linear(h_dim, z_dim),
            nn.Softplus())

        # prior: p(z_t | h_{t-1})
        self.prior = nn.Sequential(
            nn.Linear(h_dim, h_dim),
            nn.ReLU())
        self.prior_mean = nn.Linear(h_dim, z_dim)
        self.prior_std = nn.Sequential(
            nn.Linear(h_dim, z_dim),
            nn.Softplus())

        # decoder: p(x_t | z_t, h_{t-1})
        self.dec = nn.Sequential(
            nn.Linear(h_dim + h_dim, h_dim),
            nn.ReLU(),
            nn.Linear(h_dim, h_dim),
            nn.ReLU())
        self.dec_std = nn.Sequential(
            nn.Linear(h_dim, x_dim),
            nn.Softplus())
        # Sigmoid keeps the mean in (0, 1), as required by the Bernoulli
        # reconstruction loss used in forward().
        self.dec_mean = nn.Sequential(
            nn.Linear(h_dim, x_dim),
            nn.Sigmoid())

        # recurrence
        self.rnn = nn.LSTM(h_dim + h_dim, h_dim, num_layers=n_layers, bias=bias)

        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(h_dim, output_dims),
            nn.Softmax(dim=1))

        # Domain discriminator
        # NOTE(review): RevGrad sits after the softmax, so the reversed
        # gradient also reaches the discriminator's own Linear layer. It is
        # left in place so saved state_dict keys keep matching - confirm this
        # placement is intended.
        self.discriminator = nn.Sequential(
            nn.Linear(h_dim , domains),
            nn.Softmax(dim=1),
            RevGrad())

    def forward(self, x ):
        """Run the model over a whole sequence.

        Args:
            x: tensor indexed as ``x[t]`` -> ``(batch, x_dim)``, i.e. shaped
                ``(seq_len, batch, x_dim)``. It must be on the same device as
                the model parameters.

        Returns:
            Tuple ``(kld_loss, nll_loss, h, class_vector, domain_vector,
            diff_loss, (all_enc_mean, all_enc_std), (all_dec_mean,
            all_dec_std))`` where ``h`` is the final LSTM hidden state
            ``(n_layers, batch, h_dim)``, ``class_vector`` and
            ``domain_vector`` are detached ``(batch, n)`` probability rows and
            ``diff_loss`` is a Python float.
        """
        all_enc_mean, all_enc_std = [], []
        all_dec_mean, all_dec_std = [], []
        kld_loss = 0
        nll_loss = 0

        # nn.LSTM carries a (hidden, cell) pair; both start at zero and are
        # created alongside the input instead of relying on a global device.
        h = torch.zeros(self.n_layers, x.size(1), self.h_dim,
                        device=x.device, dtype=x.dtype)
        c = torch.zeros_like(h)

        for t in range(x.size(0)):
            phi_x_t = self.phi_x(x[t])
            top_h = h[-1]  # hidden state of the last LSTM layer

            # encoder
            enc_t = self.enc(torch.cat([phi_x_t, top_h], 1))
            enc_mean_t = self.enc_mean(enc_t)
            enc_std_t = self.enc_std(enc_t)

            # prior
            prior_t = self.prior(top_h)
            prior_mean_t = self.prior_mean(prior_t)
            prior_std_t = self.prior_std(prior_t)

            # sampling and reparameterization
            z_t = self._reparameterized_sample(enc_mean_t, enc_std_t)
            phi_z_t = self.phi_z(z_t)

            # decoder
            dec_t = self.dec(torch.cat([phi_z_t, top_h], 1))
            dec_mean_t = self.dec_mean(dec_t)
            dec_std_t = self.dec_std(dec_t)

            # recurrence: a one-step sequence, state passed as an (h, c) pair
            rnn_in = torch.cat([phi_x_t, phi_z_t], 1).unsqueeze(0)
            _, (h, c) = self.rnn(rnn_in, (h, c))

            # computing losses
            kld_loss += self._kld_gauss(enc_mean_t, enc_std_t, prior_mean_t, prior_std_t)
            nll_loss += self._nll_bernoulli(dec_mean_t, x[t])

            all_enc_mean.append(enc_mean_t)
            all_enc_std.append(enc_std_t)
            all_dec_mean.append(dec_mean_t)
            all_dec_std.append(dec_std_t)

        # Diff loss: sum of the element-wise product between this model's
        # final hidden state and the saved local-VRNN hidden vectors.
        # NOTE(review): h[:, -1, :] selects the LAST BATCH ELEMENT of every
        # layer, not the last layer; kept as-is because the shape of
        # `hidden_locals` is not visible here - confirm which was intended.
        fin_hidden = h[:, -1, :]
        diff_loss = float(torch.sum(fin_hidden * hidden_locals.to(fin_hidden.device)))

        # Feed the 2-D (batch, h_dim) top-layer state to both heads so their
        # Softmax(dim=1) normalises over classes / domains rather than over
        # the batch axis of a 3-D tensor.
        top_hidden = h[-1]
        class_vector = self.classifier(top_hidden)
        domain_vector = self.discriminator(top_hidden)

        return kld_loss, nll_loss, h , class_vector.detach() , domain_vector.detach() , diff_loss ,\
            (all_enc_mean, all_enc_std), \
            (all_dec_mean, all_dec_std)


    def sample(self, seq_len):
        """Generate ``seq_len`` steps from the prior.

        Returns:
            Tensor of shape ``(seq_len, x_dim)`` holding the decoder mean at
            every step, on the same device as the model parameters.
        """
        param_device = next(self.parameters()).device
        sample = torch.zeros(seq_len, self.x_dim, device=param_device)

        h = torch.zeros(self.n_layers, 1, self.h_dim, device=param_device)
        c = torch.zeros_like(h)
        for t in range(seq_len):
            top_h = h[-1]

            # prior
            prior_t = self.prior(top_h)
            prior_mean_t = self.prior_mean(prior_t)
            prior_std_t = self.prior_std(prior_t)

            # sampling and reparameterization
            z_t = self._reparameterized_sample(prior_mean_t, prior_std_t)
            phi_z_t = self.phi_z(z_t)

            # decoder
            dec_t = self.dec(torch.cat([phi_z_t, top_h], 1))
            dec_mean_t = self.dec_mean(dec_t)

            phi_x_t = self.phi_x(dec_mean_t)

            # recurrence
            rnn_in = torch.cat([phi_x_t, phi_z_t], 1).unsqueeze(0)
            _, (h, c) = self.rnn(rnn_in, (h, c))

            # dec_mean_t is (1, x_dim); drop the batch axis before storing
            sample[t] = dec_mean_t.detach().squeeze(0)

        return sample


    def reset_parameters(self, stdv=1e-1):
        """Re-draw every parameter from a normal distribution N(0, stdv)."""
        with torch.no_grad():
            for weight in self.parameters():
                weight.normal_(0, stdv)


    def _init_weights(self, stdv):
        """Placeholder; intentionally does nothing."""
        pass


    def _reparameterized_sample(self, mean, std):
        """Draw ``mean + std * eps`` with ``eps`` standard normal (same device as ``std``)."""
        eps = torch.empty(size=std.size(), device=std.device, dtype=std.dtype).normal_()
        return eps.mul(std).add_(mean)


    def _kld_gauss(self, mean_1, std_1, mean_2, std_2):
        """KL divergence between two diagonal Gaussians given by mean and std, summed over all elements."""
        kld_element = (2 * torch.log(std_2 + EPS) - 2 * torch.log(std_1 + EPS) +
            (std_1.pow(2) + (mean_1 - mean_2).pow(2)) /
            std_2.pow(2) - 1)
        return 0.5 * torch.sum(kld_element)


    def _nll_bernoulli(self, theta, x):
        """Bernoulli negative log-likelihood of ``x`` under probabilities ``theta``, summed.

        EPS is added inside both logs; subtracting it in the second one made
        the argument negative (NaN) whenever ``theta`` saturated at 1.
        """
        return - torch.sum(x*torch.log(theta + EPS) + (1-x)*torch.log(1-theta+EPS))


    def _nll_gauss(self, mean, std, x):
        """Gaussian negative log-likelihood of ``x`` under ``N(mean, std)``, summed.

        The constant term uses ``math.log`` because ``torch.log`` does not
        accept a plain Python float.
        """
        return torch.sum(torch.log(std + EPS) + math.log(2*math.pi)/2 + (x - mean).pow(2)/(2*std.pow(2)))

In [None]:
# ---- Model and run configuration ----
x_dim = 300
h_dim = 2*50   # 2 Essays , each considered as one vector
z_dim = 10
n_layers = 1
n_epochs = 100
clip = 10
learning_rate = 0.0081
seed = 128
print_every = 10 # batches
save_every = 1 # epochs
domains = 2
classes = len(np.unique(scores))  # one class per distinct averaged score
batch_size = 20

# Apply the seed. The model samples latent variables on every forward pass,
# so without this the results differ from run to run.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)



In [None]:
# Restore the saved weights. map_location remaps the stored tensors onto the
# device chosen for this runtime, so a checkpoint written on a GPU machine
# still loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
state_dict = torch.load('/content/drive/MyDrive/Project/weights', map_location=device)
model = VRNN(x_dim, h_dim, z_dim ,n_layers, classes ,domains)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
# Evaluate the restored model on the held-out split and count exact matches.
correct = 0

# NOTE(review): assumes class index k stands for the k-th smallest distinct
# score (the ordering np.unique gives, which also defines `classes`) -
# confirm against the training code.
score_levels = np.unique(scores)

model.eval()
with torch.no_grad():
    for i, (batch_x, batch_y) in enumerate(test_loader):
        # Column 0 of the labels is the averaged score, column 1 the essay set.
        class_labels = batch_y[:, 0]
        domain_labels = batch_y[:, 1]

        # (batch, seq_len, x_dim) -> (seq_len, batch, x_dim), on the model's device.
        seq = batch_x.transpose(0, 1).to(device)

        # Min-max scale the batch into [0, 1]; guard against a constant batch.
        low = seq.min()
        value_range = (seq.max() - low).clamp_min(EPS)
        seq = (seq - low) / value_range

        kld_loss, nll_loss, h, class_vector, domain_vector, diff_loss, _, _ = model(seq)

        # One prediction per sample, compared with that sample's class index.
        predicted = class_vector.argmax(dim=1).cpu()
        expected = torch.from_numpy(np.searchsorted(score_levels, class_labels.numpy()))

        print(f'{i+1}.)    {predicted.tolist()}     {class_labels.tolist()}')

        correct += int((predicted == expected).sum())

print(f'We got {correct} correct')