# Mounting Google drive

In [1]:
# Colab-only: mount Google Drive at /content/drive so files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd drive/MyDrive/Class/AIChemistry/HandsOn14-Res,Attn

/content/drive/MyDrive/Class/AIChemistry/HandsOn14-Res,Attn


# Data Loader

In [51]:
import csv
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataloader import default_collate
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn

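# NOTE(review): the link below points to the mushroom-classification dataset, but this
# notebook loads 'carprice.csv' -- TODO confirm and replace with the actual data source.
# https://www.kaggle.com/datasets/uciml/mushroom-classification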

def one_hot_encode(data,uniques):
    """One-hot encode a sequence of categorical values.

    Args:
        data: sequence of category values, one per sample.
        uniques: sequence of known categories; the position of a category in
            this sequence is the column that is set to 1 for matching samples.

    Returns:
        Tensor of shape (len(data), len(uniques)) holding 0/1 values, with
        exactly one 1 per row.

    Raises:
        ValueError: if a value in ``data`` does not appear in ``uniques``.
    """
    # Build the value -> column map once instead of scanning `uniques` per row.
    # setdefault keeps the first occurrence, matching list.index semantics.
    column_of = {}
    for j, u in enumerate(uniques):
        column_of.setdefault(u, j)

    onehot = torch.zeros((len(data), len(uniques)))
    for i, d in enumerate(data):
        if d not in column_of:
            raise ValueError('%r is not in uniques' % (d,))
        onehot[i, column_of[d]] = 1
    return onehot

def normalize(d):
    """Standardize a sequence of numbers to zero mean and unit variance.

    Args:
        d: iterable of values convertible with ``float()`` (e.g. CSV strings).

    Returns:
        A tuple ``(x, avg, std)`` where ``x`` is a tensor of shape (n, 1) with
        the standardized values, and ``avg`` / ``std`` are the statistics used
        (population standard deviation, as computed by ``np.std``).
    """
    values = np.array([float(v) for v in d])
    avg = np.mean(values)
    std = np.std(values)
    # A constant column has std == 0; dividing by it would fill the feature with
    # NaN and poison training. Use 1 so the column becomes all zeros instead and
    # (x * std + avg) still recovers the original values.
    if std == 0:
        std = np.float64(1.0)
    return torch.Tensor((values - avg)/std).reshape(-1,1), avg, std

class Data(Dataset):
  """Tabular regression dataset read from a CSV file.

  The first CSV row is skipped as a header. Column 0 is the regression target.
  Columns 1, 2, 4 and 6 are one-hot encoded; columns 3, 5, 7, 8 and 9 are
  standardized. Each encoded column is stored separately as ``Xs1`` .. ``Xs9``
  and ``input_fea_len`` holds the width of each of them.

  ``normalize_y`` must be called (with the training indices) before items are
  fetched, because ``__getitem__`` returns the standardized target.
  """

  # CSV columns that are one-hot encoded; the other feature columns are standardized.
  _CATEGORICAL_COLUMNS = (1, 2, 4, 6)
  # Feature columns are 1 .. _N_FEATURE_COLUMNS (column 0 is the target).
  _N_FEATURE_COLUMNS = 9

  def __init__(self, data_path):
    """Read ``data_path`` and build the per-column feature tensors."""
    with open(data_path, newline='') as f:
      reader = csv.reader(f)
      next(reader, None)  # skip the header row
      # Blank lines come back as empty rows and would make the array ragged.
      rows = [row for row in reader if row]
    data = np.array(rows)

    Ys_raw = [float(v) for v in data[:, 0]]

    features = []
    for col in range(1, self._N_FEATURE_COLUMNS + 1):
      column = data[:, col]
      if col in self._CATEGORICAL_COLUMNS:
        # sorted(): iteration order of a set of strings changes between
        # interpreter sessions (hash randomization), which would permute the
        # one-hot columns and silently invalidate previously saved weights.
        categories = sorted(set(column))
        x = one_hot_encode(column, categories)
      else:
        x, _, _ = normalize(column)
      features.append(x)

    (self.Xs1, self.Xs2, self.Xs3,
     self.Xs4, self.Xs5, self.Xs6,
     self.Xs7, self.Xs8, self.Xs9) = features

    # save it to the object
    self.Ys_raw = torch.Tensor(Ys_raw)
    self.input_fea_len = tuple(x.shape[1] for x in features)

    # Filled in by normalize_y().
    self.Ys = None
    self.Yavg = None
    self.Ystd = None

  def normalize_y(self,idx):
    """Standardize all targets using mean/std computed only over samples ``idx``.

    Passing the training indices keeps validation/test targets out of the
    statistics. Sets ``Ys``, ``Yavg`` and ``Ystd``.
    """
    _, avg, std = normalize(self.Ys_raw[idx])
    self.Ys = (self.Ys_raw-avg)/std
    self.Yavg = avg
    self.Ystd = std

  def denormalize_y(self,Ys):
    """Map standardized targets back to the original scale as a numpy array."""
    return np.array(Ys)*self.Ystd+self.Yavg

  def __len__(self):
    return self.Ys_raw.shape[0]

  def __getitem__(self, idx):
    """Return ``(idx, y, x)`` where ``x`` is the tuple of the nine feature rows.

    Raises:
        RuntimeError: if ``normalize_y`` has not been called yet.
    """
    ys = getattr(self, 'Ys', None)
    if ys is None:
      raise RuntimeError('call Data.normalize_y(train_idx) before fetching items')
    y = ys[idx]
    x = (self.Xs1[idx,:],self.Xs2[idx,:],self.Xs3[idx,:],
         self.Xs4[idx,:],self.Xs5[idx,:],self.Xs6[idx,:],
         self.Xs7[idx,:],self.Xs8[idx,:],self.Xs9[idx,:])
    return idx, y,x

# Model

In [76]:
import torch.nn as nn

class NN(nn.Module):
    """Plain fully connected regressor: ``n_layer`` Softplus-activated linear
    layers followed by a linear output layer producing one value per sample.

    Args:
        input_fea_len: size of the input feature vector.
        fea_len: width of every hidden layer.
        n_layer: number of hidden linear layers (>= 1).

    Raises:
        ValueError: if ``n_layer`` is smaller than 1.
    """
    def __init__(self, input_fea_len, fea_len=64, n_layer=3):
        # model initialize. neural network layers
        super().__init__()
        if n_layer < 1:
            raise ValueError('n_layer must be >= 1')

        self.act = nn.Softplus()
        self.n_layer = n_layer
        # Hidden layers are registered as lin1 .. linN so that the state_dict
        # keys of the default (n_layer=3) model stay lin1/lin2/lin3.
        in_len = input_fea_len
        for i in range(1, n_layer + 1):
            setattr(self, 'lin%d' % i, nn.Linear(in_len, fea_len))
            in_len = fea_len
        self.output_layer = nn.Linear(fea_len, 1)

    def forward(self, x):
        """Apply the hidden layers in order, then the output layer."""
        for i in range(1, self.n_layer + 1):
            x = self.act(getattr(self, 'lin%d' % i)(x))
        return self.output_layer(x)



class NNResDense(nn.Module):
    """Densely connected residual regressor.

    The input is embedded to ``fea_len`` features. Hidden layer 1 reads the
    embedding; every later hidden layer reads the sum of the outputs of all
    previous hidden layers. The output layer reads the activated sum of all
    hidden outputs. With the default ``n_layer=3`` this is
    ``x3 = act(lin3(x1 + x2))`` and ``z = output_layer(act(x1 + x2 + x3))``.

    Args:
        input_fea_len: size of the input feature vector.
        fea_len: width of the embedding and of every hidden layer.
        n_layer: number of hidden layers (>= 1).

    Raises:
        ValueError: if ``n_layer`` is smaller than 1.
    """
    def __init__(self, input_fea_len, fea_len=64, n_layer=3):
        # model initialize. neural network layers
        super().__init__()
        if n_layer < 1:
            raise ValueError('n_layer must be >= 1')

        self.act = nn.Softplus()
        self.n_layer = n_layer
        self.embedding = nn.Linear(input_fea_len,fea_len)

        # Registered as lin1 .. linN so the default model keeps its state_dict keys.
        for i in range(1, n_layer + 1):
            setattr(self, 'lin%d' % i, nn.Linear(fea_len, fea_len))

        self.output_layer = nn.Linear(fea_len, 1)

    def forward(self, x):
        x = self.act(self.embedding(x))

        hidden = self.act(self.lin1(x))
        # Running sum of all hidden outputs produced so far (the dense skip path).
        skip_sum = hidden
        for i in range(2, self.n_layer + 1):
            hidden = self.act(getattr(self, 'lin%d' % i)(skip_sum))
            skip_sum = skip_sum + hidden

        return self.output_layer(self.act(skip_sum))



class NNAttn(nn.Module):
    """Multi-head self-attention regressor over per-descriptor embeddings.

    Every input descriptor (one tensor per entry of ``input_fea_len``) is
    embedded to ``fea_len`` features and treated as one token. A single linear
    layer produces queries, keys and values for ``nhead`` heads; attention
    weights are a softmax over the key axis. The flattened attention output is
    passed through Softplus and a linear layer to give one value per sample.

    Args:
        input_fea_len: sequence with the feature width of each descriptor.
        fea_len: embedding width, also the per-head channel width.
        nhead: number of attention heads.
    """
    def __init__(self, input_fea_len, fea_len=32, nhead=4):
        # model initialize. neural network layers
        super().__init__()

        self.act = nn.Softplus()
        self.embedding = nn.ModuleList([nn.Linear(x,fea_len) for x in input_fea_len])
        ndes = len(input_fea_len)
        # One projection yields q, k and v for all heads at once.
        self.lin = nn.Linear(fea_len,3*nhead*fea_len)
        # qk is laid out as b * q * v * h, so dim=2 normalizes over the keys.
        self.vsoftmax = nn.Softmax(dim=2)

        self.output_layer = nn.Linear(fea_len*ndes*nhead, 1)
        self.nhead = nhead
        self.ndes = ndes
        self.fea_len = fea_len

    def forward(self, x):
        """``x`` is a sequence of ``ndes`` tensors, one per descriptor, each (batch, width)."""
        x = [self.embedding[i](xx) for i,xx in enumerate(x)]
        x = torch.stack(x,1)  # b * ndes * c
        batch = x.shape[0]
        qkv = self.lin(x)
        q,k,v = torch.split(qkv,self.nhead*self.fea_len,dim=2)
        head_shape = (batch, self.ndes, self.nhead, self.fea_len)
        q = q.reshape(head_shape) # b * q * h * c
        k = k.reshape(head_shape) # b * v * h * c
        v = v.reshape(head_shape) # b * v * h * c
        # NOTE: scores are not divided by sqrt(fea_len); left as is so that
        # previously trained weights keep producing the same predictions.
        qk = torch.einsum('bqhc,bvhc->bqvh', q, k)
        attn = self.vsoftmax(qk)  # b * q * v * h
        out = torch.einsum('bqvh,bvhc->bqhc', attn, v) # b * q * h * c
        out = out.reshape(batch,-1)
        z = self.act(out)
        z = self.output_layer(z)
        return z

# Using Model

In [54]:
from time import time
import random
import numpy as np
import torch.optim as optim

def use_model(data_loader, model, criterion, optimizer, i_iter, mode, name = None):
  """Run ``model`` once over ``data_loader``, optionally training it.

  Args:
    data_loader: iterable of batches ``(idxs, ys, xs)``; ``xs`` is either a
      tensor or a sequence of tensors that is passed to ``model`` as a list.
    model: the network; inputs are moved to the device of its parameters.
    criterion: loss function called as ``criterion(output, ys)`` (train mode).
    optimizer: optimizer stepped once per batch in train mode; unused in
      predict mode.
    i_iter: epoch counter; accepted for interface compatibility, not used.
    mode: ``'train'`` or ``'predict'``.
    name: accepted for interface compatibility, not used.

  Returns:
    ``(outputs, targets, idxss)``: model outputs and targets as nested lists
    (one single-element list per sample) and the sample indices, all in the
    order the loader produced them.
  """
  assert mode in ['train','predict']
  training = mode == 'train'
  # switch to model mode
  if training:
    model.train()
  else:
    model.eval() # e.g. disables dropout

  # Follow the model's own device rather than assuming 'cuda'.
  device = next(model.parameters()).device

  targets = []
  outputs = []
  idxss = []
  for idxs, ys, xs in data_loader: # loop for each batch
    # Build a new container instead of assigning into xs, which may be a tuple.
    if torch.is_tensor(xs):
      xs = xs.to(device)
    else:
      xs = [x.to(device) for x in xs]
    ys = ys.to(device).reshape(-1,1)

    if training:
      output = model(xs)
      loss = criterion(output, ys)
      # Backward propagation
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    else:
      with torch.no_grad(): # it does not compute the gradient. so it's faster
        output = model(xs)

    outputs += output.detach().cpu().tolist()
    targets += ys.cpu().tolist() # list concatenation
    idxss += idxs.tolist()
  return outputs,targets,idxss

# Model Training and Validation

In [77]:
def get_loader(dataset, collate_fn=default_collate,
                batch_size=64, idx_sets=None,
                num_workers=0, pin_memory=False):
  """Build one ``DataLoader`` per index subset of ``dataset``.

  Args:
    dataset: map-style dataset shared by all loaders.
    collate_fn: batch collation function.
    batch_size: samples per batch.
    idx_sets: sequence of index lists (e.g. ``[train_idx, val_idx, test_idx]``);
      each loader draws its indices in random order via ``SubsetRandomSampler``.
      ``None`` yields a single loader over the whole dataset.
    num_workers: worker processes per loader.
    pin_memory: forwarded to ``DataLoader``.

  Returns:
    List of loaders, in the same order as ``idx_sets``.
  """
  if idx_sets is None:
    # The default used to crash with "NoneType is not iterable".
    idx_sets = [list(range(len(dataset)))]
  return [DataLoader(dataset, batch_size=batch_size,
                     sampler=SubsetRandomSampler(idx),
                     num_workers=num_workers,
                     collate_fn=collate_fn, pin_memory=pin_memory)
          for idx in idx_sets]
from sklearn import metrics
################################ Input ####################################
# data
data_path='carprice.csv'
TrainValTeSplitst = [0.8, 0.1, 0.1]  # train / validation / test fractions

# Training
batch_size = 64
lr = 0.001
nepochs = 100
cuda = True  # request the GPU; falls back to CPU when none is available
seed = 1234
###########################################################################

# Only use CUDA when it is actually present, so the cell also runs on CPU runtimes.
use_cuda = cuda and torch.cuda.is_available()

# Loading data
print('loading data...',end=''); t = time()
data = Data(data_path)
print('completed', time()-t,'sec')

# Make a split
## number of train and validation
ndata = len(data)
ntrain = int(ndata*TrainValTeSplitst[0])
nval = int(ndata*TrainValTeSplitst[1])
## randomize
idxs = list(range(ndata))
# Seed every RNG involved: `random` drives the split, torch drives weight
# initialization and the SubsetRandomSampler batch order.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
random.shuffle(idxs)
## split index
train_idx = idxs[:ntrain]
val_idx = idxs[ntrain:ntrain+nval]
test_idx = idxs[ntrain+nval:]

# Target statistics come from the training samples only.
data.normalize_y(train_idx)

## get data loader
train_loader, val_loader, test_loader = get_loader(data,
    batch_size=batch_size,idx_sets=[train_idx,val_idx,test_idx],pin_memory=use_cuda)

#build model
model = NNAttn(data.input_fea_len)
if use_cuda:
  if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model,device_ids=[0])
  model.cuda()

## Training
criterion = torch.nn.MSELoss() # regression (continuous)
optimizer = optim.Adam(model.parameters(),lr,weight_decay= 0) # 0 means no penalty

bestval_mseloss = float('inf')
for i_iter in range(nepochs): # epochs
  output,target,_ = use_model(train_loader,model,criterion,optimizer,i_iter,'train') # training model
  print('Train MSE loss [%d]:'%i_iter, criterion(torch.Tensor(target), torch.Tensor(output)))
  output,target,_ = use_model(val_loader,model,criterion,optimizer,i_iter,'predict','Val') # using the model to validation set
  Val_mseloss = criterion(torch.Tensor(target), torch.Tensor(output)).tolist() # validation MSE error is calculated
  print('Val MSE loss [%d]:'%i_iter, Val_mseloss, end=' ')
  if Val_mseloss < bestval_mseloss: # if validation set error is lower than previous best
    bestval_mseloss = Val_mseloss
    print('<-Best')
    torch.save(model.state_dict(),'Weights.pth.tar') # keep the best checkpoint so far
  else: print('')



loading data...completed 0.14378976821899414 sec
Train MSE loss [0]: tensor(0.5066)
Val MSE loss [0]: 0.2994920313358307 <-Best
Train MSE loss [1]: tensor(0.2229)
Val MSE loss [1]: 0.22565555572509766 <-Best
Train MSE loss [2]: tensor(0.1713)
Val MSE loss [2]: 0.16049855947494507 <-Best
Train MSE loss [3]: tensor(0.1094)
Val MSE loss [3]: 0.11831630766391754 <-Best
Train MSE loss [4]: tensor(0.0920)
Val MSE loss [4]: 0.08508443832397461 <-Best
Train MSE loss [5]: tensor(0.0825)
Val MSE loss [5]: 0.09104679524898529 
Train MSE loss [6]: tensor(0.0752)
Val MSE loss [6]: 0.08146826922893524 <-Best
Train MSE loss [7]: tensor(0.0710)
Val MSE loss [7]: 0.06368638575077057 <-Best
Train MSE loss [8]: tensor(0.0651)
Val MSE loss [8]: 0.05931686982512474 <-Best
Train MSE loss [9]: tensor(0.0714)
Val MSE loss [9]: 0.06413474678993225 
Train MSE loss [10]: tensor(0.0565)
Val MSE loss [10]: 0.054567284882068634 <-Best
Train MSE loss [11]: tensor(0.0577)
Val MSE loss [11]: 0.061726879328489304 
Trai

In [78]:
from tqdm import tqdm
print('Testing. Loading best model')
# map_location keeps the checkpoint loadable when it was saved on a different
# device than the one the model currently lives on.
model.load_state_dict(torch.load('Weights.pth.tar',
                                 map_location=next(model.parameters()).device))
# The epoch argument is not used by use_model; pass a constant instead of
# relying on the loop variable leaked from the training cell.
output,target,idxs = use_model(test_loader,model,criterion,optimizer,0,'predict','Test')
# The sampler visits test samples in random order; sort by dataset index so
# predictions and targets are listed in a stable order.
I = np.argsort(idxs)
output = np.array(output)[I]
target = np.array(target)[I]
# Back to the original target scale before computing the error.
o, t = data.denormalize_y(output), data.denormalize_y(target)
print('Predict MeanAbsoluteError:', torch.mean(torch.abs(torch.Tensor(o)-torch.Tensor(t))))

Testing. Loading best model
Predict MeanAbsoluteError: tensor(1995.3127)


In [13]:
# Show the first five denormalized predictions (o) next to their targets (t).
print(o[:5],t[:5])

[[22340.34386648]
 [37481.10502546]
 [18301.1342341 ]
 [29947.51594397]
 [18406.11547837]] [[20497.99960413]
 [37499.99973567]
 [18639.9994472 ]
 [29989.99941271]
 [20994.99969853]]
