In [1]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from ase.io import read,write
from ase import Atoms, Atom
from ase.visualize import view

import numpy as np
from pymatgen.core.structure import Structure
from pymatgen.io.ase import AseAtomsAdaptor
import pymatgen as mg

from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable
import torch.optim as optim
from torchtext import data # torchtext.data 임포트
from torchtext.data import Iterator
from torch.utils.data import Dataset, DataLoader


import csv
import pandas as pd

# from data import CIFData, AtomCustomJSONInitializer, GaussianDistance
import os
import csv
import random


In [2]:
import pandas as pd
df = pd.read_csv('/home/cut6089/research/GASpy/data/co_motif.csv')

target_df = df[['name','target' ]]
# target_df.to_csv('/home/cut6089/research/GASpy/data/co_target.csv')


## Utils

In [3]:

def remove_adsorbates(atoms):
    copied_atoms= atoms.copy()
    binding_sites = []
    adsorbates_index = []
    
    for atom in atoms:
        if atom.tag == 1:
            binding_sites.append([atom.symbol,list(atom.position)])
            adsorbates_index.append(atom.index)
    del copied_atoms[adsorbates_index]
    bare_slab = copied_atoms.copy()
    
    return binding_sites, bare_slab

def get_nearest_atoms(atoms):
    # view(atoms)
    binding_sites, slab = remove_adsorbates(atoms)
    binding_sites.sort(key = lambda x: x[1][2] )    
    
    copied_atom = slab.copy()
    copied_atom += Atom(binding_sites[0][0],binding_sites[0][1],tag =1 )
    copied_atom = copied_atom.repeat((3,3,1))
    ads_index = np.where((copied_atom.get_tags()) ==1)[0][4]
    
    structure = AseAtomsAdaptor.get_structure(copied_atom)
    # nn = structure.get_neighbors(site=structure[ads_index] , r= min(structure.lattice.abc))
    nn = structure.get_neighbors(site=structure[ads_index] , r= 10)
    nn.sort(key = lambda x : x[1]) # sort nearest atoms
    nn_index = [nn[i][2] for i in range(len(nn))]
    nn_distances =[nn[i][1] for i in range(len(nn))]
    # nn_positions = [list(copied_atom[i].position) for i in range(len(nn_index))]
    
    
    # view(copied_atom)
    return copied_atom, nn_index,nn_distances

def get_atom_property(atoms):
    global feature
    feature_df = pd.DataFrame(columns=[f'feat{i}' for i in range(11)])
    atom, nn_index,nn_distances= get_nearest_atoms(atoms)
    elements = [atom[i].symbol for i in nn_index[:15]]
    distances = nn_distances[:15]
    def block_to_num(block):
        """
        Convert blokc to number

        Args:
            block (str) : 's', 'p', 'd' or 'f'

        Return:
            int : 1, 2, 3, 4
        """

        if block == 's':
            return 1
        elif block == 'p':
            return 2
        elif block == 'd':
            return 3
        elif block == 'f':
            return 4

    for el, distance in zip(elements, distances):
        e = mg.core.Element(el)
        atomic_number = e.Z
        average_ionic_radius = e.average_ionic_radius.real

        # Lowest oxidiation state of the element is used as common oxidation state
        common_oxidation_states = e.common_oxidation_states[0]
        Pauling_electronegativity = e.X
        row = e.row
        group = e.group
        thermal_conductivity = e.thermal_conductivity.real
        boiling_point = e.boiling_point.real
        melting_point = e.melting_point.real
        block = block_to_num(e.block)
        IE = e.ionization_energy
        
        
        feature_df.loc[len(feature_df)] = [atomic_number, common_oxidation_states, Pauling_electronegativity, 
                                           row, group, thermal_conductivity, boiling_point,
                                           melting_point, block, IE,distance]
    feature = feature_df.to_numpy()
    # feature = feature/distance
    feature = torch.Tensor(feature)
    
    # nn_positions = [list(atom[i].position) for i in range(len(nn_index))]
    # R = np.array(nn_positions)
    # Dij = np.linalg.norm(R[:, None, :] - R[None,:,:], axis =-1)
    # edge = torch.Tensor(Dij[:15,:15])
    # feature.unsqueeze_(0).shape
    return feature

def get_edge_feature(atoms):
    atom, nn_index,nn_distances = get_nearest_atoms(atoms)
    nn_positions = [list(atom[i].position) for i in nn_index]
    R = np.array(nn_positions)
    Dij = np.linalg.norm(R[:, None, :] - R[None,:,:], axis =-1)
    Dij =  np.where(Dij>4, 1e9, Dij)
    edge = torch.Tensor(Dij[:15,:15])
    return edge
    


In [4]:
class make_dataset(Dataset):
    def __init__(self, root_dir, dmin = 0, step = 0.2, random_seed = 123):
        self.root_dir = root_dir
        target_file = os.path.join(self.root_dir, 'data/co_target.csv')
        target_df = pd.read_csv(f'{self.root_dir}data/co_target.csv')
        random.seed(random_seed)
        self.target_df = target_df.sample(frac=1).reset_index(drop= True)
    
    def __len__(self):
        return len(self.target_df)
    
    def __getitem__(self, idx):
        traj_id = self.target_df['name'][idx]
        target = self.target_df['target'][idx]
        atoms = read(f'{self.root_dir}final_CO_slab/{traj_id}.traj')
        feature = get_atom_property(atoms)
        # feature= torch.Tensor(feature)
        edge = get_edge_feature(atoms)
        # edge = torch.Tensor(edge)
        # target= torch.Tensor(float(target))
        
        return feature,edge, target
    
def collate_fn(dataset_list):
    """
    list of tuples for each data point.
    """
    batch_feature = []
    batch_target = []
    batch_edge = []
    for  i, (feature,edge, target) in enumerate(dataset_list):
        batch_feature.append(feature)
        batch_edge.append(edge)
        batch_target.append(target)
    batch_feature = torch.nn.utils.rnn.pad_sequence(batch_feature, batch_first = True) 
    # batch_edge = torch.nn.utils.rnn.pad_sequence(batch_edge, batch_first = True) 
    batch_target = torch.Tensor(batch_target)
    return batch_feature,batch_edge,batch_target
    

In [6]:
target_df

Unnamed: 0,name,target
0,mp-10010_5d83020d30582ea2977a314b,-1.756184
1,mp-10010_5d83cd3a4eaf9091a4055c72,-1.496603
2,mp-10010_5d83f778dafe868ae5d44057,-1.769610
3,mp-10010_5d8688095436ecdede9a240e,-0.365174
4,mp-10010_5d8688095436ecdede9a2467,-1.797853
...,...,...
18450,mvc-16102_5d83f779dafe868ae5d46655,0.000546
18451,mvc-16102_5d83f779dafe868ae5d46656,0.008152
18452,mvc-16102_5d83f779dafe868ae5d46657,0.005785
18453,mvc-16380_5d83020f30582ea2977aba24,-0.069272


In [269]:
edge = get_edge_feature(atom)
torch.exp(-edge)

tensor([[1.0000e+00, 7.8284e-02, 6.9831e-02, 3.8958e-02, 7.2993e-02, 1.8168e-02,
         3.0975e-02, 7.8004e-02, 1.1768e-02, 1.3648e-02, 2.1054e-02, 6.3273e-03,
         1.7054e-02, 3.1656e-03, 1.2765e-02],
        [7.8284e-02, 1.0000e+00, 5.4920e-03, 1.5807e-02, 2.6413e-02, 2.7784e-03,
         7.7862e-02, 3.4934e-02, 1.1076e-03, 7.4792e-02, 2.4565e-03, 1.9477e-02,
         2.7001e-03, 1.9621e-03, 5.4577e-03],
        [6.9831e-02, 5.4920e-03, 1.0000e+00, 1.3008e-02, 2.7520e-02, 3.1961e-02,
         3.9369e-03, 1.9589e-02, 7.3588e-02, 1.1917e-03, 8.4791e-02, 7.5142e-04,
         2.9844e-02, 1.3712e-03, 6.0122e-03],
        [3.8958e-02, 1.5807e-02, 1.3008e-02, 1.0000e+00, 2.8637e-03, 8.3179e-02,
         1.9621e-03, 1.1974e-02, 1.5240e-02, 1.2566e-02, 2.4015e-03, 3.2884e-02,
         2.1613e-02, 7.7862e-02, 7.0387e-02],
        [7.2993e-02, 2.6413e-02, 2.7520e-02, 2.8637e-03, 1.0000e+00, 1.9621e-03,
         8.3179e-02, 2.8712e-02, 2.2499e-03, 2.9073e-03, 2.3907e-02, 7.1228e-04,
      

In [12]:
target_df = pd.read_csv(f'/home/cut6089/research/GASpy/data/co_target.csv')
# target_df['target']
for i  in  range(len(target_df)):
    traj_id = target_df['name'][i]
    atom = read(f'/home/cut6089/research/GASpy/final_CO_slab/{traj_id}.traj')
    edge = get_edge_feature(atom)
    # print(edge.shape)

KeyboardInterrupt: 

In [35]:
torch.relu()

5.20842596

In [23]:
e = np.array(edge)
np.where(e>4, 1e-9,e)
def get_attn_decoder_mask(seq):
    subsequent_mask = torch.ones_like(seq).unsqueeze(-1).expand(seq.size(0), seq.size(1), seq.size(1))
    subsequent_mask = subsequent_mask.triu(diagonal=1) # upper triangular part of a matrix(2-D)
    return subsequent_mask

array([[0.0000000e+00, 1.7828101e+00, 1.7828701e+00, 1.7829072e+00,
        2.6076171e+00, 2.6077347e+00, 2.6077297e+00, 2.6076341e+00,
        2.6076772e+00, 2.6076984e+00, 3.1676407e+00, 3.1676126e+00,
        3.1678600e+00, 2.6351542e+00, 2.6351371e+00],
       [1.7828101e+00, 0.0000000e+00, 2.5020888e+00, 2.5018449e+00,
        1.7717068e+00, 9.9999997e-10, 9.9999997e-10, 1.7718118e+00,
        3.1328790e+00, 3.1330810e+00, 9.9999997e-10, 2.6050844e+00,
        2.6056070e+00, 1.8514887e+00, 1.8516368e+00],
       [1.7828701e+00, 2.5020888e+00, 0.0000000e+00, 2.5017655e+00,
        9.9999997e-10, 1.7717710e+00, 3.1329126e+00, 3.1331327e+00,
        9.9999997e-10, 1.7718580e+00, 2.6050336e+00, 9.9999997e-10,
        2.6056607e+00, 3.1638532e+00, 1.8515917e+00],
       [1.7829072e+00, 2.5018449e+00, 2.5017655e+00, 0.0000000e+00,
        3.1330729e+00, 3.1330609e+00, 1.7719184e+00, 9.9999997e-10,
        1.7718964e+00, 9.9999997e-10, 2.6052978e+00, 2.6053958e+00,
        9.9999997e-10,

In [200]:
struct_files = '/home/cut6089/research/GASpy/final_CO_slab/'
           
atoms = read(struct_files+'mp-10010_5d83020d30582ea2977a314b.traj') 
structure = AseAtomsAdaptor.get_structure(atoms)
# nn = structure.get_neighbors(site=structure[ads_index] , r= min(structure.lattice.abc))


atom, nn_index,nn_distances= get_nearest_atoms(atoms)
get_edge_feature(atoms).shape
get_atom_property(atoms).shape

torch.Size([15, 10])

In [7]:
dataset= make_dataset(root_dir= '/home/cut6089/research/GASpy/')
total_size = len(dataset)
indices = list(range(total_size))
train_ratio = 0.7
valid_ratio = 0.15
train_size = int(total_size * train_ratio)
valid_size = int(total_size * valid_ratio)

train_sampler =SubsetRandomSampler(indices[:train_size])
# valid_sampler = SubsetRandomSampler(indices[:valid_size])
train_loader = DataLoader(dataset, batch_size = 50,sampler=train_sampler, num_workers=1 )
dataiter = iter(train_loader)
inputs, edge, labels= next(dataiter)
# features.shape


In [183]:
features[0].shape

torch.Size([15, 10])

In [54]:
a = features[0]
layernorm = nn.LayerNorm(10,eps =1e-6)
a = layernorm(a)
a = a.view(-1).long()


In [61]:
features[0].shape
emb = nn.Embedding(150, 512)
emb(a).view(15,10,-1)

tensor([[[ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         ...,
         [-1.7256,  0.2991,  0.1027,  ..., -0.3914, -1.0141, -1.8899],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399]],

        [[ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         ...,
         [-1.7256,  0.2991,  0.1027,  ..., -0.3914, -1.0141, -1.8899],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399]],

        [[ 1.1264, -0.3888, -0.5008,  ...,  0.3766, -1.8642, -1.1399],
         [ 1.1264, -0.3888, -0.5008,  ...,  0

In [59]:
# torch.cat(targets,0).shape
torch.Tensor(targets)[0]

tensor(-1.9572)

## Train Model

In [10]:
model = Transformer(nn_nums=15, feature_nums=11, n_layers=6, hidden_size=128, filter_size=2048,  dropout_rate=0.1)
criterion = nn.MSELoss()
# criterion =  nn.NLLLoss()

optimizer = optim.SGD(model.parameters(), lr = 0.01)
# for i, data in enumerate(train_loader):
#     inputs, labels = data
#     print(inputs.shape, labels.shape)

In [11]:
mae_errors_all = []


for epoch in range(10): # loop over t he dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_loader,0):
        # mae_erros = []
        
        # get the inputs; data is a list of [inputs, labels]
        inputs, edges, labels = data
        a,b = inputs.shape, labels.shape
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = model(inputs,edges,labels)
        outputs =  outputs.type(torch.float32)
        loss = criterion(outputs, labels.to(torch.float32))
        # loss.requires_grad_(True)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        print(f'[{epoch +1}, {i+1:5d}] loss: {loss:.4f}')
        mae_error = torch.mean(abs(outputs-labels))
        mae_errors_all.append(mae_error)
        print(f'mae error: {mae_error} ')
        if i % 10 == 9:
            # print(f'[{epoch +1}, {i+1:5d}] loss: {running_loss/ 2000:.3f}')
            # print(f'mae error: {mae_error} ')
            running_loss = 0.0
print('Finished Training')
        
        

[1,     1] loss: 0.9015
mae error: 0.781693225453119 


KeyboardInterrupt: 

## Transformer

In [9]:
def initialize_weight(x):
    nn.init.xavier_uniform_(x.weight)
    if x.bias is not None:
        nn.init.constant_(x.bias, 0)
        

class MultiHeadAttention(nn.Module):
    def __init__(self, hidden_size, dropout_rate , head_size = 8):
        super(MultiHeadAttention, self).__init__()
        
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.head_size = head_size
        self.att_size = att_size = hidden_size // head_size
        self.scale = att_size ** -0.5
        
        self.linear_q = nn.Linear(hidden_size, head_size * att_size, bias = False)
        self.linear_k = nn.Linear(hidden_size, head_size * att_size, bias = False)
        self.linear_v = nn.Linear(hidden_size, head_size * att_size, bias = False)
        
        self.linear_edge = nn.Linear(1, head_size, bias = False)
        initialize_weight(self.linear_q)
        initialize_weight(self.linear_k)
        initialize_weight(self.linear_v)
        
        self.att_dropout = nn.Dropout(dropout_rate)
        self.output_layer = nn.Linear(head_size * att_size, hidden_size, bias = False)
        initialize_weight(self.output_layer)
        
    def forward(self, q, k, v, edge):
        orig_q_size = q.size()
        
        d_k = self.att_size
        d_v = self.att_size
        batch_size = q.size(0)
        
        # head_i 
        q = self.linear_q(q).view(batch_size, -1, self.head_size, d_k)
        k = self.linear_k(k).view(batch_size, -1 , self.head_size, d_k)
        v = self.linear_v(v).view(batch_size, -1, self.head_size, d_v)
        edge = self.linear_edge(edge.unsqueeze(1).transpose(1,-1)).transpose(1,3)
        
        q= q.transpose(1,2)
        v = v.transpose(1, 2)
        k = k.transpose(1, 2).transpose(2,3)
        
        # scaled dot product
        q.mul_(self.scale)
        x = torch.matmul(q,k)
        x  = x+edge
        x = torch.softmax(x, dim = 3)
        x = self.att_dropout(x)
        x= x.matmul(v)
        
        x= x.transpose(1,2).contiguous()
        x = x.view(batch_size, -1, self.head_size * d_v)
        
        x = self.output_layer(x)
        return (x)

class FeedForwardNetwork(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate):
        super(FeedForwardNetwork, self).__init__()

        self.layer1 = nn.Linear(hidden_size, filter_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.layer2 = nn.Linear(filter_size, hidden_size)

        initialize_weight(self.layer1)
        initialize_weight(self.layer2)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.layer2(x)
        return x

    
class EncoderLayer(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate):
        super(EncoderLayer, self).__init__()

        self.self_attention_norm = nn.BatchNorm1d(hidden_size, eps=1e-6)
        self.self_attention = MultiHeadAttention(hidden_size, dropout_rate)
        self.self_attention_dropout = nn.Dropout(dropout_rate)

        self.ffn_norm = nn.BatchNorm1d(hidden_size, eps=1e-6)
        self.ffn = FeedForwardNetwork(hidden_size, filter_size, dropout_rate)
        self.ffn_dropout = nn.Dropout(dropout_rate)

    def forward(self, x, edge):  # pylint: disable=arguments-differ
        y = self.self_attention_norm(x.transpose(1,2)).transpose(1,2)
        y = self.self_attention(y, y, y,edge)
        y = self.self_attention_dropout(y)
        x = x + y

        y = self.ffn_norm(x.transpose(1,2)).transpose(1,2)
        y = self.ffn(y)
        y = self.ffn_dropout(y)
        x = x + y
        return x

# class DecoderLayer(nn.Module):
#     def __init__(self, hidden_size, filter_size, dropout_rate):
#         super(DecoderLayer, self).__init__()
        
#         self.self_attention_norm = nn.LayerNorm(hidden_size, eps = 1e-6)
#         self.self_attention = MultiHeadAttentioin(hidden_size=hidden_size, dropout_rate=dropout_rate)
#         self.self_attention_dropout = nn.Dropout(dropout_rate)
        
#         self.enc_dec_attention_norm = nn.LayerNorm(hidden_size, eps=1e-6)
#         self.enc_dec_attention = MultiHeadAttentioin(hidden_size, dropout_rate)
#         self.enc_dec_attention_dropout = nn.Dropout(dropout_rate)
        
#         self.ffn_norm =  nn.LayerNorm(hidden_size, eps = 1e-6)
#         self.ffn =  FeedForwardNetwork(hidden_size, filter_size,  dropout_rate)
#         self.ffn_dropout = nn.Dropout(dropout_rate)
        
#     def forward(self, x, enc_output):
#         y = self.self_attention_norm(x)
#         y = self.self_attention(y,y,y)
#         y = self.self_attention_dropout(y)
#         x = x+y
        
#         y = self.enc_dec_attention_norm(x)
#         y = self.enc_dec_attention_norm(y, enc_output, enc_output)
#         y = self.enc_dec_attention_dropout(y)
#         x= x+y
        
#         y= self.ffn_norm(x)
#         y = self.ffn(y)
#         y = self.ffn_dropout(y)
#         x = x+y
#         return x
    
class DecoderLayer(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate):
        super(DecoderLayer, self).__init__()

        self.self_attention_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.self_attention = MultiHeadAttention(hidden_size, dropout_rate)
        self.self_attention_dropout = nn.Dropout(dropout_rate)

        self.enc_dec_attention_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.enc_dec_attention = MultiHeadAttention(hidden_size, dropout_rate)
        self.enc_dec_attention_dropout = nn.Dropout(dropout_rate)

        self.ffn_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.ffn = FeedForwardNetwork(hidden_size, filter_size, dropout_rate)
        self.ffn_dropout = nn.Dropout(dropout_rate)

    def forward(self, x, enc_output):
        y = self.self_attention_norm(x)
        y = self.self_attention(y, y, y)
        y = self.self_attention_dropout(x)
        x = x + y

        if enc_output is not None:
            y = self.enc_dec_attention_norm(x)
            y = self.enc_dec_attention(y, enc_output, enc_output)
            y = self.enc_dec_attention_dropout(y)
            x = x + y

        y = self.ffn_norm(x)
        y = self.ffn(y)
        y = self.ffn_dropout(y)
        x = x + y
        return x
    
    
class Encoder(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate, n_layers):
        super(Encoder, self).__init__()
        
        encoders = [EncoderLayer(hidden_size= hidden_size, filter_size= filter_size, dropout_rate = dropout_rate)
                   for _ in range(n_layers)]
        self.layers = nn.ModuleList(encoders)
        self.hidden_size = hidden_size
        
        self.last_norm = nn.BatchNorm1d(hidden_size, eps = 1e-6)
        
    def forward(self, inputs,edge):
        encoder_output = inputs
        for enc_layer in self.layers:
            encoder_output = enc_layer(encoder_output, edge)
        return self.last_norm(encoder_output.transpose(1,2)).transpose(1,2)
    
class Decoder(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate, n_layers):
        super(Decoder, self).__init__()
        
        decoders = [DecoderLayer(hidden_size, filter_size, dropout_rate)
                   for _ in range(n_layers)]
        self.layers = nn.ModuleList(decoders)
        self.last_norm = nn.LayerNorm(hidden_size, eps = 1e-6 )
        
    def forward(self, targets, enc_output):
        decoder_ouput = targets
        for dec_layer in self.layers:
            decoder_ouput= dec_layer(decoder_ouput, enc_output)
        return self.last_norm(decoder_ouput)
    

    
    
class Transformer(nn.Module):
    def __init__(self, nn_nums, feature_nums, n_layers = 6, hidden_size = 512,
                filter_size = 2048, dropout_rate = 0.1):
        super(Transformer, self).__init__()
        
        self.hidden_size = hidden_size
        self.emb_scale=  hidden_size ** 0.5
        
        # self.t_vocab_embedding = nn.Embedding(t_vocab_size, hidden_size)
        self.input_normalize = nn.BatchNorm1d(feature_nums,eps =1e-6)
        self.edge_normalize  =   nn.BatchNorm1d(nn_nums, eps=1e-6)
        self.target_normalize = nn.LayerNorm(1, eps= 1e-6)
        
        # self.t_vocab_embedding = nn.Embedding(t_vocab_size, hidden_size)
        # nn.init.normal_(self.t_vocab_embedding.weight, mean = 0, std = hidden_size ** -0.5)
        # self.t_emb_dropout = nn.Dropout(dropout_rate)
        
        self.i_vocab_embedding1 = nn.Linear(feature_nums,hidden_size)
        # self.i_vocab_embedding2 = nn.Linear(1, hidden_size)
        self.edge_embedding = nn.Linear(nn_nums, hidden_size)
        nn.init.normal_(self.i_vocab_embedding1.weight, mean = 0 , std = hidden_size ** -0.5)
        self.i_emb_dropout = nn.Dropout(dropout_rate)
        self.encoder = Encoder(hidden_size, filter_size, dropout_rate, n_layers)
        self.decoder = Decoder(hidden_size, filter_size, dropout_rate, n_layers)
        # self.out = nn.Linear(t_vocab_size *1, 1)
        
        self.out1 = nn.Linear(hidden_size, 1)
        self.out2= nn.Linear(nn_nums,1)
        
    
    def forward(self, inputs, edge, targets):
        # input_normed = self.input_normalize(inputs.float()).long()
        batch_size = inputs.size(0)
        enc_output = self.encode(inputs,edge)
        out1= self.out1(enc_output).squeeze()
        out2= self.out2(out1).squeeze()
        return  out2
    
    def encode(self, inputs, edge):
        inputs = self.input_normalize(inputs.transpose(1,2)).transpose(1,2)
        edge = self.edge_normalize(torch.exp(edge))
        # Input embedding
        input_embedded = self.i_vocab_embedding1(inputs)
        # edge_embedded = self.edge_embedding(edge)
        # input_embedded = self.i_vocab_embedding2(input_embedded)
        input_embedded *- self.emb_scale
        input_embedded = self.i_emb_dropout(input_embedded)
        
        return self.encoder(input_embedded,edge)
    
    
    
    
#     def decode(self, targets, enc_output):
#         # target embedding
#         targets = self.target_normalize(targets.view(-1,1).to(torch.float32)).long()
#         target_embedded = self.t_vocab_embedding(targets.long())
#         # target_embedded *= self.emb_scale
#         target_embedded = self.t_emb_dropout(target_embedded).to(torch.float32)
#         decoder_output = self.decoder(target_embedded, enc_output)
#         # output = torch.matmul(decoder_output, self.t_vocab_embedding.weight.transpose(0,1))
#         weights =self.t_vocab_embedding.weight.unsqueeze(0).transpose(1,2)
#         # output = torch.matmul(decoder_output, weights)
#         # output = self.out(output).squeeze()
#         output = self.out(decoder_output.squeeze()).view(-1)

#         return output
    
        
        

In [14]:
model = Transformer(nn_nums= 15, feature_nums=10, n_layers=6, hidden_size=512, filter_size=2048,  dropout_rate=0.1)
features.shape
edge.shape
a =  nn.Linear(10, 512)(features)
linear_q = nn.Linear(512, 8 * 64, bias = False)
b = linear_q(a).view(50, -1, 8, 64).transpose(1, 2)
c = b.transpose(2,3)
mul = torch.matmul(b,c)
edge.shape
edge_a = nn.Linear(15,8 * 64, bias = False )(edge)
edge_b = linear_q(edge_a).view(50, -1, 8, 64).transpose(1, 2)
mul.shape
edge_b.shape
e = nn.Linear(1,8)(edge.unsqueeze(1).transpose(1,-1)).transpose(1,3)
# e = nn
# torch.stack([mul,e]).shape
e[0][0].shape
mul[0][0].shape
(e+mul).shape

NameError: name 'features' is not defined

In [282]:
from torchsummary import summary
summary(model, (15,10,512), 16)

TypeError: forward() missing 2 required positional arguments: 'edge' and 'targets'

In [276]:
pip install torchsummary


Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
Note: you may need to restart the kernel to use updated packages.


In [335]:
model = Transformer(nn_nums= 15, feature_nums=10, n_layers=6, hidden_size=512, filter_size=2048,  dropout_rate=0.1)
enc_output = model.encode(inputs)
a = nn.BatchNorm1d(15,eps =1e-6)(inputs)
a = nn.Linear(10, 512)(a)
a.shape
# b= nn.Linear(1, 512)(a)
b = nn.Linear(512,1)(enc_output).squeeze()
nn.Linear(15,1)(b).squeeze().shape


torch.Size([50])

In [45]:
model = Transformer(i_vocab_size= 3000, t_vocab_size=100, n_layers=6, hidden_size=512, filter_size=2048,  dropout_rate=0.1)


hidden_size = 512
dropout_rate = 0.1
filter_size =2048
head_size = 8
att_size = hidden_size//head_size

enc_output= model.encode(inputs)
target_normalize = nn.LayerNorm(1, eps= 1e-6)
# model.decode(inputs, labels)
targets = target_normalize(targets.view(-1,1).float())
t_vocab_embedding = nn.Embedding(100, 512)
target_embedded = t_vocab_embedding(targets.long())
target_embedded = nn.Dropout(dropout_rate)(target_embedded)
decoder_output = Decoder(hidden_size=512, dropout_rate=0.1,filter_size = 2048, n_layers=6)(target_embedded,enc_output)
# target_embedded.shape


t_vocab_embedding.weight.shape
final_output = decoder_output
weights = torch.tensor(t_vocab_embedding.weight, device = 'cuda').unsqueeze(0).transpose(1,2)
                                                                                

output = torch.tensor(final_output, device= 'cuda')
print('final shape:', output.shape)
print('weight:', weights.shape)
print('decoder: ', final_output.shape)
matmul = torch.matmul(output, weights)
print('malmul: ', matmul.shape)



TypeError: __init__() got an unexpected keyword argument 'i_vocab_size'

In [10]:
# model.encode(feature)
model = Transformer(i_vocab_size= 100, t_vocab_size=1, n_layers=6, hidden_size=512, filter_size=2048,  dropout_rate=0.1)
input_normed = nn.LayerNorm(10,eps =1e-6)(inputs.float()).long()
input_embedded = nn.Embedding(100, 512)(input_normed)
target_normed = nn.LayerNorm(50, eps = 1e-6)(targets.float()).long()
# out = model.encode(input_normed)
model.decode(targets.reshape(-1,1), out )
# a = nn.LayerNorm(512, eps=1e-6)(input_embedded)
b = MultiHeadAttention(512, 0.1)(input_embedded,input_embedded,input_embedded)
input_embedded.shape
input_embedded.view(-1,200,512).shape
EncoderLayer(hidden_size=512, filter_size=2048,  dropout_rate=0.1)(input_embedded.view(-1,200,512))
Encoder(n_layers=6, hidden_size=512, filter_size=2048,  dropout_rate=0.1)(input_embedded.view(-1,200,512))
model.encode(inputs)

NameError: name 'out' is not defined

In [1]:
# "H, W, N, M이 공백으로 구분되어 주어진다. (0 < H, W, N, M ≤ 50,000)"
inputs = input()
inputs = [int(x) for x in inputs.split()]
H = inputs[0]
W = inputs[1]
N = inputs[2]
M = inputs[3]

if 0<H<=50000 and 0<W<=50000 and 0<N<50000 and 0<M<50000:
    i =1
    h_num = 0
    w_num = 0
    while i <= H:
        n = (N+1)*i - N
        i+=1
        if n <=H:
            h_num +=1
    i=1
    while i <= W:
        m = (M+1)*i - M
        i+=1
        if m <=W:
            w_num +=1
    
    print(h_num*w_num)
            
            

 5 4 1 1


6


In [116]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F



# pylint: disable=arguments-differ


def initialize_weight(x):
    nn.init.xavier_uniform_(x.weight)
    if x.bias is not None:
        nn.init.constant_(x.bias, 0)


class FeedForwardNetwork(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate):
        super(FeedForwardNetwork, self).__init__()

        self.layer1 = nn.Linear(hidden_size, filter_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.layer2 = nn.Linear(filter_size, hidden_size)

        initialize_weight(self.layer1)
        initialize_weight(self.layer2)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.layer2(x)
        return x


class MultiHeadAttention(nn.Module):
    def __init__(self, hidden_size, dropout_rate, head_size=8):
        super(MultiHeadAttention, self).__init__()

        self.head_size = head_size

        self.att_size = att_size = hidden_size // head_size
        self.scale = att_size ** -0.5

        self.linear_q = nn.Linear(hidden_size, head_size * att_size, bias=False)
        self.linear_k = nn.Linear(hidden_size, head_size * att_size, bias=False)
        self.linear_v = nn.Linear(hidden_size, head_size * att_size, bias=False)
        initialize_weight(self.linear_q)
        initialize_weight(self.linear_k)
        initialize_weight(self.linear_v)

        self.att_dropout = nn.Dropout(dropout_rate)

        self.output_layer = nn.Linear(head_size * att_size, hidden_size,
                                      bias=False)
        initialize_weight(self.output_layer)

    def forward(self, q, k, v, mask, cache=None):
        orig_q_size = q.size()

        d_k = self.att_size
        d_v = self.att_size
        batch_size = q.size(0)

        # head_i = Attention(Q(W^Q)_i, K(W^K)_i, V(W^V)_i)
        q = self.linear_q(q).view(batch_size, -1, self.head_size, d_k)
        if cache is not None and 'encdec_k' in cache:
            k, v = cache['encdec_k'], cache['encdec_v']
        else:
            k = self.linear_k(k).view(batch_size, -1, self.head_size, d_k)
            v = self.linear_v(v).view(batch_size, -1, self.head_size, d_v)

            if cache is not None:
                cache['encdec_k'], cache['encdec_v'] = k, v

        q = q.transpose(1, 2)                  # [b, h, q_len, d_k]
        v = v.transpose(1, 2)                  # [b, h, v_len, d_v]
        k = k.transpose(1, 2).transpose(2, 3)  # [b, h, d_k, k_len]

        # Scaled Dot-Product Attention.
        # Attention(Q, K, V) = softmax((QK^T)/sqrt(d_k))V
        q.mul_(self.scale)
        x = torch.matmul(q, k)  # [b, h, q_len, k_len]
        x.masked_fill_(mask.unsqueeze(1), -1e9)
        x = torch.softmax(x, dim=3)
        x = self.att_dropout(x)
        x = x.matmul(v)  # [b, h, q_len, attn]

        x = x.transpose(1, 2).contiguous()  # [b, q_len, h, attn]
        x = x.view(batch_size, -1, self.head_size * d_v)

        x = self.output_layer(x)

        assert x.size() == orig_q_size
        return x


class EncoderLayer(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate):
        super(EncoderLayer, self).__init__()

        self.self_attention_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.self_attention = MultiHeadAttention(hidden_size, dropout_rate)
        self.self_attention_dropout = nn.Dropout(dropout_rate)

        self.ffn_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.ffn = FeedForwardNetwork(hidden_size, filter_size, dropout_rate)
        self.ffn_dropout = nn.Dropout(dropout_rate)

    def forward(self, x, mask):  # pylint: disable=arguments-differ
        y = self.self_attention_norm(x)
        y = self.self_attention(y, y, y, mask)
        y = self.self_attention_dropout(y)
        x = x + y

        y = self.ffn_norm(x)
        y = self.ffn(y)
        y = self.ffn_dropout(y)
        x = x + y
        return x


class DecoderLayer(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate):
        super(DecoderLayer, self).__init__()

        self.self_attention_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.self_attention = MultiHeadAttention(hidden_size, dropout_rate)
        self.self_attention_dropout = nn.Dropout(dropout_rate)

        self.enc_dec_attention_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.enc_dec_attention = MultiHeadAttention(hidden_size, dropout_rate)
        self.enc_dec_attention_dropout = nn.Dropout(dropout_rate)

        self.ffn_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.ffn = FeedForwardNetwork(hidden_size, filter_size, dropout_rate)
        self.ffn_dropout = nn.Dropout(dropout_rate)

    def forward(self, x, enc_output, self_mask, i_mask, cache):
        y = self.self_attention_norm(x)
        y = self.self_attention(y, y, y, self_mask)
        y = self.self_attention_dropout(y)
        x = x + y

        if enc_output is not None:
            y = self.enc_dec_attention_norm(x)
            y = self.enc_dec_attention(y, enc_output, enc_output, i_mask,
                                       cache)
            y = self.enc_dec_attention_dropout(y)
            x = x + y

        y = self.ffn_norm(x)
        y = self.ffn(y)
        y = self.ffn_dropout(y)
        x = x + y
        return x


class Encoder(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate, n_layers):
        super(Encoder, self).__init__()

        encoders = [EncoderLayer(hidden_size, filter_size, dropout_rate)
                    for _ in range(n_layers)]
        self.layers = nn.ModuleList(encoders)

        self.last_norm = nn.LayerNorm(hidden_size, eps=1e-6)

    def forward(self, inputs, mask):
        encoder_output = inputs
        for enc_layer in self.layers:
            encoder_output = enc_layer(encoder_output, mask)
        return self.last_norm(encoder_output)


class Decoder(nn.Module):
    def __init__(self, hidden_size, filter_size, dropout_rate, n_layers):
        super(Decoder, self).__init__()

        decoders = [DecoderLayer(hidden_size, filter_size, dropout_rate)
                    for _ in range(n_layers)]
        self.layers = nn.ModuleList(decoders)

        self.last_norm = nn.LayerNorm(hidden_size, eps=1e-6)

    def forward(self, targets, enc_output, i_mask, t_self_mask, cache):
        decoder_output = targets
        for i, dec_layer in enumerate(self.layers):
            layer_cache = None
            if cache is not None:
                if i not in cache:
                    cache[i] = {}
                layer_cache = cache[i]
            decoder_output = dec_layer(decoder_output, enc_output,
                                       t_self_mask, i_mask, layer_cache)
        return self.last_norm(decoder_output)


class Transformer(nn.Module):
    def __init__(self, i_vocab_size, t_vocab_size,
                 n_layers=6,
                 hidden_size=512,
                 filter_size=2048,
                 dropout_rate=0.1,
                 share_target_embedding=True,
                 has_inputs=True,
                 src_pad_idx=None,
                 trg_pad_idx=None):
        super(Transformer, self).__init__()

        self.hidden_size = hidden_size
        self.emb_scale = hidden_size ** 0.5
        self.has_inputs = has_inputs
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx

        self.t_vocab_embedding = nn.Embedding(t_vocab_size, hidden_size)
        nn.init.normal_(self.t_vocab_embedding.weight, mean=0,
                        std=hidden_size**-0.5)
        self.t_emb_dropout = nn.Dropout(dropout_rate)
        self.decoder = Decoder(hidden_size, filter_size,
                               dropout_rate, n_layers)

        if has_inputs:
            if not share_target_embedding:
                self.i_vocab_embedding = nn.Embedding(i_vocab_size,
                                                      hidden_size)
                nn.init.normal_(self.i_vocab_embedding.weight, mean=0,
                                std=hidden_size**-0.5)
            else:
                self.i_vocab_embedding = self.t_vocab_embedding

            self.i_emb_dropout = nn.Dropout(dropout_rate)

            self.encoder = Encoder(hidden_size, filter_size,
                                   dropout_rate, n_layers)

        # For positional encoding
        num_timescales = self.hidden_size // 2
        max_timescale = 10000.0
        min_timescale = 1.0
        log_timescale_increment = (
            math.log(float(max_timescale) / float(min_timescale)) /
            max(num_timescales - 1, 1))
        inv_timescales = min_timescale * torch.exp(
            torch.arange(num_timescales, dtype=torch.float32) *
            -log_timescale_increment)
        self.register_buffer('inv_timescales', inv_timescales)

    def forward(self, inputs, targets):
        enc_output, i_mask = None, None
        if self.has_inputs:
            i_mask = utils.create_pad_mask(inputs, self.src_pad_idx)
            enc_output = self.encode(inputs, i_mask)

        t_mask = utils.create_pad_mask(targets, self.trg_pad_idx)
        target_size = targets.size()[1]
        t_self_mask = utils.create_trg_self_mask(target_size,
                                                 device=targets.device)
        return self.decode(targets, enc_output, i_mask, t_self_mask, t_mask)

    def encode(self, inputs, i_mask):
        # Input embedding
        input_embedded = self.i_vocab_embedding(inputs)
        input_embedded.masked_fill_(i_mask.squeeze(1).unsqueeze(-1), 0)
        input_embedded *= self.emb_scale
        input_embedded += self.get_position_encoding(inputs)
        input_embedded = self.i_emb_dropout(input_embedded)

        return self.encoder(input_embedded, i_mask)

    def decode(self, targets, enc_output, i_mask, t_self_mask, t_mask,
               cache=None):
        # target embedding
        target_embedded = self.t_vocab_embedding(targets)
        target_embedded.masked_fill_(t_mask.squeeze(1).unsqueeze(-1), 0)

        # Shifting
        target_embedded = target_embedded[:, :-1]
        target_embedded = F.pad(target_embedded, (0, 0, 1, 0))

        target_embedded *= self.emb_scale
        target_embedded += self.get_position_encoding(targets)
        target_embedded = self.t_emb_dropout(target_embedded)

        # decoder
        decoder_output = self.decoder(target_embedded, enc_output, i_mask,
                                      t_self_mask, cache)
        # linear
        output = torch.matmul(decoder_output,
                              self.t_vocab_embedding.weight.transpose(0, 1))

        return output

    def get_position_encoding(self, x):
        max_length = x.size()[1]
        position = torch.arange(max_length, dtype=torch.float32,
                                device=x.device)
        scaled_time = position.unsqueeze(1) * self.inv_timescales.unsqueeze(0)
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)],
                           dim=1)
        signal = F.pad(signal, (0, 0, 0, self.hidden_size % 2))
        signal = signal.view(1, max_length, self.hidden_size)
        return signal


In [118]:
model = Transformer(i_vocab_size= 3000, t_vocab_size=100, n_layers=6, hidden_size=512, filter_size=2048,  dropout_rate=0.1)
sum(p.numel() for p in model.parameters())

44154880

In [300]:
input_ = int(input())

i = 1
while True:
    if 3 * i*(i-1)+1 >= input_:
        print(i)
        break
    else:
        i+=1
    


 13


3
