## Imports

In [1]:
import dostools
import importlib
import numpy as np
import pickle
import torch
import sys
import matplotlib.pyplot as plt
import copy
from tqdm import tqdm
import matplotlib
import time
torch.set_default_dtype(torch.float64) 
%matplotlib notebook
matplotlib.rcParams['figure.figsize'] = (10, 10)
sys.modules['dostools.src'] = dostools

In [2]:
import dostools.datasets.data as data
import dostools.utils.utils as utils

n_structures = 1039
np.random.seed(0)
n_train = int(0.8 * n_structures)
train_index = np.arange(n_structures)
np.random.shuffle(train_index)
test_index = train_index[n_train:]
train_index = train_index[:n_train]

with torch.no_grad():
    structures = data.load_structures(":")
    n_structures = len(structures) #total number of structures
    for structure in structures:#implement periodicity
        structure.wrap(eps = 1e-12) 
    n_atoms = np.zeros(n_structures, dtype = int) #stores number of atoms in each structures
    for i in range(n_structures):
        n_atoms[i] = len(structures[i])

    #eigenergies, emin, emax = dostools.src.datasets.data.load_eigenenergies(unpack = True, n_structures = len(structures))
    xdos = torch.tensor(data.load_xdos())
    ldos = torch.tensor(data.load_ldos())
    ldos *= 2

    print ("ldos shape is {}".format(ldos.shape))
    mean_dos_per_atom = ldos[train_index].mean(axis = 0) #only calculated for train set to prevent data leakage
    print ("mean dos shape is {}".format(mean_dos_per_atom.shape))
    
    
    y_pw = ldos - mean_dos_per_atom
    y_lcdf = torch.cumsum(y_pw, dim = 1)
    _, pc_vectors = utils.build_pc(ldos[train_index], mean_dos_per_atom[None,:], n_pc = 10)
    y_pc = utils.build_coeffs(ldos - mean_dos_per_atom[None,:], pc_vectors)
    Silicon = data.load_features()
    kMM = data.load_kMM()

ldos shape is torch.Size([1039, 778])
mean dos shape is torch.Size([778])
Variance covered with 10 PCs is = 0.9871211778950163


## Evaluator

In [3]:
import dostools.evaluation.evaluation as evaluation
importlib.reload(evaluation)
import dostools.models.training as training
importlib.reload(training)

targets = {
    'pw' : ldos,
    'lcdf' : y_lcdf,
    'pc' : y_pc
}
evaluator = evaluation.Evaluator(targets, xdos, mean_dos_per_atom)

## Generate data

Data Generation will consist of two steps  
1. Generate a random alignment 
2. Use scikitlearn Ridge to determine optimal loss

In [18]:
### Define limits for alignment to prevent complete shifts (shifting the spectrum out of the data)

def determine_bounds(input_dos, threshold):
    boo_dos = input_dos > threshold
    left_bound = torch.nonzero(boo_dos)[0]
    right_bound = torch.nonzero(boo_dos)[-1]
        
    return (0-left_bound), (788-right_bound)

bounds = []
for i in ldos[train_index]:
    bounds.append(determine_bounds(i, 1e-10))

bounds = torch.tensor(bounds)

In [48]:
from sklearn.linear_model import Ridge
import dostools.src.loss.loss as loss
import dostools.consistency.consistency as consistency
importlib.reload(loss)

xdos_step = xdos[1] - xdos[0]
#Since it should be alright to overfit shifts
def determine_error(normalized_dos):
    model = Ridge(alpha = 0, fit_intercept = False, solver = 'svd')
    model.fit(Silicon.Features['structure_avekerneldescriptors'][train_index,:], normalized_dos)
    
    preds = model.predict(Silicon.Features['structure_avekerneldescriptors'][train_index,:])
    with torch.no_grad():
        rmse = loss.t_get_rmse(torch.tensor(preds), normalized_dos, xdos)
        
    return rmse
        

def normalize(ldos, alignment):
    shifted_dos = consistency.shifted_ldos(ldos, xdos, alignment)
    mean = torch.mean(shifted_dos, dim = 0)
    normalized_dos = shifted_dos - mean
    
    return normalized_dos

def generate_sample(bounds, ldos, batch_size):
    x_sample = []
    for bound in bounds:
        col_i = torch.randint(bound[0], bound[1], (batch_size,1))
        x_sample.append(col_i)
    
    x_sample = torch.hstack(x_sample)
    
    y_sample = []
    for x in x_sample:
        normalized_dos = normalize(ldos, x)
        y = determine_error(normalized_dos)
        y_sample.append(y)
        
        
    y_sample = torch.tensor(y_sample)
    
    return (x_sample, y_sample)

In [57]:
from scipy.optimize import minimize
from sklearn.linear_model import Ridge


lowest_rmse = 10
global lowest_rmse
best_alignment = 0
global best_alignment

def shift_rmse(alignment):
    alignment = torch.tensor(alignment)
    normalized_dos = normalize(ldos[train_index], alignment) 
    model = Ridge(alpha = 0, fit_intercept = False, solver = 'lsqr')
    model.fit(Silicon.Features['structure_avekerneldescriptors'][train_index,:], normalized_dos)
    preds = model.predict(Silicon.Features['structure_avekerneldescriptors'][train_index,:])
    with torch.no_grad():
        rmse = loss.t_get_rmse(torch.tensor(preds), normalized_dos, xdos)
        if rmse < lowest_rmse:
            lowest_rmse = rmse
            best_alignment = alignment
            
    print ("The error is currently :{}".format(rmse))
    return rmse

rmin = minimize(fun = shift_rmse, x0 = np.zeros(831), bounds = bounds * xdos_step, method = "Nelder-Mead", options ={"maxiter": 1})

UnboundLocalError: local variable 'lowest_rmse' referenced before assignment

In [53]:
np.zeros(831)+20

array([20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
       20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20

In [39]:
shift_rmse(np.zeros(831))

tensor(1.6779e-09)

In [50]:
bounds * xdos_step

tensor([[-3.5546,  6.0578],
        [-7.2594,  7.4596],
        [-1.4018,  1.1014],
        ...,
        [-6.6085,  6.5084],
        [-5.4070,  6.5084],
        [-3.4545,  5.2067]])

## Model

In [127]:
import dostools.src.models.models as models
import dostools.src.models.training as training
import dostools.src.models.architectures as architecture
import dostools.src.loss.loss as loss
import torch.nn as nn

importlib.reload(models)
importlib.reload(training)
importlib.reload(architecture)
importlib.reload(loss)


class ShiftErrorModel(nn.Module):
    def __init__(self, inputSize, intermediateSize, outputSize):
        super(ShiftErrorModel, self).__init__()
        self.fc1 = nn.Linear(inputSize, intermediateSize, bias = True)
        self.fc2 = nn.Linear(intermediateSize, intermediateSize)
        self.fc3 = nn.Linear(intermediateSize,intermediateSize)
        self.fc4 = nn.Linear(intermediateSize, outputSize)
        self.silu = torch.nn.SiLU()

    def forward(self, x):
        """
        Performs the transformations to the features based on the model
        
        Args:
            x (tensor): input features
        
        Returns:
            tensor: output
        """
        out = self.fc1(x)
        out = self.silu(out)
        out = self.fc2(out)
        out = self.silu(out)
        out = self.fc3(out)
        out = self.silu(out)
        out = self.fc4(out)
        return out

 

In [128]:
model = ShiftErrorModel(831, 300, 1)

In [None]:
lr = 0.001
batch_size = 16
n_epochs = 100000
weight_decay = 0

opt = torch.optim.Adam(model.parameters(), lr = lr, weight_decay = weight_decay)
threshold = 1000
scheduler_threshold = 100
tol = 1e-4
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, factor = 0.1, patience = scheduler_threshold)#0.5)
best_state = copy.deepcopy(model.state_dict())
lowest_loss = torch.tensor(9999)
pred_loss = torch.tensor(0)
trigger = 0
loss_history =[]
pbar = tqdm(range(n_epochs))

for epoch in pbar:
    pbar.set_description(f"Epoch: {epoch}")
    pbar.set_postfix(pred_loss = pred_loss.item(), lowest_loss = lowest_loss.item(), trigger = trigger)
    opt.zero_grad()
    x_data, y_data = generate_sample(bounds, ldos[train_index], batch_size)
    pred = model.forward(x_data.double())
    pred_loss = loss.t_get_mse(pred.view(-1,1), y_data.view(-1,1))#, self.xdos, perc = True)
    new_loss = pred_loss #*1E7
    new_loss.backward()
    opt.step()
    if pred_loss >100000 or (pred_loss.isnan().any()) :
        print ("Optimizer shows weird behaviour, reinitializing at previous best_State")
        model.load_state_dict(best_state)
        opt = torch.optim.Adam(model.parameters(), lr = lr, weight_decay = weight_decay)
    if epoch %1000 == 1:
        loss_history.append(lowest_loss.item())
    if lowest_loss - new_loss > tol: #threshold to stop training
        best_state = copy.deepcopy(model.state_dict())
        lowest_loss = new_loss
        trigger = 0
    else:
        trigger +=1

        if trigger > threshold:
            weight_decay.load_state_dict(best_state)
            for g in opt.param_groups:
                g['lr'] = lr
            batch_size = batch_size * 8
            print ("Increasing batch_size: {}".format(lowest_loss))


Epoch: 806:   1%|▊                                                                                                | 806/100000 [18:17:00<1910:26:00, 69.33s/it, lowest_loss=0.00173, pred_loss=0.00533, trigger=67]

## Dataset and DataLoader