In [None]:
from dscribe.descriptors import SOAP, MBTR
from ase.io import read
from ase import Atoms
from ase import neighborlist
import ase.data
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np
import pickle as pk
import os
import spglib
from asap3.analysis.rdf import RadialDistributionFunction
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import torch as th
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F

In [None]:
# All tensors and the model are placed on the CPU. The commented-out line
# below is the auto-detecting alternative that selects CUDA when available.
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

In [None]:
# Load the pickled structures and their associated properties.
# Context managers guarantee the file handles are closed after loading.
# NOTE(review): pickle.load can execute arbitrary code; only load files
# that come from a trusted source.
with open('mp0106_max5_all_structure.p', 'rb') as structure_file:
    # presumably a sequence of ASE Atoms objects (later cells call
    # get_chemical_symbols() on each entry) - TODO confirm
    mpdata = pk.load(structure_file)

with open('mp0106_max5_all_props.p', 'rb') as props_file:
    mptarget = pk.load(props_file)

In [None]:
# Collect every chemical symbol that occurs in any structure of the dataset.
species = {
    symbol
    for idx in range(len(mpdata))
    for symbol in mpdata[idx].get_chemical_symbols()
}

In [None]:
# Build one flattened MBTR descriptor per structure.
# NOTE(review): `species` is reassigned here for every structure, which
# overwrites the dataset-wide set built in the previous cell. Each descriptor
# is configured with only the elements present in that structure, so the
# output length can differ between structures (a later cell zero-pads them).
mbtr_lists = []

for idx in range(len(mpdata)):
    structure = mpdata[idx]
    species = set(structure.get_chemical_symbols())

    # k1: atomic-number term.
    k1_term = dict(
        geometry=dict(function="atomic_number"),
        grid=dict(min=0, max=82, sigma=0.1, n=82),
    )
    # k2: inverse-distance term with exponential weighting.
    # (An alternative grid of min 0.5 / max 6 / n 50 was tried and disabled.)
    k2_term = dict(
        geometry=dict(function="inverse_distance"),
        grid=dict(min=0, max=1, sigma=0.02, n=30),
        weighting=dict(function="exp", scale=1.0, threshold=1e-3),
    )
    # k3: cosine-of-angle term with exponential weighting.
    k3_term = dict(
        geometry=dict(function="cosine"),
        grid=dict(min=-1.0, max=1.0, sigma=0.02, n=30),
        weighting=dict(function="exp", scale=1.0, threshold=1e-3),
    )

    mbtr = MBTR(
        species=species,
        periodic=True,
        k1=k1_term,
        k2=k2_term,
        k3=k3_term,
        flatten=True,
        normalization="n_atoms",
        sparse=False,
    )
    onevec = mbtr.create(structure)
    mbtr_lists.append(onevec)

In [None]:
# Length of every MBTR vector, in dataset order.
sizes = [len(vector) for vector in mbtr_lists]

In [None]:
# Right-pad every MBTR vector with zeros so that all vectors share the length
# of the longest one.
# The target length is computed once, directly from `mbtr_lists`, instead of
# calling max() on every iteration and relying on a separately built list.
padded_length = max((len(vector) for vector in mbtr_lists), default=0)

zeropadding_mbtrlists = []
for vector in mbtr_lists:
    padding = np.zeros(padded_length - len(vector))
    # np.append returns a new flat array: original values followed by zeros.
    zeropadding_mbtrlists.append(np.append(vector, padding))

In [None]:
# Reproducibility settings: fix the PyTorch RNG seed and ask cuDNN for
# deterministic behaviour (benchmark mode off, deterministic mode on).
seed = 42
torch.manual_seed(seed)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Convert each zero-padded vector to a tensor, then stack them row-wise into
# a single tensor with one row per structure.
new_tensors = [th.tensor(padded_vector) for padded_vector in zeropadding_mbtrlists]

new_t = th.stack(new_tensors)

In [None]:
# Dummy targets: a column of ones with one row per sample. TensorDataset
# needs a second tensor; the values themselves are placeholders.
arry = np.ones(len(new_t))

# from_numpy shares memory with `arry`; clone().detach() yields an
# independent tensor before it is reshaped into a single column.
labels = th.from_numpy(arry).clone().detach().view(-1, 1)

In [None]:
# Training hyper-parameters.
learning_rate = 1e-4
epochs = 20
batch_size = 128

# Pair every descriptor row with its placeholder label and serve the pairs in
# fixed-size batches (default DataLoader settings, i.e. no shuffling).
train_dataset = torch.utils.data.TensorDataset(new_t, labels)
trainloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
)

In [None]:
# Width of one input vector. Read from the tensor's shape rather than from
# row 1, so this also works when the dataset holds a single sample.
inputdim = new_t.shape[1]
# Number of units in the hidden (bottleneck) layer of the autoencoder.
second = 100

class AE(nn.Module):
    """Fully connected autoencoder with a single hidden layer.

    The encoder is ``Linear(input_dim, hidden_dim)`` followed by ReLU; the
    decoder is ``Linear(hidden_dim, input_dim)`` followed by ReLU.

    Args:
        input_dim: Width of one input vector. Defaults to the notebook-level
            ``inputdim`` when omitted.
        hidden_dim: Number of hidden units. Defaults to the notebook-level
            ``second`` when omitted.
    """

    def __init__(self, input_dim=None, hidden_dim=None):
        super().__init__()

        # Fall back to the notebook-level settings so that ``AE()`` keeps
        # working exactly as before.
        if input_dim is None:
            input_dim = inputdim
        if hidden_dim is None:
            hidden_dim = second

        # Attribute names are kept (fc1 / fc4) so existing state dicts load.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        """Map inputs of width ``input_dim`` to the hidden representation."""
        return F.relu(self.fc1(x))

    def decode(self, z):
        """Map hidden representations back to width ``input_dim``.

        The final ReLU clamps the reconstruction to non-negative values.
        """
        return torch.relu(self.fc4(z))

    def forward(self, x):
        """Flatten ``x`` to rows of width ``input_dim`` and reconstruct them."""
        # Use the layer's own width instead of a module-level global, so the
        # model stays consistent even if that global is reassigned later.
        z = self.encode(x.view(-1, self.fc1.in_features))
        return self.decode(z)


# Instantiate the autoencoder and place it on the configured device.
model = AE()
model = model.to(device)

# Adam over all model parameters; reconstruction quality is scored with
# mean squared error.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [None]:
# Train the autoencoder to reconstruct its own input: the loss compares the
# model output with the input batch. The number of passes is `epochs` from
# the hyper-parameter cell.
#
# Validation is currently disabled: no `validloader` is defined in this
# notebook as shown. `min_valid_loss` is kept for when it is re-enabled.
min_valid_loss = np.inf

# evaluate() switches the model to eval mode; make sure we are back in
# training mode when this cell is (re-)run.
model.train()

for e in range(epochs):
    train_loss = 0.0
    # The placeholder labels from the dataset are not used for training.
    for data, _batch_labels in trainloader:
        # Move the batch to the same device as the model. Calling .cuda()
        # whenever CUDA is available would mismatch a model kept on `device`.
        data = data.to(device).float()

        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Forward pass: reconstruct the input.
        reconstruction = model(data)
        # Reconstruction error against the input itself.
        loss = criterion(reconstruction, data)
        # Back-propagate and update the weights.
        loss.backward()
        optimizer.step()
        # Accumulate the per-batch loss for the epoch average.
        train_loss += loss.item()

    # Mean training loss per batch for this epoch.
    print(train_loss / len(trainloader))

# To persist the trained weights:
#torch.save(model.state_dict(), '30-ae_hv_saved_model.pth')

In [None]:
def evaluate(dataloader, model, loss_fn=None):
    """Compute the reconstruction loss of ``model`` for every batch.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` pairs; the labels
            are ignored.
        model: Module whose output is compared against its own input.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor. Defaults to the notebook-level ``criterion``.

    Returns:
        list[float]: One loss value per batch, in iteration order.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.eval()

    # Send batches to wherever the model's parameters live; a model without
    # parameters leaves the batch where it is.
    first_param = next(model.parameters(), None)

    testlosslist = []
    with torch.no_grad():
        for test_batch, _ in dataloader:
            # Flatten each sample to one row using the batch's own shape
            # rather than a width taken from a global tensor.
            test_batch = test_batch.view(test_batch.size(0), -1).float()
            if first_param is not None:
                test_batch = test_batch.to(first_param.device)
            pred = model(test_batch)
            testloss = loss_fn(pred.float(), test_batch)
            testlosslist.append(testloss.item())
    return testlosslist

In [None]:
# Rebuild the placeholder labels (one row of ones per sample) for the
# evaluation dataset.
arry = np.ones(len(new_t))
labels = th.from_numpy(arry).clone().detach().view(-1, 1)

# Evaluate one sample at a time, so `alllosses` holds one reconstruction
# loss per structure, in dataset order.
all_dataset = torch.utils.data.TensorDataset(new_t, labels)
alldataloader_eval = torch.utils.data.DataLoader(
    dataset=all_dataset,
    batch_size=1,
)
alllosses = evaluate(dataloader=alldataloader_eval, model=model)