In [942]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numba import njit
from numpy.linalg import norm

import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
from torchmetrics import MeanAbsolutePercentageError

import seaborn as sns
sns.set_style("darkgrid", {"grid.color": ".6", "grid.linestyle": ":"})

from utility_funcs import *

import dill

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel, ConstantKernel, RBF
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import Normalizer

In [943]:
model_type = "gpr"

In [944]:
class SingleNet(nn.Module):
    '''

    Класс одиночной нейронной сети

    '''
    def __init__(self, output_size, activation=nn.ReLU(), flattened_size=CFG.K * CFG.K):
        '''
        
        FC_type: тип полносвязных слоев: 'regular' / 'simple

        convolution: сверточная часть сети

        '''
        super().__init__()

        self.FC = nn.Sequential(
        #     # nn.BatchNorm1d(flattened_size),

            nn.Linear(flattened_size, 128),
            activation,
            # nn.Dropout(0.3),
            nn.BatchNorm1d(128),

            nn.Linear(128, 256),
            activation,
            # nn.Dropout(0.3),
            nn.BatchNorm1d(256),

            nn.Linear(256, 256),
            activation,
            # nn.Dropout(0.3),
            nn.BatchNorm1d(256),

            nn.Linear(256, 256),
            activation,
            # nn.Dropout(0.3),
            nn.BatchNorm1d(256),

            # nn.Linear(256, 256),
            # activation,
            # # nn.Dropout(0.3),
            # nn.BatchNorm1d(256),
            
            nn.Linear(256, 512),
            activation,
            # nn.Dropout(0.3),
            # nn.BatchNorm1d(512),
            nn.Linear(512, output_size),
        )

        # self.FC = nn.Sequential(
        #     nn.Linear(flattened_size, 64),
        #     activation,

        #     nn.Linear(64, output_size)
        # )

        # self.FC = nn.Linear(flattened_size, output_size)

    def forward(self, x):
        # x - is batch of matrices KxK

        # Здесь происходят какие-то там свертки, пуллинги и тп..

        x = self.FC(x)

        return x

In [945]:
class SklearnModel:
    def __init__(self, model):
        '''
        model - sklearn model
        '''
        self.model = model

    def __call__(self, X):
        return self.model.predict(X)

In [946]:
model_file_path = f'./trained_models/{model_type}_{CFG.N}_movements_K{CFG.K}.pickle'
model_vel_file_path = f'./trained_models/{model_type}_{CFG.N}_velocities_K{CFG.K}.pickle'

# Сетка предсказания сжатые будет выдавать и их надо будет возвращать к обычному скейлу
descaler_path = f'./trained_models/descaler_{CFG.N}_K{CFG.K}.pickle'
descaler_vel_path = f'./trained_models/descaler_vel_{CFG.N}_K{CFG.K}.pickle'

In [947]:
with open(model_file_path, 'rb') as handle:
    model = dill.load(handle)

with open(model_vel_file_path, 'rb') as handle:
    model_vel = dill.load(handle)

if model_type == "gpr":
    descaler = Descaler(1, 0)
    descaler_vel = Descaler(1, 0)
    model = SklearnModel(model)
    model_vel = SklearnModel(model_vel)
else:
    with open(descaler_path, 'rb') as handle:
        descaler = dill.load(handle)
    

In [948]:
def csv_row_to_state(coords_path, vels_path, row_number=0):
    '''
    This function will be mostly used to start integration and create header for csv to write into
    '''

    row_coords = np.array(pd.read_csv(
        coords_path
    ).iloc[row_number, :][1:]).reshape(CFG.N, 3)

    row_vels = np.array(pd.read_csv(
        vels_path
    ).iloc[0, :][1:]).reshape(CFG.N, 3)
    
    state = {i: [row_coords[i], row_vels[i]] for i in range(CFG.N)}


    return state

state = csv_row_to_state(
    f'./coords_and_movements/coords{CFG.N}.csv',
    f'./coords_and_movements/velocities{CFG.N}.csv',

    row_number=50
)

In [949]:
def _get_relative_positions(state, atom_number):
    '''
    This function processes one row of csv into something that we can work with

    Returns np.array matrix that consists of relative positions vectors for passed atom_number to every other atom
    and then we can chose only closest N_neighbours in the next functions
    
    row: df.iloc[row] - typeof(row): pd.Series
    
    returns: Rel_matrix, f_vec
    '''

    other_atom_numbers = [i for i in range(CFG.N) if i != atom_number]
    Rel_matrix = np.zeros([CFG.N - 1, 3])

    for i, other_numb in enumerate(other_atom_numbers):
        Rel_matrix[i] = state[atom_number][0] - state[other_numb][0]

    # print('rel_dists: ', Rel_matrix)

    return np.array(Rel_matrix)

def create_V_i(i, normalized_m, norms, r_cut=CFG.r_cut, p=CFG.p):
    '''
    normalized_m: matrix of relative distances, where rows - normalized vectors
    i: i-th component of r_cut and p, i in range 1..K (or in 0..K-1 in code)
    '''
    transf_vecs = make_matrix_transformed(normalized_m, norms[:, np.newaxis], r_cut[i], p[i])

    return np.sum(transf_vecs, axis=0)

# @njit(parallel=True)
def create_V(normalized_m, norms, K=CFG.K):
    '''
    creates V
    '''
    V = []
    for i in range(K):
        V.append(
            create_V_i(i, normalized_m, norms)
        )

    return np.stack(V)

In [950]:
# @njit(
#     parallel=True,
#     fastmath=True
#     )
def _calculate_matrix_for_atom(relative_distances, r_cut=CFG.r_cut, p=CFG.p, N_neig=CFG.N_neig, K=CFG.K, use_orthogonal=False, use_A_t=True):
    '''

    relative_distances: np.array matrix of relative distance vectors

    '''
    
    # Only closest N_neig are counting:
    indexlist = np.argsort(norm(relative_distances, axis=1))

    relative_distances = relative_distances[indexlist[len(relative_distances) - N_neig:]]

    norms = norm(relative_distances, axis=-1)

    normalized_rel_distances = relative_distances / norms[:, np.newaxis]

    V = create_V(normalized_rel_distances, norms)

    if use_orthogonal and CFG.K == 3:
        diagonal_V_matr = [[0] * CFG.K for i in range(K)]
        for i in range(K):
            diagonal_V_matr[i][i] = V[i][i]
        V = np.array(
            diagonal_V_matr
        )

    A = V / norm(V, axis=-1)[:, np.newaxis]

    if use_A_t:
        X = V @ A.T
    else:
        X = V

    return X, A

def get_matrix_for_atom(state, atom_number, N_neig=CFG.N_neig, use_orthogonal=True, use_A_t=True):
    '''

    This function will create X matrix for passed atom with
    arrays of r_cut and p of length k

    It is a wrapper for _get_relative_positions and _calculate_matrix_for_atom, so I can speed up matrix calculations
    with numba for _calculate_matrix_for_atom

    atom_number: a number of atom that we are passing
    row: one row from df_with_coords, i.e. df.iloc[index_of_row]

    '''
    # @njit
    def get_pinv(A):
        return np.linalg.pinv(A)

    # creating row of relative coordinates for concrete atom:
    relative_distances = _get_relative_positions(state=state, atom_number=atom_number)
    X, A = _calculate_matrix_for_atom(relative_distances=relative_distances, N_neig=N_neig, use_orthogonal=use_orthogonal, use_A_t=use_A_t)

    flat_X = np.concatenate([X.flatten(), state[atom_number][1]])
    
    return flat_X, get_pinv(A)

# %timeit get_matrix_for_atom(row=df.iloc[0], atom_number=1)

In [951]:
csv_naming = []
for i in range(CFG.N):
    csv_naming.extend([str(i) + "x", str(i) + "y", str(i) + "z"])
result_csv = "./integration_res/result_coords.csv"


def fill_csv(state, path):
    '''
    fills csv after current step
    '''
    for atom_numb in range(CFG.N):
        pass

def fill_xyz(state, path):
    '''
    fills xyz file after current step
    '''
    pass

IndentationError: expected an indented block (<ipython-input-951-38259ada2ef6>, line 13)

In [None]:
def make_prediction(X, pinv_A, model):
    return pinv_A @ model(X.reshape(1, -1)).squeeze()

In [None]:
def make_predictions(state, model):
    '''
    predicting on multiple rows is faster
    '''
    Xs = []
    pinv_As = []
    for atom_num in range(CFG.N):
        X, pinv_A = get_matrix_for_atom(state=state, atom_number=atom_num)
        Xs.append(X)
        pinv_As.append(pinv_A)
    Xs = np.array(Xs, dtype=np.float32)
    pinv_As = np.array(pinv_As)
    preds = model(Xs)

    return [pinv_As[i] @ preds[i] for i in range(CFG.N)]

# make_predictions(state, model=model)

# make_predictions(state, model=model_vel)

In [None]:
def make_step(state):
    dses = make_predictions(state, model)
    dvs = make_predictions(state, model_vel)
    for atom_num in range(CFG.N):
        state[atom_num][0] += dses[atom_num]
        state[atom_num][1] += dvs[atom_num]
    
    return state

In [None]:
make_step(state=state)

{0: [array([-0.08500186,  0.04863722,  0.03553856]),
  array([-1.55676056, -0.37403802,  0.2729956 ])],
 1: [array([ 1.36564848, -0.02256105, -0.04215596]),
  array([-4.65428112,  2.24508663,  0.62201187])],
 2: [array([-0.05339952,  1.37264916,  0.00597156]),
  array([-1.58769599, -3.75978543,  0.09783248])]}

In [None]:
def integration_cycle(state, number_of_steps, csv_path=None, xyz_path=None):
    '''
    state: starting state
    '''
    for step in range(number_of_steps):
        fill_csv(step, csv_path)
        fill_xyz(step, xyz_path)
        state = make_step(state)

    return state

In [None]:
integration_cycle(state=state, number_of_steps=100)

{0: [array([-0.42024516, -0.12250679,  0.12140434]),
  array([-0.54730273,  0.67520562,  1.94337576])],
 1: [array([ 1.36126203, -0.02151821, -0.04165194]),
  array([-5.47814555,  2.48833797,  0.66326832])],
 2: [array([-0.05499572,  1.35640266,  0.04343953]),
  array([-2.21367084, -4.66874242, -0.52754341])]}