# Моделирование охлаждения NVT ансамбля <a class="tocSkip">

   - Система: $N = 1372$ частиц;
   - Ячейка моделирования: $V = 12.25\sigma\times12.25\sigma\times12.25\sigma$;
   - Потенциал взаимодействия: Леннард-Джонс;
   - Ансамбль: $NVT$;
   - Начальная температура: $T_i = 1.3~\varepsilon / k_B$;
   - Конечная температура: $T_f = 10^{-4}~\varepsilon / k_B$;
   - Скорость охлаждения: $\gamma = 2\times10^{-5}~\varepsilon / (k_B\tau)$;
   - Термостат: масштабирование скоростей;
   - Толщина сферического слоя (список Верле): $\Delta r_s = 0.3\sigma$;
   - Временной шаг: $\Delta t = 0.005\tau$.

## Imports

In [None]:
from datetime import datetime
from pathlib import Path
import sys

# Parent of the current working directory, used as the root for the data and
# plot paths built later in the notebook.
BASE_DIR = Path('.').resolve().parent
# Put that root on the import path so the project-local `scripts_old` package
# imported further down can be found.
sys.path.append(str(BASE_DIR))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import interpolate

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

In [None]:
%load_ext autoreload
%autoreload 2

from scripts_old.plotter import Plotter, get_temperature_legend
import scripts_old.postprocessing as pp
from scripts_old.properties.ovito_procedures import OvitoProcessor

## Constants

In [None]:
# Wall-clock start; the last cell subtracts it from "now" to report run time.
START_TIME = datetime.now()
# Name of the sub-directory of `data/` whose `rdf*.csv` files are analysed.
CURRENT_DATA = '2022-09-25_velocity_scaling_HV_2e-5_T_01e-4'
PATH_TO_CURRENT_DATA = BASE_DIR / 'data' / CURRENT_DATA
# Directory handed to `Plotter` as `path_to_plots`.
PATH_TO_CURRENT_PLOTS = BASE_DIR / 'plots' / 'article'
# NOTE(review): not referenced in the visible cells; presumably a suffix for
# saved figure file names - confirm or remove.
PLOT_FILENAME_POSTFIX = 'HV_2e-5'
# Bare expression: echoes the resolved data path as the cell output.
PATH_TO_CURRENT_DATA

In [None]:
# Largest relative deviation between the coarsened RDF sum and the
# full-resolution one that is tolerated when the radius stride is chosen.
RDF_RECOVERY_ERROR = 0.005
# NOTE(review): not used in the visible cells; the value matches the
# 12.25 sigma cubic cell edge quoted in the notebook header - confirm.
CELL_DIMENSIONS = np.ones(3) * 12.25

## Data reading

In [None]:
# Pair every `rdf*.csv` file with the temperature encoded in its name (the
# third `_`-separated field of the file stem).
rdf_files = [
    (float(path.stem.split('_')[2]), path)
    for path in PATH_TO_CURRENT_DATA.iterdir()
    if path.match('rdf*.csv')
]
if not rdf_files:
    raise FileNotFoundError(f'No rdf*.csv files found in {PATH_TO_CURRENT_DATA}')

# `Path.iterdir()` yields entries in arbitrary, filesystem-dependent order, so
# merely reversing the listing does not give a reproducible column order.
# Sort explicitly from the highest temperature to the lowest.
# NOTE(review): descending order is assumed because later cells treat column 0
# as the initial (hottest) state of the cooling run - confirm.
rdf_files.sort(key=lambda item: item[0], reverse=True)

temperatures, rdf_dataframes = [], []
for temperature, filename in rdf_files:
    temperatures.append(temperature)
    rdf_dataframes.append(pd.read_csv(filename, sep=';'))

temperatures = np.array(temperatures).round(2)
# The radius grid is taken from the first file only.
# NOTE(review): assumes every file shares the same 'radius' column - confirm.
all_radiuses = np.array(rdf_dataframes[0]['radius'].values)
# Keep every second column starting with the second one, i.e. drop the
# repeated radius column of each frame; rows are radii, columns temperatures.
# NOTE(review): assumes each CSV has exactly two columns (radius, rdf).
full_matrix = pd.concat(rdf_dataframes, axis=1).iloc[:,1::2].to_numpy()

# Bare expression: shows the three shapes as the cell output.
all_radiuses.shape, temperatures.shape, full_matrix.shape

In [None]:
all_radiuses

In [None]:
temperatures

## RDF parameters definition

In [None]:
# Indices of the radii whose RDF value is non-zero for at least one
# temperature; rows that are zero everywhere are dropped.
nz_indices = np.flatnonzero(full_matrix.any(axis=1))

nz_matrix = full_matrix[nz_indices]
nz_radiuses = all_radiuses[nz_indices]
# Bare expression: shows the reduced shapes as the cell output.
nz_radiuses.shape, nz_matrix.shape

In [None]:
# First tabulated radius at which the RDF is non-zero for some temperature.
r_min = nz_radiuses[0]
print(f'{r_min = }')

In [None]:
# Radial grid step, taken from the first two non-zero radii and rounded to
# four decimals. NOTE(review): assumes a uniform radius grid - confirm.
dr = round(nz_radiuses[1] - nz_radiuses[0], 4)
# Sum of the first temperature column over the non-zero rows times the step;
# serves as the reference value when the radius stride is chosen below.
rdf_integral = nz_matrix[:, 0].sum() * dr
# Bare expression: shows both values as the cell output.
rdf_integral, dr

In [None]:
# Choose the largest stride `radius_scale` for which the strided sum of the
# first temperature column still reproduces `rdf_integral` within the relative
# tolerance RDF_RECOVERY_ERROR.
if not rdf_integral > 0:
    # A zero (or NaN) reference makes the relative error undefined; without
    # this check the loop below would be skipped and the stride would become -1.
    raise ValueError(f'rdf_integral must be positive, got {rdf_integral}')

radius_scale = 0
rdf_sum = rdf_integral
while abs(rdf_integral - rdf_sum) / rdf_integral <= RDF_RECOVERY_ERROR:
    radius_scale += 1
    rdf_sum = nz_matrix[::radius_scale, 0].sum() * (radius_scale * dr)

# The loop exits on the first stride that violates the tolerance, so step back
# to the last stride that satisfied it.
radius_scale -= 1
radius_step = radius_scale * dr

# Strided indices into the full radius grid, starting at the first non-zero row.
selected_indices = set(np.arange(nz_indices[0], all_radiuses.size, radius_scale))
# Always keep the maximum of the first temperature column ...
selected_indices.add(full_matrix[::, 0].argmax())
# ... and the last non-zero row, even if the stride would skip them.
selected_indices.add(nz_indices[-1])
selected_indices = np.array(sorted(selected_indices))
selected_radiuses = all_radiuses[selected_indices]
print(selected_indices)
print(selected_radiuses)

In [None]:
# Number of retained radii, i.e. of RDF values kept per temperature.
rdf_parameters_number = selected_radiuses.size
print(f'{rdf_parameters_number = }')

In [None]:
# RDF values at the retained radii: one row per selected radius, one column
# per temperature.
selected_matrix = full_matrix[selected_indices]
# Bare expression: shows the shape as the cell output.
selected_matrix.shape

In [None]:
# Plot the full RDF of the first temperature column and overlay the retained
# points. `Plotter` is a project helper; its keyword meanings are taken from
# their names - NOTE(review): confirm against scripts_old.plotter.
plotter = Plotter(
    path_to_plots=PATH_TO_CURRENT_PLOTS,
    limits=dict(
        left=0,
        right=6,
        bottom=-0.1,
        top=2.5,
    ),
    labels=('radius', 'rdf'),
)
# NOTE(review): the legend label is hard-coded and not derived from
# `temperatures[0]` - confirm it matches the first column.
plotter.ax.plot(all_radiuses, full_matrix[:, 0], label=r'$T=156$ K')
plotter.ax.scatter(selected_radiuses, selected_matrix[:, 0], s=20)
plotter.set_major_locators(1, 0.5)
plotter.set_minor_locators(0.2, 0.1)
plotter.get_legend()
plt.show()

## Regression Models Training

In [None]:
max_mse = 1e-4
# Model key -> (estimator class, constructor keywords, error tolerance).
model_specs = {
    'L': (LinearRegression, {}, max_mse),
    'K': (KNeighborsRegressor, {'weights': 'uniform', 'n_neighbors': 2}, max_mse),
    'KW': (KNeighborsRegressor, {'weights': 'distance', 'n_neighbors': 2}, max_mse),
    'R': (RadiusNeighborsRegressor, {'weights': 'uniform', 'radius': 1.3}, 1e-3),
    'RW': (RadiusNeighborsRegressor, {'weights': 'distance', 'radius': 1.3}, 1e-3),
    'DT': (DecisionTreeRegressor, {}, max_mse),
}
# For every model family create one independent, unfitted estimator per
# selected radius (row of `selected_matrix`) and store its error tolerance.
r_dict = {
    key: {
        'regressors': [estimator(**options) for _ in selected_matrix],
        'max_error': tolerance,
    }
    for key, (estimator, options, tolerance) in model_specs.items()
}

In [None]:
# Temperatures as a column vector of shape (n_temperatures, 1): the 2-D sample
# layout passed to the regressors' `fit` / `predict` below.
samples = temperatures[:, np.newaxis]

def train_regressors(regressors, max_error):
    """Fit one regressor per selected radius on a growing temperature prefix.

    Starting with the first six temperatures, ``regressors[j]`` is fitted on a
    fixed random half (``train_test_split`` with ``test_size=0.5`` and
    ``random_state=42``) of the leading ``temperatures_number`` entries of row
    ``j`` of the module-level ``selected_matrix``. It is then asked to predict
    the value at the next temperature. The prefix grows by one temperature
    until the largest squared error over all rows whose target at that
    temperature is non-zero does not exceed ``max_error``.

    Reads the module-level ``samples`` and ``selected_matrix``.

    Args:
        regressors: Sequence of estimators with ``fit`` / ``predict``, one per
            row of ``selected_matrix``. They are fitted in place.
        max_error: Largest accepted squared error of the one-step-ahead
            prediction. If it is not below the initial value 1, the loop is
            never entered and the regressors are returned unfitted.

    Returns:
        Tuple ``(regressors, temperatures_number)`` where
        ``temperatures_number`` is the prefix length at which the tolerance
        was met.

    Raises:
        IndexError: If the tolerance is not met before the prefix covers every
            available temperature, leaving none to validate on.
        ValueError: If every target at the validation temperature is zero, so
            no error can be computed (raised by ``max`` on an empty list).
    """
    temperatures_number = 5
    max_metric_value = 1
    while max_metric_value > max_error:
        temperatures_number += 1
        if temperatures_number >= len(samples):
            # Without this check the lookup of the validation sample below
            # fails with an uninformative out-of-bounds IndexError.
            raise IndexError(
                f'Requested error {max_error} was not reached: all '
                f'{len(samples)} temperatures are used for training and none '
                'is left for validation.'
            )
        # The sample right after the training prefix; same for every row.
        validation_sample = [samples[temperatures_number]]
        errors = []
        for j, row in enumerate(selected_matrix):
            train_samples, _, train_targets, _ = train_test_split(
                samples[:temperatures_number],
                row[:temperatures_number],
                test_size=0.5,
                random_state=42,
            )
            regressors[j].fit(train_samples, train_targets)
            predicted = regressors[j].predict(validation_sample)
            # Rows whose target is exactly zero are left out of the error.
            if row[temperatures_number]:
                errors.append(
                    mean_squared_error(
                        y_true=[row[temperatures_number]],
                        y_pred=predicted,
                    ),
                )
        max_metric_value = max(errors)
    return regressors, temperatures_number

In [None]:
# Train every model family and record how many temperatures each one needed
# to reach its error tolerance.
for model, entry in r_dict.items():
    entry['regressors'], entry['temperatures_number'] = train_regressors(
        entry['regressors'],
        max_error=entry['max_error'],
    )
    print(model, entry['temperatures_number'])

## Prediction

In [None]:
# Column (temperature) index of the RDF that the trained models predict below.
index_to_predict = 55

In [None]:
def get_prediction(regressors):
    """Predict the selected RDF values at the temperature ``index_to_predict``.

    Reads the module-level ``samples``, ``selected_matrix`` and
    ``index_to_predict``.

    Args:
        regressors: Fitted estimators, one per row of ``selected_matrix``.

    Returns:
        Tuple ``(predicted_parameters, mse)``: the flat array of per-radius
        predictions and their mean squared error against the observed column
        ``selected_matrix[:, index_to_predict]``.
    """
    # The sample is the same for every regressor, so build it once.
    sample = [samples[index_to_predict]]
    predicted_parameters = np.array(
        [regressor.predict(sample) for regressor in regressors]
    ).flatten()
    # Pass observed and predicted values by keyword so they land in the slots
    # `mean_squared_error` expects (the value itself is symmetric in the two).
    mse = mean_squared_error(
        y_true=selected_matrix[:, index_to_predict],
        y_pred=predicted_parameters,
    )
    return predicted_parameters, mse

In [None]:
# Store each model family's prediction and its error on the selected radii.
for model, entry in r_dict.items():
    entry['predicted_parameters'], entry['mse_parameters'] = get_prediction(entry['regressors'])
    print(model, entry['mse_parameters'])

In [None]:
# Residuals (observed minus predicted) at the selected radii, one series per
# model; each series is shifted up by its position so they do not overlap.
plotter = Plotter(
    path_to_plots=PATH_TO_CURRENT_PLOTS,
    limits=dict(left=0, right=10, bottom=-1, top=6),
    labels=('radius', r'$g-\hat{g}$'),
)
plotter.set_major_locators(2, 2)
plotter.set_minor_locators(0.4, 0.4)
observed_parameters = selected_matrix[:, index_to_predict]
for offset, (model, entry) in enumerate(r_dict.items()):
    residuals = observed_parameters - entry['predicted_parameters']
    plotter.ax.scatter(selected_radiuses, residuals + offset, label=model, s=10)
plotter.get_legend()
plt.show()

## Reconstruction

In [None]:
def get_interpolation(predicted_parameters):
    """Rebuild an RDF on the full radius grid from values at selected radii.

    A cubic interpolant through ``(selected_radiuses, predicted_parameters)``
    is evaluated at ``nz_radiuses``; negative results are replaced by zero.
    Grid points outside ``nz_indices`` stay zero. Reads the module-level
    ``selected_radiuses``, ``nz_radiuses``, ``nz_indices`` and
    ``all_radiuses``.
    """
    cubic = interpolate.interp1d(selected_radiuses, predicted_parameters, kind='cubic')
    interpolated = cubic(nz_radiuses)
    predicted_rdf = np.zeros(all_radiuses.shape[0])
    # Only the interpolated entries can be negative; the rest are exactly zero.
    predicted_rdf[nz_indices] = np.where(interpolated < 0, 0, interpolated)
    return predicted_rdf

In [None]:
# For every model family: rebuild the full RDF from the predicted values,
# score it against the observed RDF at `index_to_predict`, and plot both.
for model, entry in r_dict.items():
    entry['predicted_rdf'] = get_interpolation(entry['predicted_parameters'])
    # Error over the whole radius grid, not only the selected radii.
    mse = mean_squared_error(full_matrix[:, index_to_predict], entry['predicted_rdf'])
    entry['mse'] = mse
    plotter = Plotter(
        path_to_plots=PATH_TO_CURRENT_PLOTS,
        limits=dict(left=0, right=6, bottom=-0.1, top=3),
        labels=('radius', 'rdf'),
    )
    plotter.ax.plot(all_radiuses, full_matrix[:, index_to_predict], '--', label='Observed')
    plotter.ax.plot(all_radiuses, entry['predicted_rdf'], ':', label='Predicted')
    plotter.set_major_locators(1, 0.5)
    plotter.set_minor_locators(0.2, 0.1)
    plotter.get_legend()
    # Annotate the figure with the model key and its full-grid error.
    plotter.ax.text(
        x=3,
        y=1.5,
        s=f'Model: {model};\n' + fr'$\xi_j$={mse:.4f}.',
        bbox=dict(boxstyle='round', facecolor='lightyellow'),
    )
    plt.show()

In [None]:
# Model keys ranked by the number of temperatures needed for training
# (fewest first).
models_by_time = sorted(
    ((name, entry['temperatures_number']) for name, entry in r_dict.items()),
    key=lambda pair: pair[1],
)
models_by_time

In [None]:
# Model keys ranked by the full-grid reconstruction error (smallest first).
models_by_mse = sorted(
    ((name, entry['mse']) for name, entry in r_dict.items()),
    key=lambda pair: pair[1],
)
models_by_mse

In [None]:
# Write one CSV per model with the radius grid, the reconstructed RDF and the
# observed RDF at `index_to_predict`.
for model, entry in r_dict.items():
    output_path = PATH_TO_CURRENT_DATA / f'predicted_rdf_{model}.csv'
    predicted_rdf_df = pd.DataFrame({
        'radius': all_radiuses,
        'predicted_rdf': entry['predicted_rdf'],
        'observed_rdf': full_matrix[:, index_to_predict],
    })
    predicted_rdf_df.to_csv(str(output_path), index=False, sep=';')
# Bare expression: shows the frame of the last model as the cell output.
predicted_rdf_df

## End

In [None]:
# Report the time elapsed since START_TIME was set in the constants cell.
print(f'Execution Time: {datetime.now() - START_TIME}')