In [1]:
from IPython import get_ipython

# Handle to the running interactive shell; it exposes the extension manager.
ipython = get_ipython()

# Load the autoreload extension only if this kernel has not loaded it yet.
if 'autoreload' not in ipython.extension_manager.loaded:
    # Programmatic form of the `%load_ext autoreload` line magic.
    ipython.run_line_magic('load_ext', 'autoreload')

In [2]:
from copy import deepcopy
from datetime import datetime
import os
import sys

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, train_test_split, validation_curve
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
# Append the parent directory of sys.path[0] to the import search path so the
# project-local `scripts` package imported below can be found.
# NOTE(review): presumably this parent directory is the repository root — confirm.
sys.path.append(os.path.dirname(sys.path[0]))

# Project-local modules; these imports must come after the path tweak above.
import scripts.constants as con
from scripts.postprocessing import PostProcessor, RegressionRDF

# Autoreload mode 2: modules are re-imported automatically before code runs, so
# edits to the project modules should be picked up without a kernel restart.
%autoreload 2

### Настройки рисунков

In [5]:
# Figure-related constants defined for use elsewhere in the notebook.
FIGSIZE = (7, 7)
FONTSIZE = 14

# The bare 'seaborn' style name was deprecated in matplotlib 3.6 and removed in
# later releases; the same style sheet is shipped as 'seaborn-v0_8'. Prefer the
# new name when it is available and fall back to the legacy name otherwise, so
# the cell works on both old and new matplotlib (and still raises if neither
# style exists).
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)

# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    # Axes and line geometry.
    'axes.labelpad': 0,
    'lines.linewidth': 2,
    'axes.linewidth': 2,
    # Output resolution and default size: 15 x 15 converted by the 2.54 factor
    # (centimetres to inches).
    'figure.dpi': 300,
    'figure.figsize': [i / 2.54 for i in (15, 15)],
    # Fonts for regular text and for math text.
    'font.family': 'Times New Roman',
    'mathtext.fontset': 'stix',
    'mathtext.it': 'Times New Roman',
    # A single font size for ticks, legend, titles, labels and body text.
    'xtick.labelsize': FONTSIZE,
    'ytick.labelsize': FONTSIZE,
    'legend.fontsize': FONTSIZE,
    'axes.titlesize': FONTSIZE,
    'axes.labelsize': FONTSIZE,
    'font.size': FONTSIZE,
})

# Ordered palette of named colors available to the plotting cells.
COLORS = (
    'black', 'red', 'green', 'blue',
    'cyan', 'magenta', 'purple', 'orange',
    'olive', 'yellow', 'brown', 'pink',
    'gray', 'lime', 'slateblue', 'crimson',
    'darkviolet', 'sienna', 'coral',
)

### Пути к файлам

In [6]:
# Suffix constant for plot file names (not used within this cell).
PLOT_FILENAME_POSTFIX_NORMAL = 'prepared_1.3_normal'

# Name of the dataset directory inside the project's data folder.
CURRENT_DATA_NORMAL = '2021-09-24_prepared_1.3_normal'

# Full path to the dataset directory; displayed as the cell output.
PATH_TO_CURRENT_DATA_NORMAL = os.path.join(
    con.PATH_TO_DATA,
    CURRENT_DATA_NORMAL,
)
PATH_TO_CURRENT_DATA_NORMAL

'D:\\albert\\programming\\git_reps\\molecular_dynamics\\data\\2021-09-24_prepared_1.3_normal'

In [7]:
# Collect one Series per configuration file for each coordinate axis.
x_dataframes = []
y_dataframes = []
z_dataframes = []

# os.listdir() returns entries in arbitrary order, so iterate over a sorted
# listing to make the resulting column order deterministic across platforms
# and runs.
for filename in sorted(os.listdir(PATH_TO_CURRENT_DATA_NORMAL)):
    if not filename.startswith('system_configuration'):
        continue
    # Column label: the last 8 characters of the file name without '.csv'.
    _column = filename.removesuffix('.csv')[-8:]
    df = pd.read_csv(os.path.join(PATH_TO_CURRENT_DATA_NORMAL, filename), sep=';')
    x_dataframes.append(df['x'].rename(_column))
    y_dataframes.append(df['y'].rename(_column))
    z_dataframes.append(df['z'].rename(_column))

# One frame per axis: one column per configuration file.
x_dataframe = pd.concat(x_dataframes, axis=1)
y_dataframe = pd.concat(y_dataframes, axis=1)
z_dataframe = pd.concat(z_dataframes, axis=1)
x_dataframe

Unnamed: 0,10_40_05,10_40_06,10_40_07,10_40_08,10_40_09,10_40_10,10_40_11,10_40_12,10_40_13,10_40_14,...,11_21_07,11_21_08,11_21_09,11_21_10,11_21_11,11_21_12,11_21_13,11_21_14,11_21_15,11_21_16
0,-7.115450,-7.118707,-7.119494,-7.117805,-7.113833,-7.107811,-7.102768,-7.093782,-7.083159,-7.075172,...,-6.463215,-6.463234,-6.464162,-6.465053,-6.466206,-6.469213,-6.471015,-6.475075,-6.477272,-6.479537
1,0.133989,0.124397,0.119959,0.118981,0.120752,0.125483,0.130480,0.140792,0.154152,0.164432,...,0.202597,0.208334,0.219392,0.224729,0.229946,0.240035,0.244914,0.254368,0.258963,0.263488
2,2.191690,2.167954,2.143730,2.120071,2.098514,2.081732,2.074998,2.073670,2.083077,2.094448,...,1.529998,1.530347,1.531849,1.533041,1.534553,1.538585,1.541121,1.547213,1.550742,1.554560
3,-3.331674,-3.328198,-3.325491,-3.324145,-3.324990,-3.328357,-3.332009,-3.339798,-3.350732,-3.359553,...,-6.878835,-6.877410,-6.874266,-6.872564,-6.870783,-6.866996,-6.864991,-6.860752,-6.858508,-6.856173
4,-5.924645,-5.950929,-5.978239,-6.006435,-6.035427,-6.064983,-6.084729,-6.113946,-6.142076,-6.159738,...,-10.856795,-10.855969,-10.854505,-10.853994,-10.853695,-10.853858,-10.854348,-10.856124,-10.857372,-10.858820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1367,7.857635,7.821628,7.783858,7.747074,7.714828,7.692004,7.684423,7.685496,7.698939,7.712570,...,6.597683,6.597089,6.595871,6.595269,6.594684,6.593600,6.593114,6.592282,6.591942,6.591655
1368,15.274581,15.268816,15.268323,15.273033,15.282157,15.294467,15.303594,15.317033,15.327894,15.332367,...,13.963383,13.969301,13.981590,13.987934,13.994376,14.007364,14.013781,14.026055,14.031729,14.036975
1369,11.444556,11.428040,11.410893,11.397222,11.392781,11.399435,11.408506,11.426776,11.449660,11.467281,...,11.794065,11.791325,11.784848,11.780860,11.776241,11.764946,11.758310,11.743488,11.735552,11.727444
1370,-0.368942,-0.386931,-0.399788,-0.406139,-0.405499,-0.398493,-0.390841,-0.375745,-0.357378,-0.344062,...,-6.164227,-6.172048,-6.187595,-6.195241,-6.202735,-6.217025,-6.223698,-6.235836,-6.241227,-6.246137


In [8]:
# Labels of the configurations loaded into x_dataframe; the RDF columns must
# follow exactly this order so that rows of `samples` and `targets` built from
# these frames refer to the same configuration.
columns = x_dataframe.columns.values
_known_columns = set(columns)  # constant-time membership checks

_radius = None        # 'radius' column, taken from the first RDF file read
_rdf_by_column = {}   # configuration label -> renamed 'rdf' Series

# os.listdir() returns entries in arbitrary order; sort for reproducibility.
for filename in sorted(os.listdir(PATH_TO_CURRENT_DATA_NORMAL)):
    if not filename.startswith('rdf'):
        continue
    # Column label: the last 8 characters of the file name without '.csv'.
    _column = filename.removesuffix('.csv')[-8:]
    df = pd.read_csv(os.path.join(PATH_TO_CURRENT_DATA_NORMAL, filename), sep=';')
    if _radius is None:
        _radius = df['radius']
    # Keep only RDFs that have a matching system configuration.
    if _column in _known_columns:
        _rdf_by_column[_column] = df['rdf'].rename(_column)

# 'radius' first, then the RDF columns in the same order as x_dataframe's
# columns (not in directory-listing order). Labels without an RDF file are
# skipped, as before.
rdf_dataframes = [] if _radius is None else [_radius]
rdf_dataframes.extend(
    _rdf_by_column[_column] for _column in columns if _column in _rdf_by_column
)

rdf_dataframe = pd.concat(rdf_dataframes, axis=1)
rdf_dataframe

Unnamed: 0,radius,10_40_05,10_40_06,10_40_07,10_40_08,10_40_09,10_40_10,10_40_11,10_40_12,10_40_13,...,11_21_07,11_21_08,11_21_09,11_21_10,11_21_11,11_21_12,11_21_13,11_21_14,11_21_15,11_21_16
0,0.01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.02,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.03,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.04,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.05,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
607,6.08,1.024633,1.020849,1.022531,0.988054,1.014122,1.005293,1.039349,1.008656,1.001088,...,0.975021,0.969555,1.010759,0.993941,1.011179,1.029258,1.029258,1.033463,1.027156,1.053644
608,6.09,0.996965,1.012890,1.001156,0.996965,0.983136,0.997384,1.002832,1.016662,0.987746,...,1.018338,1.045577,0.979783,0.984812,0.996127,1.015823,1.053121,1.040129,1.032167,1.000318
609,6.10,0.980333,1.003306,0.999129,1.002889,1.011242,1.009989,1.002889,1.006648,0.996623,...,0.990775,0.995370,1.007901,1.026280,0.993699,1.007065,1.009572,1.001218,1.003306,1.034633
610,6.11,1.001690,0.981706,0.987951,0.989200,0.999608,0.996278,0.983372,0.984621,0.989617,...,1.025005,1.017927,1.001274,1.023756,1.052066,1.030001,0.990449,1.004604,1.011266,0.982123


In [10]:
# Location of the single RDF file read into `test_rdf`.
_test_rdf_path = os.path.join(
    con.PATH_TO_DATA,
    '2021-10-05_test_rmc',
    'rdf_T_1.30075_P_1.46834_HV_0.02000_.csv',
)

# The file is semicolon-separated.
test_rdf = pd.read_csv(_test_rdf_path, sep=';')

test_rdf

Unnamed: 0,radius,rdf
0,0.01,0.000000
1,0.02,0.000000
2,0.03,0.000000
3,0.04,0.000000
4,0.05,0.000000
...,...,...
607,6.08,1.001271
608,6.09,1.001697
609,6.10,1.001940
610,6.11,1.002143


In [11]:
# Drop the first column of rdf_dataframe and transpose the remaining values,
# so each row of `samples` corresponds to one column of the frame.
samples = np.transpose(rdf_dataframe.iloc[:, 1:].to_numpy())
samples.shape

(2464, 612)

In [12]:
# Same layout as `samples`: drop the first column of test_rdf and transpose
# the remaining values.
test_sample = np.transpose(test_rdf.iloc[:, 1:].to_numpy())
test_sample.shape

(1, 612)

In [13]:
# Transpose each per-axis frame so that rows correspond to the frame's columns.
x = x_dataframe.to_numpy().T
y = y_dataframe.to_numpy().T
z = z_dataframe.to_numpy().T

# Stack the three axes along a new trailing dimension so that
# targets[i, j] == (x[i, j], y[i, j], z[i, j]). A single vectorised call
# replaces the per-row Python loop and yields the same array.
targets = np.stack([x, y, z], axis=-1)

targets.shape

(2464, 1372, 3)

In [14]:
# Write each array as a .npy file into the dataset directory.
for _npy_name, _array in (
    ('samples.npy', samples),
    ('targets.npy', targets),
    ('test_sample.npy', test_sample),
):
    np.save(os.path.join(PATH_TO_CURRENT_DATA_NORMAL, _npy_name), _array)