Import the notebook that contains the class describing the dataset.

In [1]:
import import_ipynb
import training_dataset as td
import os
import torch.nn as nn
import locations as l

List the folders in the datasets folder.

In [2]:
# Show the contents of the datasets folder. The saved output shows the listing
# is unfiltered: besides dataset folders it also contains entries such as
# '.ipynb_checkpoints' and 'R2_comparison_2.ipynb'.
l.locations.list_datasets()

['Si_jaw_delta',
 'lifesat',
 'half_new_Si_jaw_delta',
 'quarter_new_Si_jaw_delta',
 'si02',
 'Si_jaw',
 'new_Si_jaw_delta',
 '.ipynb_checkpoints',
 'R2_comparison_2.ipynb']

In [3]:
# Directory reported by the project's `locations` helper; it is used below as
# the parent of the dataset folder that gets loaded.
datasets_folder = l.locations.get_data_dir()

In [4]:
# Path of the dataset used throughout this notebook ("new_Si_jaw_delta" is one
# of the entries shown by list_datasets()).
specific_dataset_location = os.path.join(
    datasets_folder,
    "new_Si_jaw_delta",
)

Create a new instance of the `training_dataset` class.

In [5]:
# Build the dataset object for the chosen folder, passing the wavelength column
# and the psi/del columns for 65, 70 and 75 as `col`.
t_dataset = td.training_dataset(
    specific_dataset_location,
    col=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
)

Create scalers based on the chosen columns.

In [6]:
# First scaler: the seven input columns.
# NOTE(review): in the saved output each call echoes its column list twice --
# presumably a leftover print inside flat_create_scaler; confirm.
t_dataset.flat_create_scaler(
    col=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
)
# Second scaler: the four target columns.
t_dataset.flat_create_scaler(col=['T', 'A', 'B', 'C'])

['wavelength', 'psi65', 'del65', 'psi70', 'del70', 'psi75', 'del75']
['wavelength', 'psi65', 'del65', 'psi70', 'del70', 'psi75', 'del75']
['T', 'A', 'B', 'C']
['T', 'A', 'B', 'C']


List the scalers present in the scalers folder. The previously created scalers are listed as 'scaler_wavelength_psi65_del65_psi70_del70_psi75_del75.pkl' and 'scaler_T_A_B_C.pkl'.

In [None]:
# Show which scaler files are currently available (this cell has no saved
# output in the notebook).
l.locations.list_scalers()

Train a model using data scaled with the previously generated scalers.

In [15]:
# Train a network from the seven input columns to the single target 'A', with
# hidden layer sizes 32-16-16-8 and an MSE loss.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# recorded run was stopped by hand. `bs=0` presumably controls batching --
# confirm what 0 means in training_dataset.train.
t_dataset.train(
    model_name="default",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['A'],
    hidden_layers=[32, 16, 16, 8],
    loss=nn.MSELoss(),
    bs=0,
)

KeyboardInterrupt: 

In [16]:
# Evaluate the saved 'A' model. According to the saved output, the call prints
# the train / test / combined R2 and returns them as a dict with the keys
# 'train_r2', 'test_r2' and 'combined_r2', which is displayed as the cell result.
t_dataset.test(
    "model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_A__layers_32_16_16_8.pth",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['A'],
)

Train R2: 0.7497936487197876
Test R2: 0.7481371760368347
Combined R2: 0.7494618892669678


{'train_r2': 0.7497936487197876,
 'test_r2': 0.7481371760368347,
 'combined_r2': 0.7494618892669678}

List the models in the models folder. The previously created model is listed as 'model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_T_A_B_C_standardized_layers_128_64_32.pth'.

In [None]:
# Show which model files are currently available (this cell has no saved
# output in the notebook).
l.locations.list_models()

In [None]:
# Directory reported by the `locations` helper for models; joined with a model
# file name in the next cell.
model_dir = l.locations.get_models_dir()

In [None]:
# Full path of the standardized T/A/B/C model.
# NOTE(review): `model_path` is not referenced by any later cell visible in
# this notebook; the evaluation cells pass bare file names via `model_name`.
model_path = os.path.join(
    model_dir,
    "model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_T_A_B_C_standardized_layers_128_64_32.pth",
)

Calculate the $R^2$ coefficient for each target column, for both the training dataset and the test dataset.

In [None]:
# Score the standardized T/A/B/C model. The two positional arguments are the
# feature-scaler and target-scaler file names, in that order.
t_dataset.flat_test_r2_standarized(
    "scaler_wavelength_psi65_del65_psi70_del70_psi75_del75.pkl",
    "scaler_T_A_B_C.pkl",
    model_name="model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_T_A_B_C_standardized_layers_128_64_32.pth",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['T', 'A', 'B', 'C'],
)


In [7]:
# Score the non-standardized model for target 'C' (seven hidden layers of 48).
# NOTE(review): the saved output reports negative R2 on both train and test
# (about -0.89 and -1.02); with the usual R2 definition that is worse than
# predicting the mean, so this model/variant pairing should be re-checked.
t_dataset.flat_test_r2_100000(
    model_name="model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_C_non_standardized_layers_48_48_48_48_48_48_48.pth",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['C'],
)


C Train R2: -0.8856510672763347
C Test R2: -1.0249607424219538


In [8]:
# Score the non-standardized model for target 'B' (five hidden layers of 48).
# Saved output: train R2 ~0.950, test R2 ~0.967.
t_dataset.flat_test_r2_1000(
    model_name="model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_B_non_standardized_layers_48_48_48_48_48.pth",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['B'],
)

B Train R2: 0.9497079473724275
B Test R2: 0.9672352474313074


In [9]:
# Score the non-standardized model for target 'A' (hidden layers 64-32-16).
# NOTE(review): the saved output shows train R2 ~0.983 against test R2 ~0.720;
# a gap of that size may indicate overfitting -- worth confirming.
t_dataset.flat_test_r2(
    model_name="model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_A_non_standardized_layers_64_32_16.pth",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['A'],
)

A Train R2: 0.9832808699103135
A Test R2: 0.7200245733891281


In [10]:
# Score the non-standardized model for target 'T' (hidden layers 128-64-32).
# Saved output: train and test R2 are both above 0.999.
t_dataset.flat_test_r2(
    model_name="model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_T_non_standardized_layers_128_64_32.pth",
    feature_columns=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    target_columns=['T'],
)

T Train R2: 0.9999787137017563
T Test R2: 0.9996768833704095


In [None]:
# Call `lin_reg` once per target column. This single cell replaces four
# copy-pasted cells that differed only in the target name, so the set of
# targets is now stated in exactly one place.
# The per-target return values are collected and left as the cell's last
# expression so that whatever lin_reg returns is still displayed.
# NOTE(review): if lin_reg only prints and returns None, the dict can be
# replaced by a plain for-loop.
lin_reg_results = {
    target: t_dataset.lin_reg(target_columns=[target])
    for target in ("T", "A", "B", "C")
}
lin_reg_results

In [None]:
# Call `flat_lin_reg` once per target column. This single cell replaces four
# copy-pasted cells that differed only in the target name, so the set of
# targets is now stated in exactly one place.
# The per-target return values are collected and left as the cell's last
# expression so that whatever flat_lin_reg returns is still displayed.
# NOTE(review): if flat_lin_reg only prints and returns None, the dict can be
# replaced by a plain for-loop.
flat_lin_reg_results = {
    target: t_dataset.flat_lin_reg(target_columns=[target])
    for target in ("T", "A", "B", "C")
}
flat_lin_reg_results

In [None]:
# Mean R2 score for the non-standardized 'T' model with hidden layers 48-32-16-8
# (this cell has no saved output in the notebook).
t_dataset.get_mean_r2_score(
    model="model_wavelength_psi65_del65_psi70_del70_psi75_del75_to_T_non_standardized_layers_48_32_16_8.pth",
    features=[
        'wavelength',
        'psi65', 'del65',
        'psi70', 'del70',
        'psi75', 'del75',
    ],
    targets=['T'],
)