In [8]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import seaborn as sns
from utils import *
import torch
from torch import nn
from torch.nn import functional as F
import torch.utils.data as Data
torch.use_deterministic_algorithms(True)
from easydict import EasyDict as ed
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import skopt
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.plots import plot_convergence
from captum.attr import FeaturePermutation
if is_notebook():
    from tqdm.notebook import tqdm
else:
    from tqdm import tqdm

# --- Reproducibility -------------------------------------------------------
# Seed Python's, NumPy's and PyTorch's random number generators with the same value.
import random
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# --- Plotting --------------------------------------------------------------
clr = sns.color_palette("deep")

# --- Compute device --------------------------------------------------------
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

# --- Experiment switches ---------------------------------------------------
# How the dataset is partitioned: 'random' or 'material'.
split_by = 'material'

validation = True
physics_informed = False
bayes_opt = True

# --- File locations --------------------------------------------------------
data_path = '../data/SNL_MSU_DOE_fatigue.xlsx'
ckp_path = '../output/fatigue.pt'
skopt_path = '../output/skopt.pt'

# --- Search budget and network shape ---------------------------------------
n_calls = 200
layers = [16, 64, 128, 256, 256, 128, 64, 16]

# --- Hyperparameters -------------------------------------------------------
# Alternative setup kept for reference (weight decay fixed, two-dimensional search):
#   static_params = {'patience': 500, 'epoch': 2000, 'weight_decay': 0.0}
#   chosen_params = {'lr': 0.0062111007822585485, 'batch_size': 128}
#   SPACE = [
#       Real(1e-4, 0.05, 'log-uniform', name='lr'),
#       Categorical([32, 64, 128, 256, 512, 1024, 2048], name='batch_size'),
#   ]
static_params = {
    'patience': 1000,
    'epoch': 4000,
}

# These values were annotated "for random split".
# NOTE(review): `split_by` above is 'material' — confirm they are still the intended ones.
chosen_params = {
    'lr': 0.0034560325081541875,
    'weight_decay': 0.0019904362054363267,
    'batch_size': 1024,
}
# Other candidate settings kept for reference:
#   chosen_params = {'lr': 0.01, 'weight_decay': 0.005, 'batch_size': 1024}
#   chosen_params = {'lr': 0.0008576159573733293, 'weight_decay': 0.005, 'batch_size': 32}

# Search space: one dimension per entry, named after the hyperparameter it tunes.
SPACE = [
    Real(1e-3, 0.05, 'log-uniform', name='lr'),
    Real(1e-5, 0.05, 'log-uniform', name='weight_decay'),
    Categorical([32, 64, 128, 256, 512, 1024, 2048, 4096], name='batch_size'),
]

Using cpu device


In [9]:
# Read the spreadsheet at `data_path` into a DataFrame using the openpyxl engine.
data = pd.read_excel(data_path, engine='openpyxl')

# Column-name mapping handed to `replace_column_name` below: keys are the
# spreadsheet headers, values are the descriptive names used by `feature_names`
# and `label_name` in this cell. The last four entries map a name to itself.
# NOTE(review): 'Fibre Volumn Fraction' is misspelled ("Volume"). The same
# string is used in `feature_names`, so both must be renamed together.
name_mapping = {
    'Material': 'Material',
    'Resin Type': 'Resin Type',
    'Vf, %': 'Fibre Volumn Fraction',
    '%, 0 Deg': 'Percentage of Fibre in 0-deg Direction',
    '%, 45 Deg': 'Percentage of Fibre in 45-deg Direction',
    '%, 90 Deg': 'Percentage of Fibre in 90-deg Direction',
    'other %': 'Percentage of Fibre in Other Direction',
    'Thickness, mm': 'Thickness',
    'Max. Stress, MPa': 'Maximum Stress',
    'Min. Stress, MPa': 'Minimum Stress',
    'R-value': 'Minimum/Maximum Stress',
    'Freq., Hz': 'Frequency',
    'E, GPa': 'Initial Elastic Modulus',
    'Max. % Strain': 'Maximum Strain',
    'Min. % Strain': 'Minimum Strain',
    'Cycles': 'Cycles to Failure',
    'Moisture Gain, %': 'Moisture Gain',
    'Testing Temperature, OC': 'Temperature',
    'Width, mm': 'Width',
    'Static Max. Stress, MPa': 'Static Maximum Tensile Stress',
    'Static Min. Stress, MPa': 'Static Maximum Compressive Stress',
    'Static E, GPa': 'Static Elastic Modulus',
    'Static Max. % Strain': 'Static Maximum Tensile Strain',
    'Static Min. % Strain': 'Static Maximum Compressive Strain',
    'Absolute Maximum Stress': 'Absolute Maximum Stress',
    'Absolute Peak-to-peak Stress': 'Absolute Peak-to-peak Stress',
    'Relative Maximum Stress': 'Relative Maximum Stress',
    'Relative Peak-to-peak Stress': 'Relative Peak-to-peak Stress'
}

# `replace_column_name` is not defined in this notebook view (presumably it
# comes from `utils`); it is assumed to return the frame with columns renamed
# according to `name_mapping` — TODO confirm.
data = replace_column_name(data, name_mapping)

# Columns used as model inputs.
feature_names = ['Percentage of Fibre in 0-deg Direction',
                 'Percentage of Fibre in 45-deg Direction',
                 'Percentage of Fibre in 90-deg Direction',
                 'Percentage of Fibre in Other Direction',
                 'Absolute Maximum Stress',
                 'Absolute Peak-to-peak Stress',
                 'Frequency',
                 'Fibre Volumn Fraction',
                 'Relative Maximum Stress',
                 'Relative Peak-to-peak Stress',
                 'Thickness',
                 'Static Maximum Tensile Stress',
                 'Static Maximum Tensile Strain',
                 'Static Elastic Modulus']

# Regression target; it is log10-transformed below.
label_name = ['Cycles to Failure']

# Single source of truth for row selection: keep only rows that are complete in
# the features, the label AND the two grouping columns. The group keys and the
# tensors are both derived from this one frame, so `mat_lay[i]` always describes
# `dataset[i]`. (Filtering the two separately would silently misalign them as
# soon as a row lacks 'Material' or 'Lay-up' but has all features.)
tmp_data = data[feature_names+label_name+['Material', 'Lay-up']].dropna(axis=0).copy()

material_names = tmp_data['Material'].copy()
lay_up = tmp_data['Lay-up'].copy()
# One group key per row: material name concatenated with its lay-up.
mat_lay = np.array([x+y for x,y in zip(material_names, lay_up)], dtype=str)
# Sorted rather than raw set order: iteration order of a set of strings changes
# between interpreter sessions (hash randomisation), which would make any split
# derived from this list differ from run to run despite the fixed seeds.
mat_lay_set = sorted(set(mat_lay))

data = tmp_data[feature_names+label_name]
feature_data = data[feature_names]
label_data = np.log10(data[label_name])

# Guard the invariant the material-based split and the leakage check rely on.
assert len(mat_lay) == len(data), 'group keys and samples are misaligned'

X = torch.tensor(feature_data.values, dtype=torch.float32).to(device)
y = torch.tensor(label_data.values, dtype=torch.float32).to(device)
dataset = Data.TensorDataset(X, y)

# Partition `dataset` into subsets, either uniformly at random (fixed generator
# seed) or through `split_by_material`. With `validation` enabled the
# proportions are 60/20/20 (train/val/test), otherwise 80/20 (train/test).
if not validation:
    train_test = np.array([0.8, 0.2])
    if split_by == 'material':
        train_dataset, test_dataset = split_by_material(
            dataset, mat_lay, mat_lay_set, train_test, validation
        )
    elif split_by == 'random':
        # The test split takes whatever remains after flooring the train size.
        train_size = np.floor(train_test[0] * len(label_data)).astype(int)
        test_size = len(label_data) - train_size
        train_dataset, test_dataset = Data.random_split(
            dataset,
            [train_size, test_size],
            generator=torch.Generator().manual_seed(0),
        )
    else:
        raise Exception('Split type not implemented')
    print('Dataset size:', len(train_dataset), len(test_dataset))
else:
    train_val_test = np.array([0.6, 0.2, 0.2])
    if split_by == 'material':
        train_dataset, val_dataset, test_dataset = split_by_material(
            dataset, mat_lay, mat_lay_set, train_val_test, validation
        )
    elif split_by == 'random':
        # Train and validation sizes are floored; the test split takes the remainder.
        train_size = np.floor(train_val_test[0] * len(label_data)).astype(int)
        val_size = np.floor(train_val_test[1] * len(label_data)).astype(int)
        test_size = len(label_data) - train_size - val_size
        train_dataset, val_dataset, test_dataset = Data.random_split(
            dataset,
            [train_size, val_size, test_size],
            generator=torch.Generator().manual_seed(0),
        )
    else:
        raise Exception('Split type not implemented')

    print('Dataset size:', len(train_dataset), len(val_dataset), len(test_dataset))

# Standardise the input features. The statement order below matters: the scaler
# must be fitted before the underlying feature tensor is replaced.
scaler = StandardScaler()
# scaler = MinMaxScaler()
# Fit on the rows selected by the training subset only, so the scaling
# statistics are computed without the remaining rows.
scaler.fit(train_dataset.dataset.tensors[0].cpu().numpy()[train_dataset.indices, :])
# The subsets are index views (torch.utils.data.Subset) onto one underlying
# dataset, so transforming that dataset's feature tensor once is enough.
# NOTE(review): this assumes the other subsets wrap the same dataset object as
# train_dataset — true for Data.random_split; confirm for split_by_material.
transformed = scaler.transform(train_dataset.dataset.tensors[0].cpu().numpy())
# Rebind the (features, labels) tuple: features become the scaled copy, labels
# are carried over unchanged.
train_dataset.dataset.tensors = (torch.tensor(transformed, dtype=torch.float32).to(device), train_dataset.dataset.tensors[1])
# The line above rebinds the dataset's tensors and does not modify `X` itself,
# so `X` is scaled here separately with the same fitted scaler.
X = torch.tensor(scaler.transform(X.cpu().numpy()), dtype=torch.float32).to(device)

Dataset size: 2948 911 949


In [15]:
# Group-leakage check: report every material/lay-up key that occurs in more than
# one split. All pairs of splits are compared (not only val vs. train), using
# set intersections instead of a per-sample scan.
# With split_by == 'random' shared groups are expected; with 'material' there
# should be none.
train = mat_lay[train_dataset.indices]
test = mat_lay[test_dataset.indices]
split_groups = {'train': set(train), 'test': set(test)}
if validation:
    # A validation subset only exists when `validation` is True.
    val = mat_lay[val_dataset.indices]
    split_groups['val'] = set(val)

split_labels = list(split_groups)
leak_found = False
for pos, first in enumerate(split_labels):
    for second in split_labels[pos + 1:]:
        shared = split_groups[first] & split_groups[second]
        if shared:
            leak_found = True
            print('Leakage: {} material/lay-up group(s) appear in both {} and {}, e.g. {}'.format(
                len(shared), first, second, sorted(shared)[:5]))
if not leak_found:
    print('No material/lay-up group is shared between splits.')


In [3]:
# Displays the grid of matplotlib axes held in `ax`.
# NOTE(review): `ax` is not assigned in any cell visible in this notebook view,
# and this cell's execution count is lower than the cells above it — it will
# likely fail after Restart & Run All. Define `ax` in a preceding cell or
# remove this inspection cell.
ax

array([[<AxesSubplot:xlabel='lr', ylabel='Partial dependence'>,
        <AxesSubplot:>, <AxesSubplot:>],
       [<AxesSubplot:ylabel='weight_decay'>,
        <AxesSubplot:xlabel='weight_decay', ylabel='Partial dependence'>,
        <AxesSubplot:>],
       [<AxesSubplot:xlabel='lr', ylabel='batch_size'>,
        <AxesSubplot:xlabel='weight_decay'>,
        <AxesSubplot:xlabel='batch_size', ylabel='Partial dependence'>]],
      dtype=object)

In [4]:
# Lists the child artists of the subplot at row 1, column 0 of the `ax` grid.
# NOTE(review): depends on `ax`, which is not assigned in any cell visible in
# this notebook view; looks like leftover interactive inspection — confirm
# whether it is still needed.
ax[1,0].get_children()

[<matplotlib.collections.PathCollection at 0x7fbd77b3ec88>,
 <matplotlib.collections.PathCollection at 0x7fbd77b3ef98>,
 <matplotlib.collections.PathCollection at 0x7fbd770be2b0>,
 <matplotlib.collections.PathCollection at 0x7fbd770be588>,
 <matplotlib.collections.PathCollection at 0x7fbd770be860>,
 <matplotlib.collections.PathCollection at 0x7fbd770beb38>,
 <matplotlib.collections.PathCollection at 0x7fbd770beda0>,
 <matplotlib.collections.PathCollection at 0x7fbd770c8048>,
 <matplotlib.collections.PathCollection at 0x7fbd770c82b0>,
 <matplotlib.collections.PathCollection at 0x7fbd770c8550>,
 <matplotlib.collections.PathCollection at 0x7fbd770c87f0>,
 <matplotlib.collections.PathCollection at 0x7fbd77b3ea58>,
 <matplotlib.collections.PathCollection at 0x7fbd770c8c50>,
 <matplotlib.spines.Spine at 0x7fbc9e994400>,
 <matplotlib.spines.Spine at 0x7fbc9e988fd0>,
 <matplotlib.spines.Spine at 0x7fbc9e988ef0>,
 <matplotlib.spines.Spine at 0x7fbc9e988b70>,
 <matplotlib.axis.XAxis at 0x7fbc9e9