In [1]:
import sherpa

# help function
from transfer_learning import NeuralNet_sherpa_optimize
from dataset_loader import (
    data_loader,
    all_filter,
    get_descriptors,
    one_filter,
    data_scaler,
)

# modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import os, sys
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

from tqdm import tqdm
from scipy.stats import pearsonr

import matplotlib.pyplot as plt

# Compute device: CUDA when torch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Dataset location: the current working directory plus a relative CSV path.
# Both values are handed to the data loader further down in this cell.
base_path = os.getcwd()
file_name = "data/CrystGrowthDesign_SI.csv"

# Hyperparameter search space, one sherpa parameter object per tunable value.
# Learning rate is sampled on a log scale between 2e-4 and 1e-1.
lr_space = sherpa.Continuous(name="lr", range=[0.0002, 0.1], scale="log")
# "H_l1" is an integer in [10, 300] -- presumably the width of the first hidden
# layer; confirm against NeuralNet_sherpa_optimize.
hidden_space = sherpa.Discrete(name="H_l1", range=[10, 300])
# Activation candidates are given as strings spelling torch.nn class names; how
# the consumer turns them into modules is not visible in this notebook.
activation_space = sherpa.Choice(
    name="activate",
    range=["nn.Hardswish", "nn.PReLU", "nn.ReLU", "nn.Sigmoid", "nn.LeakyReLU"],
)
# An "Epoch" dimension (sherpa.Discrete over [10, 100]) used to be part of the
# search space and is currently disabled.
parameters = [lr_space, hidden_space, activation_space]

# Random search capped at 10 trials.
algorithm = sherpa.algorithms.RandomSearch(max_num_trials=10)

# lower_is_better=False: the study ranks trials by the largest objective value.
# The sherpa dashboard is switched off.
study = sherpa.Study(
    parameters=parameters,
    algorithm=algorithm,
    lower_is_better=False,
    disable_dashboard=True,
)

"""
Data description.

    Descriptors:
        'void fraction', 'Vol. S.A.', 'Grav. S.A.', 'Pore diameter Limiting', 'Pore diameter Largest'
    Source task:
        'H2@100 bar/243K (wt%)'
    Target tasks:
        'H2@100 bar/130K (wt%)' 'CH4@100 bar/298 K (mg/g)' '5 bar Xe mol/kg' '5 bar Kr mol/kg'
"""

# Input feature columns: the five descriptors listed in the data description.
descriptor_columns = [
    "void fraction",
    "Vol. S.A.",
    "Grav. S.A.",
    "Pore diameter Limiting",
    "Pore diameter Largest",
]
# Source-task target (H2 at 243 K) and one of the target-task columns (H2 at 130 K).
one_filter_columns = ["H2@100 bar/243K (wt%)"]
another_filter_columns = ["H2@100 bar/130K (wt%)"]

# Load the full dataset; data_loader receives the working directory and the
# relative CSV path (how it combines them is defined in dataset_loader).
data = data_loader(base_path, file_name)

# Extract the source-task property and the descriptor columns from the full data.
one_property = one_filter(data, one_filter_columns)
descriptors = get_descriptors(data, descriptor_columns)

# Prepare training inputs and outputs as float32 arrays.
X = np.array(descriptors.values, dtype=np.float32)
# Flatten the single target column to a 1-D vector with one entry per row of X.
y = np.array(one_property.values, dtype=np.float32).reshape(len(X),)
# Scale features and target with the project's data_scaler (method not visible here).
X = data_scaler(X)
# The target is passed to the scaler as a column and flattened again afterwards.
y = data_scaler(y.reshape(-1, 1)).reshape(len(X),)

# Small subset for the transfer trials. Legacy code: it could not be moved into a
# .py helper because of its data dependencies.
# random_state=1 fixes which 100 rows are drawn, so the subset is the same on every run.
data_small = data.sample(n=100, random_state=1)

# Target-task property and descriptors for the 100-row subset.
another_property = one_filter(data_small, another_filter_columns)
descriptors_small = get_descriptors(data_small, descriptor_columns)

# NOTE(review): unlike X / y, X_small and y_small are not passed through
# data_scaler in this cell -- confirm that scaling happens downstream.
X_small = np.array(descriptors_small.values, dtype=np.float32)
y_small = np.array(another_property.values, dtype=np.float32).reshape(
    len(X_small),
)

In [2]:
from Statistics_helper import stratified_cluster_sample

# Repeat the same sampling call ten times and print the first row of the fourth
# returned object each time. The recorded output switches between rows 1660 and
# 10649 for identical arguments, so the sampler is not deterministic across
# calls -- presumably it draws from unseeded RNG state; TODO confirm in
# Statistics_helper.
# The leading 1 and trailing 5 are positional arguments whose meaning is defined
# in Statistics_helper and is not visible here.
# t_1, t_2, y_1 and y_2 keep the values of the final iteration after the loop.
for i in range(10):
    sampled = stratified_cluster_sample(
        1, data, descriptor_columns, one_filter_columns[0], 5
    )
    t_1, t_2, y_1, y_2 = sampled
    print(y_2.iloc[0])

H2@100 bar/243K (wt%)    8.2
Name: 1660, dtype: float64
H2@100 bar/243K (wt%)    8.2
Name: 1660, dtype: float64
H2@100 bar/243K (wt%)    8.2
Name: 1660, dtype: float64
H2@100 bar/243K (wt%)    16.9
Name: 10649, dtype: float64
H2@100 bar/243K (wt%)    16.9
Name: 10649, dtype: float64
H2@100 bar/243K (wt%)    16.9
Name: 10649, dtype: float64
H2@100 bar/243K (wt%)    8.2
Name: 1660, dtype: float64
H2@100 bar/243K (wt%)    16.9
Name: 10649, dtype: float64
H2@100 bar/243K (wt%)    16.9
Name: 10649, dtype: float64
H2@100 bar/243K (wt%)    16.9
Name: 10649, dtype: float64


In [3]:
# Display y_1 as left in the namespace by the last iteration of the sampling loop.
y_1

Unnamed: 0,H2@100 bar/243K (wt%)
10799,16.8
13338,6.9
1188,6.6
10395,14.2
10540,10.8
...,...
4361,5.8
11745,4.7
12233,8.0
6939,6.1


In [6]:
# Scratch frames for trying out basic pandas operations.
df1 = pd.DataFrame(
    {
        "lkey": ["foo", "bar", "baz", "foo"],
        "value": [1, 2, 3, 5],
    }
)
# df2 is not used below; it is kept so the name stays available in the notebook namespace.
df2 = pd.DataFrame(
    {
        "rkey": ["foo", "bar", "baz", "foo"],
        "value": [5, 6, 7, 8],
    }
)

# A cell renders only its final expression, so this is the single result shown:
# df1 without its key column. drop() returns a new frame; df1 itself is unchanged.
df1.drop(columns="lkey")

Unnamed: 0,value
0,1
1,2
2,3
3,5


In [33]:
# NOTE(review): this cell ran as In [33], far out of sequence with the other
# visible cells. Its recorded output has five identically named columns with
# staggered NaNs, unlike the single-column output of the earlier `y_1` cell, so
# y_1 was presumably reassigned by code that is not shown in this notebook.
# Verify that this result reproduces under Restart Kernel -> Run All.
y_1

Unnamed: 0,H2@100 bar/243K (wt%),H2@100 bar/243K (wt%).1,H2@100 bar/243K (wt%).2,H2@100 bar/243K (wt%).3,H2@100 bar/243K (wt%).4
10799,16.8,,,,
13338,6.9,,,,
1188,6.6,,,,
10395,14.2,,,,
10540,10.8,,,,
...,...,...,...,...,...
11330,,,,,4.8
11431,,,,,6.4
11745,,,,,4.7
12233,,,,,8.0
