### FT_transformer
- https://github.com/Yura52/tabular-dl-revisiting-models


In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

warnings.filterwarnings("ignore")

## p10

In [2]:
train = pd.read_csv("../data/preprocessed/p10_train.csv")
train['istrain'] = 1
test = pd.read_csv("../data/preprocessed/p10_test.csv")
test['istrain'] = 0
df = pd.concat([train, test]).reset_index(drop=True)

X_name = [f'element {i}' for i in range(1,13)]
Y_name = [f'property {i}' for i in range(1,7)]

poolx = df[X_name].copy(deep=True)
pooly = df[Y_name].copy(deep=True)

X = poolx.iloc[df[df['istrain']==1].index, :]
Y = pooly.iloc[df[df['istrain']==1].index, :]

X_test = poolx.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)
Y_test = pooly.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)

pred = torch.zeros((X_test.shape[0], 6))

X.shape

(1372, 12)

In [6]:
from sklearn.model_selection import KFold
from utils.data import MyDataset
from torch.utils.data import DataLoader
from utils.trainer import trainer
from models.ft_transformer import FT_Transformer

os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

kf = KFold(n_splits=5, random_state=2022, shuffle=True)

oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=False


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train)
    valid_dataset = MyDataset(X_val, y_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    
    
    model = FT_Transformer(d_numerical=X.shape[1], categories=None, token_bias=True, n_layers=3, d_token=256, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int() ).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
    else:
        oof_train[val_idx] = final_model(X_val, None).detach()
        oof_real_train[trn_idx] += final_model(X_train, None).detach() / 4
        pred += final_model(torch.Tensor(X_test.values).long(), None).detach() /5
    

Fold: 0
epoch:    1 | train_loss: 11.5861 | train_mae: 0.7343 | train_r2: -0.3450 | train_rmse: 1.1152 | train_pccs: -0.0086
epoch:    1 | valid_loss: 8.4957 | valid_mae: 0.6641 | valid_r2: 0.0021 | valid_rmse: 1.5960 | valid_pccs: 0.3395
epoch:    2 | train_loss: 8.7514 | train_mae: 0.6071 | train_r2: -0.0028 | train_rmse: 0.9758 | train_pccs: 0.1678
epoch:    2 | valid_loss: 8.3052 | valid_mae: 0.6365 | valid_r2: 0.0116 | valid_rmse: 1.5951 | valid_pccs: 0.4602
epoch:    3 | train_loss: 7.8398 | train_mae: 0.5435 | train_r2: 0.1015 | train_rmse: 0.9254 | train_pccs: 0.3529
epoch:    3 | valid_loss: 8.2728 | valid_mae: 0.5140 | valid_r2: 0.2718 | valid_rmse: 1.4483 | valid_pccs: 0.4814
epoch:    4 | train_loss: 6.4555 | train_mae: 0.3878 | train_r2: 0.3492 | train_rmse: 0.7510 | train_pccs: 0.4719
epoch:    4 | valid_loss: 7.3374 | valid_mae: 0.3600 | valid_r2: 0.4241 | valid_rmse: 1.3083 | valid_pccs: 0.5669
epoch:    5 | train_loss: 5.7184 | train_mae: 0.3464 | train_r2: 0.4088 | tr

In [7]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score
print("train")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("test")
for i in range(Y_test.shape[1]):
    mae = mean_absolute_error(Y_test.iloc[:,i], pred[:,i])
    r2 = r2_score(Y_test.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((Y_test.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(Y_test.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')



train
property1: mae:0.209, rmse:1.582, r2:0.2014, pccs:0.4788
property2: mae:0.0619, rmse:0.0926, r2:0.989, pccs:0.9945
property3: mae:0.1272, rmse:0.3469, r2:0.8151, pccs:0.9037
property4: mae:0.0489, rmse:0.0678, r2:0.9948, pccs:0.9974
property5: mae:0.2083, rmse:1.6242, r2:0.0137, pccs:0.1266
property6: mae:0.6086, rmse:0.7918, r2:0.3311, pccs:0.5761
valid
property1: mae:0.2326, rmse:1.7051, r2:0.0724, pccs:0.2782
property2: mae:0.0693, rmse:0.1093, r2:0.9847, pccs:0.9923
property3: mae:0.1377, rmse:0.3719, r2:0.7874, pccs:0.8874
property4: mae:0.0592, rmse:0.0904, r2:0.9908, pccs:0.9954
property5: mae:0.2204, rmse:1.6399, r2:-0.0054, pccs:0.0153
property6: mae:0.6161, rmse:0.8011, r2:0.3152, pccs:0.5615
test
property1: mae:3.0135, rmse:3.0353, r2:-77.996, pccs:0.0141
property2: mae:1.7818, rmse:2.0021, r2:-3.7922, pccs:0.0683
property3: mae:0.9512, rmse:1.0798, r2:-1.052, pccs:0.1793
property4: mae:2.0741, rmse:2.2852, r2:-4.6703, pccs:0.0543
property5: mae:0.6096, rmse:0.7748, r2

## p11

In [8]:
train = pd.read_csv("../data/preprocessed/p11_train_scaling_x.csv")
train['istrain'] = 1
test = pd.read_csv("../data/preprocessed/p11_test_scaling_x.csv")
test['istrain'] = 0
df = pd.concat([train, test]).reset_index(drop=True)
df

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,element 11,element 12,property 1,property 2,property 3,property 4,property 5,property 6,istrain
0,0.156430,1.365596,1.089062,1.438439,0.553623,-0.538533,0.777039,-0.751979,-0.244073,-0.392599,-0.595071,-0.435662,0.317895,0.589088,0.752391,0.328302,-0.206129,0.44283,1
1,-1.334657,-0.814928,0.061231,1.950942,-1.007620,2.284940,-1.413693,-1.036635,-1.283344,-0.392599,1.942505,-0.435662,-0.108256,-0.574822,-0.221364,-0.926633,0.022768,-0.68232,1
2,1.015501,-0.674548,0.859498,-0.414699,-0.232666,-0.453725,-0.259303,0.363869,-0.896059,-0.392599,-0.595071,-0.435662,-0.092087,1.220121,0.834930,1.319043,-0.018376,0.44283,1
3,-1.348405,-0.814928,-1.660748,-0.420196,-1.384360,2.294403,0.187271,-1.036635,1.952420,3.009011,1.979808,-0.434092,-0.106166,-0.858744,-0.568540,-0.930921,0.023050,1.04291,1
4,-1.345306,-0.814928,-1.660748,-0.387085,-1.382884,2.294167,3.445090,-1.036635,2.086309,2.991566,1.984247,-0.433922,1.642237,-0.833978,-0.434720,-0.930877,0.242608,0.21780,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,-0.414090,1.565984,2.160322,1.849679,0.170248,-0.005512,0.259176,-0.833592,-0.520639,-0.392599,-0.595071,-0.435662,0.474445,0.200240,0.660588,-0.332144,-0.728097,2.46810,0
1956,-1.322808,-0.814928,0.668456,1.308024,-1.129060,2.294157,-1.413693,-1.036635,-1.283344,0.208713,-0.595071,-0.432458,-0.108256,-0.833092,-0.102687,-0.927217,0.022777,0.74287,0
1957,0.592949,1.227823,0.083139,-0.461015,0.582654,-0.538533,0.626345,0.956747,0.560394,-0.392599,-0.595071,-0.435662,-0.091459,0.897061,0.799480,0.861857,0.011591,0.14279,0
1958,-0.981138,1.528849,0.133214,2.013338,0.633104,-0.393047,0.684045,0.220007,0.610984,-0.392599,1.951842,-0.435662,1.503417,-0.287795,0.468611,-0.790559,0.491971,-1.20739,0


In [9]:
X_name = [f'element {i}' for i in range(1,13)]
Y_name = [f'property {i}' for i in range(1,7)]

poolx = df[X_name].copy(deep=True)
pooly = df[Y_name].copy(deep=True)

X = poolx.iloc[df[df['istrain']==1].index, :]
Y = pooly.iloc[df[df['istrain']==1].index, :]

X_test = poolx.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)
Y_test = pooly.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)

pred = torch.zeros((X_test.shape[0], 6))

In [10]:
from sklearn.model_selection import KFold
from utils.data import MyDataset
from torch.utils.data import DataLoader
from utils.trainer import trainer
from models.ft_transformer import FT_Transformer

os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

kf = KFold(n_splits=5, random_state=2022, shuffle=True)

oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=False


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train)
    valid_dataset = MyDataset(X_val, y_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    
    
    model = FT_Transformer(d_numerical=X.shape[1], categories=None, token_bias=True, n_layers=3, d_token=256, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int() ).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
    else:
        oof_train[val_idx] = final_model(X_val, None).detach()
        oof_real_train[trn_idx] += final_model(X_train, None).detach() / 4
        pred += final_model(torch.Tensor(X_test.values).long(), None).detach() /5

Fold: 0
epoch:    1 | train_loss: 11.0454 | train_mae: 0.7220 | train_r2: -0.3286 | train_rmse: 1.1032 | train_pccs: 0.0928
epoch:    1 | valid_loss: 7.8816 | valid_mae: 0.5592 | valid_r2: 0.1549 | valid_rmse: 1.5246 | valid_pccs: 0.4639
epoch:    2 | train_loss: 6.5857 | train_mae: 0.4455 | train_r2: 0.2972 | train_rmse: 0.8155 | train_pccs: 0.4596
epoch:    2 | valid_loss: 7.4784 | valid_mae: 0.4176 | valid_r2: 0.3732 | valid_rmse: 1.3906 | valid_pccs: 0.5252
epoch:    3 | train_loss: 5.4717 | train_mae: 0.3438 | train_r2: 0.4304 | train_rmse: 0.7169 | train_pccs: 0.5514
epoch:    3 | valid_loss: 7.5252 | valid_mae: 0.3813 | valid_r2: 0.4327 | valid_rmse: 1.3345 | valid_pccs: 0.5610
epoch:    4 | train_loss: 5.8768 | train_mae: 0.2915 | train_r2: 0.4943 | train_rmse: 0.6560 | train_pccs: 0.6066
epoch:    4 | valid_loss: 7.0806 | valid_mae: 0.3234 | valid_r2: 0.4893 | valid_rmse: 1.2753 | valid_pccs: 0.5847
epoch:    5 | train_loss: 4.8017 | train_mae: 0.2644 | train_r2: 0.5222 | trai

In [11]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score
print("train")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("test")
for i in range(Y_test.shape[1]):
    mae = mean_absolute_error(Y_test.iloc[:,i], pred[:,i])
    r2 = r2_score(Y_test.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((Y_test.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(Y_test.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

train
property1: mae:0.1263, rmse:1.3567, r2:0.4127, pccs:0.7753
property2: mae:0.0625, rmse:0.093, r2:0.9889, pccs:0.9944
property3: mae:0.1046, rmse:0.2787, r2:0.8806, pccs:0.9386
property4: mae:0.0584, rmse:0.0843, r2:0.992, pccs:0.996
property5: mae:0.1996, rmse:1.5237, r2:0.132, pccs:0.5285
property6: mae:0.4644, rmse:0.6276, r2:0.5798, pccs:0.7629
valid
property1: mae:0.1893, rmse:1.4954, r2:0.2865, pccs:0.6145
property2: mae:0.0803, rmse:0.123, r2:0.9806, pccs:0.9902
property3: mae:0.1304, rmse:0.3166, r2:0.846, pccs:0.9198
property4: mae:0.0789, rmse:0.1164, r2:0.9848, pccs:0.9924
property5: mae:0.2625, rmse:1.6516, r2:-0.0198, pccs:0.0261
property6: mae:0.5117, rmse:0.6851, r2:0.4992, pccs:0.7068
test
property1: mae:0.1415, rmse:0.2735, r2:0.3588, pccs:0.6781
property2: mae:0.1681, rmse:0.2044, r2:0.95, pccs:0.9827
property3: mae:0.1425, rmse:0.3369, r2:0.8002, pccs:0.9173
property4: mae:0.1866, rmse:0.2587, r2:0.9273, pccs:0.9695
property5: mae:0.1277, rmse:0.5446, r2:-0.0138

## p12

In [12]:
train = pd.read_csv("../data/preprocessed/p12_train_scaling_all.csv")
train['istrain'] = 1
test = pd.read_csv("../data/preprocessed/p12_test_scaling_all.csv")
test['istrain'] = 0
df = pd.concat([train, test]).reset_index(drop=True)
df

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,element 11,element 12,property 1,property 2,property 3,property 4,property 5,property 6,istrain
0,0.156430,1.365596,1.089062,1.438439,0.553623,-0.538533,0.777039,-0.751979,-0.244073,-0.392599,-0.595071,-0.435662,1.909246,0.085279,0.509680,-0.028579,-0.140768,0.285962,1
1,-1.334657,-0.814928,0.061231,1.950942,-1.007620,2.284940,-1.413693,-1.036635,-1.283344,-0.392599,1.942505,-0.435662,-0.659479,-1.111739,-1.335816,-1.280247,0.025453,-1.003582,1
2,1.015501,-0.674548,0.859498,-0.414699,-0.232666,-0.453725,-0.259303,0.363869,-0.896059,-0.392599,-0.595071,-0.435662,-0.370411,1.038947,0.813851,1.080689,-0.004302,0.285962,1
3,-1.348405,-0.814928,-1.660748,-0.420196,-1.384360,2.294403,0.187271,-1.036635,1.952420,3.009011,1.979808,-0.434092,-0.620311,-1.315985,-1.531143,-1.284270,0.025657,0.848095,1
4,-1.345306,-0.814928,-1.660748,-0.387085,-1.382884,2.294167,3.445090,-1.036635,2.086309,2.991566,1.984247,-0.433922,2.136396,-1.298985,-1.467415,-1.284228,0.183538,0.057225,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,-0.414090,1.565984,2.160322,1.849679,0.170248,-0.005512,0.259176,-0.833592,-0.520639,-0.392599,-0.595071,-0.435662,2.027562,-0.398008,0.204450,-0.708630,-0.525051,2.010495,0
1956,-1.322808,-0.814928,0.668456,1.308024,-1.129060,2.294157,-1.413693,-1.036635,-1.283344,0.208713,-0.595071,-0.432458,-0.659479,-1.298374,-1.239738,-1.280795,0.025460,0.574521,0
1957,0.592949,1.227823,0.083139,-0.461015,0.582654,-0.538533,0.626345,0.956747,0.560394,-0.392599,-0.595071,-0.435662,-0.359820,0.525837,0.679663,0.558113,0.017376,-0.021851,0
1958,-0.981138,1.528849,0.133214,2.013338,0.633104,-0.393047,0.684045,0.220007,0.610984,-0.392599,1.951842,-0.435662,2.135317,-0.880128,-0.329585,-1.151944,0.361275,-1.718036,0


In [13]:
X_name = [f'element {i}' for i in range(1,13)]
Y_name = [f'property {i}' for i in range(1,7)]

poolx = df[X_name].copy(deep=True)
pooly = df[Y_name].copy(deep=True)

X = poolx.iloc[df[df['istrain']==1].index, :]
Y = pooly.iloc[df[df['istrain']==1].index, :]

X_test = poolx.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)
Y_test = pooly.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)

pred = torch.zeros((X_test.shape[0], 6))

In [14]:
from sklearn.model_selection import KFold
from utils.data import MyDataset
from torch.utils.data import DataLoader
from utils.trainer import trainer
from models.ft_transformer import FT_Transformer

os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

kf = KFold(n_splits=5, random_state=2022, shuffle=True)

oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=False


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train)
    valid_dataset = MyDataset(X_val, y_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    
    
    model = FT_Transformer(d_numerical=X.shape[1], categories=None, token_bias=True, n_layers=3, d_token=256, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int() ).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
    else:
        oof_train[val_idx] = final_model(X_val, None).detach()
        oof_real_train[trn_idx] += final_model(X_train, None).detach() / 4
        pred += final_model(torch.Tensor(X_test.values).long(), None).detach() /5

Fold: 0
epoch:    1 | train_loss: 8.5324 | train_mae: 0.7029 | train_r2: 0.0326 | train_rmse: 0.9759 | train_pccs: 0.3873
epoch:    1 | valid_loss: 1.9214 | valid_mae: 0.4086 | valid_r2: 0.5517 | valid_rmse: 0.7277 | valid_pccs: 0.6962
epoch:    2 | train_loss: 3.5984 | train_mae: 0.3417 | train_r2: 0.6098 | train_rmse: 0.5667 | train_pccs: 0.7176
epoch:    2 | valid_loss: 1.5135 | valid_mae: 0.2874 | valid_r2: 0.6486 | valid_rmse: 0.5930 | valid_pccs: 0.7227
epoch:    3 | train_loss: 2.9961 | train_mae: 0.2728 | train_r2: 0.6643 | train_rmse: 0.4860 | train_pccs: 0.7417
epoch:    3 | valid_loss: 1.4257 | valid_mae: 0.2628 | valid_r2: 0.6781 | valid_rmse: 0.5599 | valid_pccs: 0.7386
epoch:    4 | train_loss: 2.8002 | train_mae: 0.2428 | train_r2: 0.6853 | train_rmse: 0.4550 | train_pccs: 0.7545
epoch:    4 | valid_loss: 1.3945 | valid_mae: 0.2333 | valid_r2: 0.6935 | valid_rmse: 0.5379 | valid_pccs: 0.7363
epoch:    5 | train_loss: 3.1736 | train_mae: 0.2214 | train_r2: 0.6987 | train_

In [15]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score
print("train")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("test")
for i in range(Y_test.shape[1]):
    mae = mean_absolute_error(Y_test.iloc[:,i], pred[:,i])
    r2 = r2_score(Y_test.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((Y_test.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(Y_test.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

train
property1: mae:0.091, rmse:0.1636, r2:0.9744, pccs:0.9872
property2: mae:0.0563, rmse:0.0782, r2:0.9938, pccs:0.9969
property3: mae:0.0583, rmse:0.1084, r2:0.9881, pccs:0.994
property4: mae:0.0486, rmse:0.0685, r2:0.9953, pccs:0.9976
property5: mae:0.1551, rmse:1.0814, r2:0.1419, pccs:0.4652
property6: mae:0.4654, rmse:0.6309, r2:0.601, pccs:0.7767
valid
property1: mae:0.1249, rmse:0.2319, r2:0.9486, pccs:0.974
property2: mae:0.0699, rmse:0.0997, r2:0.9899, pccs:0.9949
property3: mae:0.0749, rmse:0.1314, r2:0.9825, pccs:0.9912
property4: mae:0.0637, rmse:0.0912, r2:0.9916, pccs:0.9958
property5: mae:0.2068, rmse:1.1945, r2:-0.0471, pccs:-0.0216
property6: mae:0.5253, rmse:0.7112, r2:0.4929, pccs:0.7034
test
property1: mae:0.3696, rmse:0.508, r2:0.7074, pccs:0.8626
property2: mae:0.2264, rmse:0.2834, r2:0.9228, pccs:0.9742
property3: mae:0.1412, rmse:0.1875, r2:0.9658, pccs:0.9851
property4: mae:0.2333, rmse:0.3161, r2:0.9024, pccs:0.9637
property5: mae:0.1111, rmse:0.4001, r2:-0.

## p13

In [2]:
train = pd.read_csv("../data/preprocessed/p13_train_fe_hs.csv")
train['istrain'] = 1
test = pd.read_csv("../data/preprocessed/p13_test_fe_hs.csv")
test['istrain'] = 0
df = pd.concat([train, test]).reset_index(drop=True)
df

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,...,element 10 element 11,element 10 element 12,element 11 element 12,property 1,property 2,property 3,property 4,property 5,property 6,istrain
0,0.673956,0.174380,0.018852,0.122537,0.009426,0.000000,0.000424,0.000283,0.000141,0.000000,...,0.000000,0.000000e+00,0.000000e+00,0.317895,0.589088,0.752391,0.328302,-0.206129,0.44283,1
1,0.010422,0.000000,0.009474,0.312648,0.001421,0.661298,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000e+00,0.000000e+00,-0.108256,-0.574822,-0.221364,-0.926633,0.022768,-0.68232,1
2,0.959647,0.004798,0.016314,0.009596,0.004798,0.002879,0.000192,0.001727,0.000048,0.000000,...,0.000000,0.000000e+00,0.000000e+00,-0.092087,1.220121,0.834930,1.319043,-0.018376,0.44283,1
3,0.002357,0.000000,0.000000,0.009428,0.000047,0.867352,0.000283,0.000000,0.000754,0.113133,...,0.000747,5.332933e-06,3.110877e-07,-0.106166,-0.858744,-0.568540,-0.930921,0.023050,1.04291,1
4,0.004180,0.000000,0.000000,0.010450,0.000052,0.856934,0.015676,0.000000,0.000836,0.104504,...,0.000764,5.460556e-06,3.822389e-07,1.642237,-0.833978,-0.434720,-0.930877,0.242608,0.21780,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,0.454907,0.248855,0.039817,0.228947,0.006968,0.019908,0.000299,0.000199,0.000100,0.000000,...,0.000000,0.000000e+00,0.000000e+00,0.474445,0.200240,0.660588,-0.332144,-0.728097,2.46810,0
1956,0.017323,0.000000,0.014436,0.105861,0.000962,0.856510,0.000000,0.000000,0.000000,0.004812,...,0.000000,4.630792e-07,0.000000e+00,-0.108256,-0.833092,-0.102687,-0.927217,0.022777,0.74287,0
1957,0.824504,0.144481,0.009632,0.008187,0.009632,0.000000,0.000385,0.002890,0.000289,0.000000,...,0.000000,0.000000e+00,0.000000e+00,-0.091459,0.897061,0.799480,0.861857,0.011591,0.14279,0
1958,0.199960,0.229954,0.009998,0.537892,0.009998,0.004999,0.000400,0.001500,0.000300,0.000000,...,0.000000,0.000000e+00,0.000000e+00,1.503417,-0.287795,0.468611,-0.790559,0.491971,-1.20739,0


In [3]:
X_name = df.columns.tolist()[:-7]
Y_name = [f'property {i}' for i in range(1,7)]

poolx = df[X_name].copy(deep=True)
pooly = df[Y_name].copy(deep=True)

X = poolx.iloc[df[df['istrain']==1].index, :]
Y = pooly.iloc[df[df['istrain']==1].index, :]

X_test = poolx.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)
Y_test = pooly.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)

pred = torch.zeros((X_test.shape[0], 6))
print(X.shape, X_test.shape)

(1372, 79) (588, 79)


In [4]:
from sklearn.model_selection import KFold
from utils.data import MyDataset
from torch.utils.data import DataLoader
from utils.trainer import trainer
from models.ft_transformer import FT_Transformer

os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

kf = KFold(n_splits=5, random_state=2022, shuffle=True)

oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=False


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train)
    valid_dataset = MyDataset(X_val, y_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    
    
    model = FT_Transformer(d_numerical=X.shape[1], categories=None, token_bias=True, n_layers=3, d_token=512, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int() ).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
    else:
        oof_train[val_idx] = final_model(X_val, None).detach()
        oof_real_train[trn_idx] += final_model(X_train, None).detach() / 4
        pred += final_model(torch.Tensor(X_test.values).long(), None).detach() /5

Fold: 0
epoch:    1 | train_loss: 15.1384 | train_mae: 0.8274 | train_r2: -0.9020 | train_rmse: 1.2840 | train_pccs: -0.0135
epoch:    1 | valid_loss: 20.2096 | valid_mae: 0.6397 | valid_r2: -0.0045 | valid_rmse: 1.5984 | valid_pccs: 0.3682
epoch:    2 | train_loss: 8.8429 | train_mae: 0.6061 | train_r2: -0.0240 | train_rmse: 0.9843 | train_pccs: 0.0089
epoch:    2 | valid_loss: 8.3272 | valid_mae: 0.6306 | valid_r2: -0.0117 | valid_rmse: 1.6030 | valid_pccs: 0.3695
epoch:    3 | train_loss: 8.7710 | train_mae: 0.6206 | train_r2: -0.0189 | train_rmse: 0.9830 | train_pccs: 0.0108
epoch:    3 | valid_loss: 31.4433 | valid_mae: 0.6282 | valid_r2: -0.0103 | valid_rmse: 1.6024 | valid_pccs: 0.2235
epoch:    4 | train_loss: 8.6789 | train_mae: 0.5890 | train_r2: -0.0109 | train_rmse: 0.9791 | train_pccs: -0.0014
epoch:    4 | valid_loss: 8.2351 | valid_mae: 0.6514 | valid_r2: -0.0214 | valid_rmse: 1.6060 | valid_pccs: 0.1975
epoch:    5 | train_loss: 8.8443 | train_mae: 0.6029 | train_r2: -0

In [5]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score


print("train")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')


print("test")
for i in range(Y_test.shape[1]):
    mae = mean_absolute_error(Y_test.iloc[:,i], pred[:,i])
    r2 = r2_score(Y_test.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((Y_test.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(Y_test.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

train
property1: mae:0.2272, rmse:1.6059, r2:0.1772, pccs:0.445
property2: mae:0.0491, rmse:0.0777, r2:0.9923, pccs:0.9961
property3: mae:0.1385, rmse:0.3591, r2:0.8018, pccs:0.896
property4: mae:0.0621, rmse:0.0799, r2:0.9928, pccs:0.9964
property5: mae:0.2054, rmse:1.6236, r2:0.0144, pccs:0.1395
property6: mae:0.6065, rmse:0.7913, r2:0.3319, pccs:0.5766
valid
property1: mae:0.2568, rmse:1.7056, r2:0.0718, pccs:0.2717
property2: mae:0.0621, rmse:0.0978, r2:0.9877, pccs:0.9939
property3: mae:0.1582, rmse:0.3758, r2:0.7829, pccs:0.8851
property4: mae:0.0711, rmse:0.103, r2:0.9881, pccs:0.994
property5: mae:0.2221, rmse:1.6453, r2:-0.0121, pccs:-0.0276
property6: mae:0.611, rmse:0.8003, r2:0.3167, pccs:0.5628
test
property1: mae:2.8707, rmse:2.8929, r2:-70.7586, pccs:0.0074
property2: mae:1.2638, rmse:1.4921, r2:-1.6617, pccs:0.0106
property3: mae:0.7979, rmse:0.8934, r2:-0.4046, pccs:0.1761
property4: mae:1.442, rmse:1.733, r2:-2.2609, pccs:-0.2203
property5: mae:0.4637, rmse:0.6747, r2

## p14

In [6]:
train = pd.read_csv("../data/preprocessed/p14_train_fe_hsp.csv")
train['istrain'] = 1
test = pd.read_csv("../data/preprocessed/p14_test_fe_hsp.csv")
test['istrain'] = 0
df = pd.concat([train, test]).reset_index(drop=True)
df

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,...,phs_element 10 element 11,phs_element 10 element 12,phs_element 11 element 12,property 1,property 2,property 3,property 4,property 5,property 6,istrain
0,0.673956,0.174380,0.018852,0.122537,0.009426,0.000000,0.000424,0.000283,0.000141,0.000000,...,-0.000000,-0.000000e+00,-0.000000e+00,0.317895,0.589088,0.752391,0.328302,-0.206129,0.44283,1
1,0.010422,0.000000,0.009474,0.312648,0.001421,0.661298,0.000000,0.000000,0.000000,0.000000,...,-0.000000,-0.000000e+00,-0.000000e+00,-0.108256,-0.574822,-0.221364,-0.926633,0.022768,-0.68232,1
2,0.959647,0.004798,0.016314,0.009596,0.004798,0.002879,0.000192,0.001727,0.000048,0.000000,...,-0.000000,-0.000000e+00,-0.000000e+00,-0.092087,1.220121,0.834930,1.319043,-0.018376,0.44283,1
3,0.002357,0.000000,0.000000,0.009428,0.000047,0.867352,0.000283,0.000000,0.000754,0.113133,...,-0.000351,-2.506537e-06,-1.462147e-07,-0.106166,-0.858744,-0.568540,-0.930921,0.023050,1.04291,1
4,0.004180,0.000000,0.000000,0.010450,0.000052,0.856934,0.015676,0.000000,0.000836,0.104504,...,-0.000418,-2.986739e-06,-2.090717e-07,1.642237,-0.833978,-0.434720,-0.930877,0.242608,0.21780,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,0.454907,0.248855,0.039817,0.228947,0.006968,0.019908,0.000299,0.000199,0.000100,0.000000,...,-0.000000,-0.000000e+00,-0.000000e+00,0.474445,0.200240,0.660588,-0.332144,-0.728097,2.46810,0
1956,0.017323,0.000000,0.014436,0.105861,0.000962,0.856510,0.000000,0.000000,0.000000,0.004812,...,-0.000000,-2.477832e-07,-0.000000e+00,-0.108256,-0.833092,-0.102687,-0.927217,0.022777,0.74287,0
1957,0.824504,0.144481,0.009632,0.008187,0.009632,0.000000,0.000385,0.002890,0.000289,0.000000,...,-0.000000,-0.000000e+00,-0.000000e+00,-0.091459,0.897061,0.799480,0.861857,0.011591,0.14279,0
1958,0.199960,0.229954,0.009998,0.537892,0.009998,0.004999,0.000400,0.001500,0.000300,0.000000,...,-0.000000,-0.000000e+00,-0.000000e+00,1.503417,-0.287795,0.468611,-0.790559,0.491971,-1.20739,0


In [7]:
X_name = df.columns.tolist()[:-7]
Y_name = [f'property {i}' for i in range(1,7)]

poolx = df[X_name].copy(deep=True)
pooly = df[Y_name].copy(deep=True)

X = poolx.iloc[df[df['istrain']==1].index, :]
Y = pooly.iloc[df[df['istrain']==1].index, :]

X_test = poolx.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)
Y_test = pooly.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)

pred = torch.zeros((X_test.shape[0], 6))
print(X.shape, X_test.shape)

(1372, 145) (588, 145)


In [8]:
from sklearn.model_selection import KFold
from utils.data import MyDataset
from torch.utils.data import DataLoader
from utils.trainer import trainer
from models.ft_transformer import FT_Transformer

os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

kf = KFold(n_splits=5, random_state=2022, shuffle=True)

oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=False


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train)
    valid_dataset = MyDataset(X_val, y_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    
    
    model = FT_Transformer(d_numerical=X.shape[1], categories=None, token_bias=True, n_layers=3, d_token=256, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int() ).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
    else:
        oof_train[val_idx] = final_model(X_val, None).detach()
        oof_real_train[trn_idx] += final_model(X_train, None).detach() / 4
        pred += final_model(torch.Tensor(X_test.values).float(), None).detach() /5

Fold: 0
epoch:    1 | train_loss: 12.1396 | train_mae: 0.7539 | train_r2: -0.4414 | train_rmse: 1.1322 | train_pccs: 0.0002
epoch:    1 | valid_loss: 9.2185 | valid_mae: 0.6224 | valid_r2: -0.0176 | valid_rmse: 1.6071 | valid_pccs: 0.4855
epoch:    2 | train_loss: 8.7799 | train_mae: 0.6022 | train_r2: -0.0210 | train_rmse: 0.9840 | train_pccs: -0.0097
epoch:    2 | valid_loss: 8.3429 | valid_mae: 0.6544 | valid_r2: -0.0343 | valid_rmse: 1.6124 | valid_pccs: 0.3221
epoch:    3 | train_loss: 8.6832 | train_mae: 0.5958 | train_r2: -0.0142 | train_rmse: 0.9804 | train_pccs: -0.0170
epoch:    3 | valid_loss: 8.3428 | valid_mae: 0.6368 | valid_r2: -0.0241 | valid_rmse: 1.6082 | valid_pccs: 0.5153
epoch:    4 | train_loss: 9.4175 | train_mae: 0.5933 | train_r2: -0.0038 | train_rmse: 0.9756 | train_pccs: 0.0068
epoch:    4 | valid_loss: 31.3920 | valid_mae: 0.6327 | valid_r2: -0.0042 | valid_rmse: 1.6000 | valid_pccs: 0.1494
epoch:    5 | train_loss: 9.0384 | train_mae: 0.5920 | train_r2: 0.0

In [9]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score
print("train")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("test")
for i in range(Y_test.shape[1]):
    mae = mean_absolute_error(Y_test.iloc[:,i], pred[:,i])
    r2 = r2_score(Y_test.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((Y_test.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(Y_test.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

train
property1: mae:0.2219, rmse:1.6125, r2:0.1704, pccs:0.4273
property2: mae:0.0771, rmse:0.1015, r2:0.9868, pccs:0.9939
property3: mae:0.1316, rmse:0.3643, r2:0.7961, pccs:0.895
property4: mae:0.0729, rmse:0.0874, r2:0.9914, pccs:0.9964
property5: mae:0.2062, rmse:1.6211, r2:0.0174, pccs:0.1457
property6: mae:0.6068, rmse:0.7855, r2:0.3416, pccs:0.585
valid
property1: mae:0.2415, rmse:1.6761, r2:0.1036, pccs:0.3223
property2: mae:0.0916, rmse:0.1264, r2:0.9795, pccs:0.99
property3: mae:0.1508, rmse:0.4022, r2:0.7513, pccs:0.8679
property4: mae:0.0964, rmse:0.1254, r2:0.9823, pccs:0.9914
property5: mae:0.2203, rmse:1.647, r2:-0.0143, pccs:-0.0235
property6: mae:0.6175, rmse:0.8015, r2:0.3146, pccs:0.5613
test
property1: mae:1.2488, rmse:1.2651, r2:-12.722, pccs:0.4148
property2: mae:1.1245, rmse:1.2823, r2:-0.9658, pccs:0.0838
property3: mae:0.7717, rmse:0.8656, r2:-0.3187, pccs:0.2174
property4: mae:1.2042, rmse:1.4851, r2:-1.3949, pccs:0.2188
property5: mae:0.2518, rmse:0.5724, r2

## Mydata

In [87]:
train = pd.read_csv("../data/preprocessed/p10_train.csv")
train['istrain'] = 1
test = pd.read_csv("../data/preprocessed/p10_test.csv")
test['istrain'] = 0
df = pd.concat([train, test]).reset_index(drop=True)
df

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,element 11,element 12,property 1,property 2,property 3,property 4,property 5,property 6,istrain
0,0.673956,0.174380,0.018852,0.122537,0.009426,0.000000,0.000424,0.000283,0.000141,0.000000,0.000000,0.000000,0.317895,0.589088,0.752391,0.328302,-0.206129,0.44283,1
1,0.010422,0.000000,0.009474,0.312648,0.001421,0.661298,0.000000,0.000000,0.000000,0.000000,0.004737,0.000000,-0.108256,-0.574822,-0.221364,-0.926633,0.022768,-0.68232,1
2,0.959647,0.004798,0.016314,0.009596,0.004798,0.002879,0.000192,0.001727,0.000048,0.000000,0.000000,0.000000,-0.092087,1.220121,0.834930,1.319043,-0.018376,0.44283,1
3,0.002357,0.000000,0.000000,0.009428,0.000047,0.867352,0.000283,0.000000,0.000754,0.113133,0.006599,0.000047,-0.106166,-0.858744,-0.568540,-0.930921,0.023050,1.04291,1
4,0.004180,0.000000,0.000000,0.010450,0.000052,0.856934,0.015676,0.000000,0.000836,0.104504,0.007315,0.000052,1.642237,-0.833978,-0.434720,-0.930877,0.242608,0.21780,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,0.454907,0.248855,0.039817,0.228947,0.006968,0.019908,0.000299,0.000199,0.000100,0.000000,0.000000,0.000000,0.474445,0.200240,0.660588,-0.332144,-0.728097,2.46810,0
1956,0.017323,0.000000,0.014436,0.105861,0.000962,0.856510,0.000000,0.000000,0.000000,0.004812,0.000000,0.000096,-0.108256,-0.833092,-0.102687,-0.927217,0.022777,0.74287,0
1957,0.824504,0.144481,0.009632,0.008187,0.009632,0.000000,0.000385,0.002890,0.000289,0.000000,0.000000,0.000000,-0.091459,0.897061,0.799480,0.861857,0.011591,0.14279,0
1958,0.199960,0.229954,0.009998,0.537892,0.009998,0.004999,0.000400,0.001500,0.000300,0.000000,0.004999,0.000000,1.503417,-0.287795,0.468611,-0.790559,0.491971,-1.20739,0


In [88]:
# df = pd.read_csv("data/toydata_clean.csv")
X_name = [f'element {i}' for i in range(1,13)]
Y_name = [f'property {i}' for i in range(1,7)]

from utils.featurizer import get_feature
X = df[X_name]
X, fea_dict = get_feature(X, X_name, qcut_fea=True, entropy_fea=True)

old_array.shape: (1960, 12)
new_array.shape: (1960, 276)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1960 entries, 0 to 1959
Columns: 264 entries, element 1_div_element 2 to element 11_sum_element 12
dtypes: float64(264)
memory usage: 3.9 MB
None
old_array.shape: (1960, 12)
new_array.shape: (1960, 276)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1960 entries, 0 to 1959
Columns: 264 entries, element 1_entropy_div_element 2_entropy to element 11_entropy_sum_element 12_entropy
dtypes: float32(264)
memory usage: 2.0 MB
None


In [89]:
Y = df[Y_name]
poolx = X.copy(deep=True)
pooly = Y.copy(deep=True)

X = poolx.iloc[df[df['istrain']==1].index, :]
Y = pooly.iloc[df[df['istrain']==1].index, :]

X_test = poolx.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)
Y_test = pooly.iloc[df[df['istrain']==0].index, :].reset_index(drop=True)

pred = torch.zeros((X_test.shape[0], 6))

In [91]:
X_test

Unnamed: 0,element 1,element 2,element 3,element 4,element 5,element 6,element 7,element 8,element 9,element 10,...,element 3_qcut,element 4_qcut,element 5_qcut,element 6_qcut,element 7_qcut,element 8_qcut,element 9_qcut,element 10_qcut,element 11_qcut,element 12_qcut
0,0.884340,0.097934,0.005876,0.000000,0.009793,0.000000,0.000294,0.001469,0.000294,0.000000,...,5,5,16,5,10,12,14,10,10,9
1,0.889918,0.093624,0.005913,0.003942,0.004928,0.000000,0.000197,0.001183,0.000099,0.000000,...,5,9,9,5,6,11,6,10,10,14
2,0.969154,0.011807,0.008855,0.000000,0.003936,0.002952,0.000246,0.002263,0.000295,0.000000,...,8,5,7,12,7,14,14,10,10,15
3,0.602128,0.000934,0.014003,0.336072,0.018671,0.004668,0.000000,0.022405,0.001120,0.000000,...,14,18,18,13,3,19,19,10,10,9
4,0.001494,0.000000,0.000199,0.000000,0.001494,0.000299,0.000000,0.000000,0.000000,0.000299,...,2,5,3,8,3,3,3,15,14,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
583,0.454907,0.248855,0.039817,0.228947,0.006968,0.019908,0.000299,0.000199,0.000100,0.000000,...,19,18,12,15,10,5,6,15,14,13
584,0.017323,0.000000,0.014436,0.105861,0.000962,0.856510,0.000000,0.000000,0.000000,0.004812,...,15,16,3,18,4,5,5,17,14,13
585,0.824504,0.144481,0.009632,0.008187,0.009632,0.000000,0.000385,0.002890,0.000289,0.000000,...,11,11,15,8,15,16,13,15,14,13
586,0.199960,0.229954,0.009998,0.537892,0.009998,0.004999,0.000400,0.001500,0.000300,0.000000,...,12,19,16,13,17,12,15,15,19,13


In [92]:
from sklearn.model_selection import KFold
from utils.data import MyDataset
from torch.utils.data import DataLoader
from utils.trainer import trainer
from models.ft_transformer import FT_Transformer

os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

kf = KFold(n_splits=5, random_state=2022, shuffle=True)
oof_train = torch.zeros((X.shape[0], 6))
oof_real_train = torch.zeros((X.shape[0], 6))

use_cat=True


for now_fold, (trn_idx, val_idx) in enumerate(kf.split(X, Y)):
    print(f"Fold: {now_fold}")
    X_train, X_val = np.array(X.iloc[trn_idx]), np.array(X.iloc[val_idx])
    y_train, y_val = np.array(Y.iloc[trn_idx]), np.array(Y.iloc[val_idx])
    
    X_train, X_val = torch.tensor(X_train).float(), torch.tensor(X_val).float()
    y_train, y_val = torch.tensor(y_train).float(), torch.tensor(y_val).float()
    
    train_dataset = MyDataset(X_train, y_train)
    valid_dataset = MyDataset(X_val, y_val)
    
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
    
    
    model = FT_Transformer(d_numerical=X.shape[1]-12, categories=[int(20) for _ in range(12)], token_bias=True, n_layers=3, d_token=256, n_heads=4,
                           d_ffn_factor=3.0, attention_dropout=0.2, ffn_dropout=0.2,residual_dropout=0.2)
                      
    final_model = trainer(model, train_dataloader, valid_dataloader, device, max_epochs=2000, early_stop=20, use_cat=use_cat)
    final_model = final_model.to('cpu')
    if use_cat:
        oof_train[val_idx]= final_model(X_val[:,:-12], X_val[:,-12:].int() ).detach()
        pred += final_model(
            torch.Tensor(X_test.values[:,:-12]), torch.Tensor(X_test.values[:,-12:]).int()
                            ).detach() / 5
        oof_real_train[trn_idx] += final_model(X_train[:,:-12], X_train[:,-12:].int()).detach() / 4
    else:
        oof_train[val_idx] = final_model(X_val).detach()
        pred += final_model(  torch.Tensor(X_test).long()  ).detach() /5

Fold: 0
epoch:    1 | train_loss: 8.4232 | train_mae: 0.5866 | train_r2: 0.0247 | train_rmse: 0.9651 | train_pccs: 0.3576
epoch:    1 | valid_loss: 7.0922 | valid_mae: 0.4122 | valid_r2: 0.3944 | valid_rmse: 1.3557 | valid_pccs: 0.5876
epoch:    2 | train_loss: 5.5939 | train_mae: 0.3289 | train_r2: 0.4596 | train_rmse: 0.7021 | train_pccs: 0.6042
epoch:    2 | valid_loss: 6.9141 | valid_mae: 0.3439 | valid_r2: 0.4691 | valid_rmse: 1.2963 | valid_pccs: 0.6233
epoch:    3 | train_loss: 4.8304 | train_mae: 0.2850 | train_r2: 0.5211 | train_rmse: 0.6422 | train_pccs: 0.6568
epoch:    3 | valid_loss: 6.7340 | valid_mae: 0.3209 | valid_r2: 0.5059 | valid_rmse: 1.2450 | valid_pccs: 0.6414
epoch:    4 | train_loss: 4.6948 | train_mae: 0.2603 | train_r2: 0.5390 | train_rmse: 0.6185 | train_pccs: 0.6541
epoch:    4 | valid_loss: 6.7935 | valid_mae: 0.3161 | valid_r2: 0.5052 | valid_rmse: 1.2548 | valid_pccs: 0.6466
epoch:    5 | train_loss: 4.8021 | train_mae: 0.2531 | train_r2: 0.5563 | train_

In [94]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, r2_score
print("train")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_real_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_real_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_real_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_real_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("valid")

for i in range(Y.shape[1]):
    mae = mean_absolute_error(Y.iloc[:,i], oof_train[:,i])
    r2 = r2_score(Y.iloc[:,i], oof_train[:,i])
    rmse = np.sqrt(((Y.iloc[:, i] - np.array(oof_train[:,i])) ** 2).mean())
    pccs = pearsonr(Y.iloc[:, i], oof_train[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

print("test")
for i in range(Y_test.shape[1]):
    mae = mean_absolute_error(Y_test.iloc[:,i], pred[:,i])
    r2 = r2_score(Y_test.iloc[:,i], pred[:,i])
    rmse = np.sqrt(((Y_test.iloc[:, i] - np.array(pred[:,i])) ** 2).mean())
    pccs = pearsonr(Y_test.iloc[:, i], pred[:,i])[0]
    mae, r2, rmse, pccs = np.around([mae, r2, rmse, pccs], decimals=4)
    print(f'property{i+1}: mae:{mae}, rmse:{rmse}, r2:{r2}, pccs:{pccs}')

train
property1: mae:0.0982, rmse:1.2457, r2:0.5049, pccs:0.826
property2: mae:0.0461, rmse:0.0683, r2:0.994, pccs:0.997
property3: mae:0.0444, rmse:0.0949, r2:0.9862, pccs:0.9931
property4: mae:0.0475, rmse:0.0702, r2:0.9945, pccs:0.9973
property5: mae:0.1648, rmse:1.4172, r2:0.2491, pccs:0.671
property6: mae:0.3712, rmse:0.5056, r2:0.7272, pccs:0.8558
valid
property1: mae:0.1933, rmse:1.6879, r2:0.091, pccs:0.3179
property2: mae:0.0653, rmse:0.102, r2:0.9867, pccs:0.9933
property3: mae:0.0939, rmse:0.2568, r2:0.8987, pccs:0.948
property4: mae:0.0631, rmse:0.0964, r2:0.9896, pccs:0.9948
property5: mae:0.2681, rmse:1.7702, r2:-0.1716, pccs:-0.1787
property6: mae:0.5128, rmse:0.709, r2:0.4636, pccs:0.6915
test
property1: mae:0.0568, rmse:0.1793, r2:0.7244, pccs:0.8915
property2: mae:0.0502, rmse:0.0739, r2:0.9935, pccs:0.9969
property3: mae:0.0713, rmse:0.2055, r2:0.9257, pccs:0.964
property4: mae:0.0467, rmse:0.0708, r2:0.9946, pccs:0.9973
property5: mae:0.1085, rmse:0.5457, r2:-0.0179