## 논문
<img src="img/img1.png" style="max-width: 800px;">

### 데이터 부르기

In [None]:
import csv
import torch
import pandas as pd
# Load 'OCM_matrix.csv' (looked up relative to the working directory) into a DataFrame.
df = pd.read_csv('OCM_matrix.csv')
# Preview the first rows; as the last expression of the cell it is rendered as output.
df.head()

In [None]:
# Convert the DataFrame into a plain NumPy array so it can be shuffled, sliced
# and turned into a torch tensor.
data = df.to_numpy()

## 데이터 Train, Validation, Test로 나누기

In [None]:
import numpy as np
# Split the dataset row-wise into train / validation / test subsets.
# 80% of the rows go to training, 10% to validation and the remainder to test.
r_train = 0.8
r_val = 0.1

n_samples = data.shape[0]
ntrain = int(n_samples * r_train)
nval = int(n_samples * r_val)

# Shuffle the rows into a NEW array instead of shuffling in place: the array
# returned by df.to_numpy() may share memory with `df` (or be read-only), so an
# in-place np.random.shuffle could silently reorder `df` or raise.
data = np.random.permutation(data)

# torch.Tensor(...) yields the default float dtype expected by torch.nn.Linear.
data = torch.Tensor(data)

train = data[:ntrain]
val = data[ntrain:ntrain + nval]
test = data[ntrain + nval:]

# Column 0 is used as the regression target (kept 2-D via the `:1` slice);
# every remaining column is an input feature.
train_x, train_y = train[:, 1:], train[:, :1]
val_x, val_y = val[:, 1:], val[:, :1]
test_x, test_y = test[:, 1:], test[:, :1]

### Normalization용 Mean, std계산

In [None]:
# Per-feature statistics are taken from the training split only and then
# reused for the validation and test splits.
x_mean = train_x.mean(dim=0)
x_std = train_x.std(dim=0)
# Features that are constant in the training split would divide by zero;
# use a unit scale for them instead.
x_std[x_std == 0] = 1

# The target is left on its original scale (shift 0, scale 1).
y_mean, y_std = 0, 1

train_x_norm, val_x_norm, test_x_norm = (
    (features - x_mean) / x_std for features in (train_x, val_x, test_x)
)
train_y_norm, val_y_norm, test_y_norm = (
    (targets - y_mean) / y_std for targets in (train_y, val_y, test_y)
)


### 모델

In [None]:
class NeuralNetwork(torch.nn.Module):
    """One-hidden-layer regressor whose output is bounded to (0, output_scale).

    Architecture: Linear -> Softplus -> Linear -> Sigmoid, with the sigmoid
    output multiplied by ``output_scale``.

    Args:
        in_features: Number of input features per sample (default 105).
        hidden_features: Width of the hidden layer (default 512).
        output_scale: Factor applied to the sigmoid output (default 100).

    The defaults reproduce the previously hard-coded network, and the layer
    attribute names are unchanged, so existing ``state_dict`` checkpoints
    still load.
    """

    def __init__(self, in_features: int = 105, hidden_features: int = 512,
                 output_scale: float = 100):
        super().__init__()
        # Plain attribute (not a parameter/buffer) so state_dict keys stay the same.
        self.output_scale = output_scale
        self.linear_layer_1 = torch.nn.Linear(in_features, hidden_features)
        self.linear_layer_2 = torch.nn.Linear(hidden_features, 1)
        self.activation1 = torch.nn.Softplus()
        self.activation2 = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a ``(..., in_features)`` tensor to ``(..., 1)`` predictions."""
        hidden = self.activation1(self.linear_layer_1(x))
        # Sigmoid bounds the raw output to (0, 1); scaling stretches it to
        # (0, output_scale).
        return self.activation2(self.linear_layer_2(hidden)) * self.output_scale
    
# Instantiate the network with freshly initialised weights; this single global
# instance is the one that gets trained, checkpointed and used for prediction.
Model = NeuralNetwork()

In [None]:
# Adam over all model parameters with a learning rate of 1e-3, paired with a
# mean-squared-error objective.
optimizer = torch.optim.Adam(Model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

### 학습

In [None]:
# Full-batch training for 1000 steps. After every optimizer step the model is
# scored on the validation split, and the weights with the lowest validation
# loss seen so far are checkpointed to 'model.pth'.
best_val_loss = torch.tensor(float('inf'))
for i in range(1000):
    predicted_train_y_norm = Model(train_x_norm)
    # MSELoss takes (input, target); same argument order as the validation call.
    train_loss = criterion(predicted_train_y_norm, train_y_norm)

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # Validation and the reporting metrics need no gradients: skipping autograd
    # avoids building a graph and keeps `best_val_loss` from holding one alive.
    with torch.no_grad():
        predicted_val_y_norm = Model(val_x_norm)
        val_loss = criterion(predicted_val_y_norm, val_y_norm)

        # Mean absolute errors rescaled by y_std. The training MAE uses the
        # predictions made before this step's weight update.
        trainMeanAE = torch.mean(torch.abs(predicted_train_y_norm*y_std - train_y_norm*y_std))
        valMeanAE = torch.mean(torch.abs(predicted_val_y_norm*y_std - val_y_norm*y_std))

    if i % 100 == 0:
        # Columns: train loss, validation loss, train MAE, validation MAE.
        print('%.2e'%train_loss,'%.2e'%val_loss,'%.2e'%trainMeanAE,'%.2e'%valMeanAE)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(Model.state_dict(),'model.pth')

### 가장 좋은 모델 부르기

In [None]:
# Restore the checkpoint with the lowest validation loss and predict the test split.
Model.load_state_dict(torch.load('model.pth'))
# Pure inference: disable autograd so no computation graph is built.
with torch.no_grad():
    predicted_test_y_norm = Model(test_x_norm)
# Undo the target normalization to get predictions on the original scale.
predicted_test_y = predicted_test_y_norm*y_std+y_mean

In [None]:
# Root-mean-square error between the test predictions and the true targets.
squared_error = (predicted_test_y - test_y) ** 2
rmse = torch.sqrt(torch.mean(squared_error))
print(rmse)

In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import matplotlib.pyplot as plt
# Parity plot: predicted value on the x axis, true value on the y axis.
predicted_np = predicted_test_y.detach().numpy()
actual_np = test_y.detach().numpy()
plt.scatter(predicted_np, actual_np, s=10, edgecolor='k')
# Diagonal reference line y = x (perfect prediction).
plt.plot([0, 100], [0, 100], c='k')
# Restrict the visible window to 0..38 on both axes.
plt.xlim(0, 38)
plt.ylim(0, 38)

## 새로운 촉매 예측
<img src="img/img2.png" style="max-width: 800px;">

In [None]:
# Names of the 105 input columns, in model-input order.
descriptors = [
    # Elements (columns 0-68)
    'Ag', 'Al', 'Ar', 'Au', 'B', 'Ba', 'Be', 'Bi', 'Br', 'C',
    'Ca', 'Cd', 'Ce', 'Cl', 'Co', 'Cr', 'Cs', 'Cu', 'Dy', 'Er',
    'Eu', 'F', 'Fe', 'Ga', 'Gd', 'Ge', 'Ho', 'I', 'In', 'K',
    'La', 'Li', 'Lu', 'Mg', 'Mn', 'Mo', 'N', 'Na', 'Nb', 'Nd',
    'Ni', 'P', 'Pb', 'Pd', 'Pr', 'Pt', 'Rb', 'Re', 'Rh', 'Ru',
    'S', 'Sb', 'Sc', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', 'Tb', 'Te',
    'Th', 'Ti', 'Tm', 'V', 'W', 'Y', 'Yb', 'Zn', 'Zr',
    # Promoters (columns 69-74)
    'Promotor_B', 'Promotor_Br', 'Promotor_Cl', 'Promotor_F', 'Promotor_P',
    'Promotor_S',
    # Supports (columns 75-92)
    'Support_Si', 'Support_La', 'Support_Ti', 'Support_Dy', 'Support_Pr',
    'Support_Mg', 'Support_Ca', 'Support_Zn', 'Support_Y', 'Support_Nd',
    'Support_Ce', 'Support_Sm', 'Support_Al', 'Support_Tb', 'Support_C',
    'Support_Yb', 'Support_Zr', 'Support_Co',
    # Operating conditions (columns 93-98)
    'Temperature, K', 'p(CH4), bar', 'p(O2), bar', 'p(CH4)/p(O2)',
    'P total, bar', 'Contact time, s',
    # Preparation-method flags (columns 99-104)
    'Impregnation', 'Mech. mixing', 'Precipitation', 'Pyrolysis', 'Sol-gel',
    'Therm.decomp.',
]

# Single-row template input: the first 93 (composition) columns are zero and the
# last 12 columns hold fixed operating conditions and preparation-method flags.
x_to_test = np.array(
    [[0] * 93 + [1073, 0.4, 0.08, 4.8, 1, 0.04, 1, 0, 0, 0, 0, 0]]
)

###  촉매 고르기
여러 가지 원소와 서포트, 프로모터 등은 93번째 열까지 있다. 93번째 열까지의 합은 100이다. 여기서 랜덤하게 5개를 고르고, wt의 합이 100이 되게 해보자.

In [None]:
# https://numpy.org/doc/stable/reference/random/generated/numpy.random.choice.html
# Draw 5 distinct column indices from 0..92 (replace=False means no repeats).
idx = np.random.choice(93,size=5,replace=False)
print(idx)

In [None]:
# Draw 5 random values to use as unnormalized weights for the chosen columns.
wt = np.random.random(5)
print(wt)

In [None]:
# Rescale the weights so that they sum to 100.
wt = wt/wt.sum()*100
print(wt)

In [None]:
# Write the 5 weights into the randomly chosen columns of the single input row.
x_to_test[0,idx] = wt
print(x_to_test)

In [None]:
# Convert to a tensor, normalize with the same x_mean / x_std used for the
# training data, and run the model on the candidate.
x_to_test = torch.Tensor(x_to_test)
x_to_test_norm = (x_to_test - x_mean)/x_std
y_tested_norm = Model(x_to_test_norm)

In [None]:
# Undo the target normalization to express the prediction on the original scale.
y_test = y_tested_norm*y_std + y_mean
print(y_test)

In [None]:
# Screen 10 random catalyst candidates: each one gets 5 randomly chosen
# composition columns whose weights sum to 100, while the remaining columns
# keep fixed values. The predicted yield is printed for every candidate.
n_composition = 93  # columns 0..92 hold element / promoter / support amounts
# Fixed values for the last 12 columns (per `descriptors`: temperature, partial
# pressures, pressure ratio, total pressure, contact time, then the six
# preparation-method flags with only 'Impregnation' set).
fixed_tail = [1073, 0.4, 0.08, 4.8, 1, 0.04, 1, 0, 0, 0, 0, 0]
for _ in range(10):
    # Fresh zeroed row per candidate, so earlier picks never leak into the next.
    x_to_test = np.zeros((1, n_composition + len(fixed_tail)))
    x_to_test[0, n_composition:] = fixed_tail

    idx = np.random.choice(n_composition, size=5, replace=False)
    wt = np.random.random(5)
    wt = wt / wt.sum() * 100
    x_to_test[0, idx] = wt

    x_to_test = torch.Tensor(x_to_test)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        x_to_test_norm = (x_to_test - x_mean) / x_std
        y_tested_norm = Model(x_to_test_norm)
    y_test = y_tested_norm * y_std + y_mean

    # One line listing "<weight>% <descriptor>," for each chosen column,
    # followed by the predicted yield and a blank line.
    print(''.join(f'{w:.1f}% {descriptors[j]},' for w, j in zip(wt, idx)))
    print(f'{y_test[0][0].item():.2f}% Yield\n')
    