In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from cosy.models import CosyNet

In [2]:
df = pd.read_csv("/Users/tompope/Documents/python_enviroment/SoftwareDevelopment/soft_parameter_sharing/data/desulferization_data_w_tobacc.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,framework,CO2_tp_lod,N2_tp_lod,SO2_tp_lod,RDF_electronegativity_2.00,RDF_electronegativity_2.00.1,RDF_electronegativity_2.01,RDF_electronegativity_2.03,RDF_electronegativity_2.04,...,RDF_vdWaalsVolume_28.53,RDF_vdWaalsVolume_29.01,RDF_vdWaalsVolume_29.51,RDF_vdWaalsVolume_30.00,PLD,LCD,density,VF,SAv,SAg
0,0,GIQXUA_clean,1.918899,0.01752,2.747258,0.000605,0.000606,0.000607,0.000609,0.000614,...,0.0,0.0,0.0,0.0,3.8687,4.89334,1.79283,0.13844,2675.75,1492.47
1,1,JALPAQ_clean,0.050226,0.027788,0.008087,0.000861,0.00087,0.000888,0.000916,0.000954,...,0.0,0.0,0.0,0.0,2.90553,3.9755,1.66088,0.01992,841.869,506.881
2,2,SAQQEH_clean,2.006302,0.249367,0.533491,0.000771,0.000781,0.000802,0.000833,0.000876,...,0.0,0.0,0.0,0.0,9.13279,10.56525,0.914389,0.35942,2030.21,2220.29
3,3,BEFNEH_clean,1.036156,0.033717,1.343768,0.000899,0.000899,0.0009,0.000903,0.00091,...,0.0,0.0,0.0,0.0,3.33494,4.2694,2.16564,0.0729,1864.99,861.172
4,4,GELVID_clean,0.552087,0.064768,0.027879,0.00082,0.000831,0.000852,0.000884,0.000928,...,0.0,0.0,0.0,0.0,2.43534,3.61901,1.57413,0.02776,1334.15,847.547


In [3]:
'''
  - read data into dataframe
  - generate class data
  - extract and split feature and class data
  - scale feature data
'''


df = df.sample(frac=1).reset_index(drop=True)
df.drop(columns=['Unnamed: 0'], inplace=True)

df = df[df.framework.str.startswith('str_')]


df['co2_selec'] = np.log1p(((0.848 + 0.002)*df.CO2_tp_lod)/(0.15*(df.N2_tp_lod + df.SO2_tp_lod)))
df['so2_selec'] = np.log1p(((0.848 + 0.15)*df.SO2_tp_lod)/(0.002*(df.N2_tp_lod + df.CO2_tp_lod)))
df['n2_selec'] = ((0.15 + 0.002)*df.N2_tp_lod)/(0.848*(df.CO2_tp_lod + df.SO2_tp_lod))


train_df_full, test_df = train_test_split(df, test_size=0.2, random_state=2020)

train_df, val_df = train_test_split(train_df_full, test_size=0.1, random_state=2020)

X_train = train_df.iloc[:, 4:-3].to_numpy().astype(np.float32)
y_CO2 = train_df.co2_selec.to_numpy().astype(np.float32)
y_N2 = train_df.n2_selec.to_numpy().astype(np.float32)
y_SO2 = train_df.so2_selec.to_numpy().astype(np.float32)

X_val = val_df.iloc[:, 4:-3].to_numpy().astype(np.float32)
y_CO2_val = val_df.co2_selec.to_numpy().astype(np.float32)
y_N2_val = val_df.n2_selec.to_numpy().astype(np.float32)
y_SO2_val = val_df.so2_selec.to_numpy().astype(np.float32)

X_test = test_df.iloc[:, 4:-3].to_numpy().astype(np.float32)
y_CO2_test =  test_df.co2_selec.to_numpy().astype(np.float32)
y_N2_test = test_df.n2_selec.to_numpy().astype(np.float32)
y_SO2_test = test_df.so2_selec.to_numpy().astype(np.float32)


sc = StandardScaler()


X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)

In [4]:
input_ = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(1200, activation='relu')(input_)
x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Dense(400, activation='relu')(x)
x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Dense(500, activation='relu')(x)
x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Dense(1, activation='relu')(x)

model = tf.keras.Model(inputs=input_, outputs=x)

Metal device set to: Apple M1


2023-02-17 14:49:00.863205: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-17 14:49:00.863596: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 345)]             0         
                                                                 
 dense (Dense)               (None, 1200)              415200    
                                                                 
 dropout (Dropout)           (None, 1200)              0         
                                                                 
 dense_1 (Dense)             (None, 400)               480400    
                                                                 
 dropout_1 (Dropout)         (None, 400)               0         
                                                                 
 dense_2 (Dense)             (None, 500)               200500    
                                                                 
 dropout_2 (Dropout)         (None, 500)               0     

In [6]:
cosy_model = CosyNet(
    model_config=model.get_config(),
    number_models=3,
    scalar=1.0
)

In [7]:
opt = tf.keras.optimizers.Adam(learning_rate=0.00025)

losses = {
    'output_1':'mean_squared_error',
    'output_2': 'mean_squared_error',
    'output_3': 'mean_squared_error',
    }

R2 = tfa.metrics.RSquare()

cosy_model.compile(
            optimizer=opt,
            loss=losses,
            metrics=[R2],
            )

In [8]:
cosy_model.fit(
    x=X_train, y=[y_N2, y_CO2, y_SO2],
    validation_data=(X_val, [y_N2_val, y_CO2_val, y_SO2_val]),
    batch_size=32, epochs = 4, verbose=1)

Epoch 1/4


2023-02-17 14:49:01.349519: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-17 14:49:02.304687: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-02-17 14:49:17.332393: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/4
Epoch 3/4
Epoch 4/4

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import r2_score
cosy_model.evaluate(X_test, [y_N2_test, y_CO2_test, y_SO2_test])
predictions = cosy_model.predict(X_test)

R2 = tfa.metrics.RSquare()

pred1 = np.array(predictions[0]).reshape(-1)
pred2 = np.array(predictions[1]).reshape(-1)
pred3 = np.array(predictions[2]).reshape(-1)


print(r2_score(y_N2_test, pred1))
print(r2_score(y_CO2_test, pred2))
print(r2_score(y_SO2_test, pred3))

In [None]:
"""
-0.7580055281232221
0.6818685427671367
0.6525857912596633
"""