In [1]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Columns kept from the property/cell table; "betta" is the CSV's spelling and is
# renamed to "beta" after selection.
property_cell_cols = ["pld", "lcd", "a", "b", "c", "alpha", "betta", "gamma", "volume", "spacegroupNumber"]

# The CSV's second column (index_col=1, noted as "filename" by the author) becomes the
# row index; the leftover "Unnamed: 0" column (presumably a saved row counter) is dropped.
data_property_cell = (
    pd.read_csv("../data/qmof_property_cell.csv", index_col=1)
    .drop(columns="Unnamed: 0")
    .loc[:, property_cell_cols]
    .rename(columns={"betta": "beta"})
)

# Mordred descriptor table, indexed by the first CSV column.
data_linker_mordred = pd.read_csv("../data/mordred_descriptors.csv", index_col=0)

In [3]:
# Fraction of missing cells in the Mordred descriptor table.
# DataFrame.isna() works for every dtype (np.isnan raises on non-numeric columns),
# and averaging the boolean mask replaces the manual count / n_rows / n_cols division,
# which also failed with IndexError on an empty frame (iloc[0]).
p = data_linker_mordred.isna().to_numpy().mean()
print(f"Mordred data has {p*100:.2n} % of nan")

Mordred data has 25 % of nan


In [4]:
# Node descriptors: keep only rows whose "n_types_metals" equals 1, then keep the
# listed node feature columns.
data_node = (
    pd.read_csv("../data/node_descriptors.csv", index_col=0)
    .loc[lambda frame: frame["n_types_metals"] == 1.0]
    .loc[:, [
        'n_metals',
        'Atomic_Number',
        'Atomic_Weight',
        'Atomic Radius',
        'Mulliken EN',
        'polarizability(A^3)',
        'electron affinity(kJ/mol)',
    ]]
)


In [5]:
# Column-wise merge of the three descriptor tables, aligned on their row index.
# pd.concat uses an outer join by default, so a row missing from any table is kept
# and filled with NaN in that table's columns.
# NOTE(review): rows filtered out of data_node (n_types_metals != 1) would therefore
# reappear here with NaN node features if they exist in the other tables — confirm
# this is intended rather than join="inner".
data_qmof = pd.concat(
    [data_property_cell, data_linker_mordred, data_node],
    axis="columns",
)
data_qmof.shape

(7463, 1843)

In [6]:
from pteproc_model import PreprocessingModel

In [7]:
# p_drop: per the author's note, columns with more than this fraction of NaN values
# are dropped; the value passed here is 0.1, i.e. 10 %.
# NOTE(review): the exact meaning of p_drop, threshold and normalizer is defined in
# pteproc_model.PreprocessingModel, which is not visible here — confirm 0.1 is intended.
model = PreprocessingModel(
    normalizer="minmax",
    threshold=1e-6,
    p_drop=0.1,
)

In [8]:
# Fit the preprocessing model on the whole merged table and transform it in one call;
# the fitted `model` object is reused (and saved) further down.
data_transformed = model.fit_transform(data_qmof)

In [14]:
# Display the transformed table (the notebook shows a truncated preview of rows/columns).
data_transformed

Unnamed: 0,pld,lcd,a,b,c,alpha,beta,gamma,volume,spacegroupNumber,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,VE1_A,VE2_A,VE3_A,VR1_A,VR2_A,VR3_A,nAromAtom,nAromBond,nAtom,nHeavyAtom,nSpiro,nBridgehead,nHetero,nH,nC,nN,nO,nS,nP,nF,nCl,nBr,nI,nX,ATS0dv,ATS1dv,ATS2dv,ATS3dv,ATS4dv,ATS5dv,...,JGI4,JGI5,JGI6,JGI7,JGI8,JGI9,JGI10,JGT10,Diameter,Radius,TopoShapeIndex,PetitjeanIndex,Vabc,VAdjMat,MWC01,MWC02,MWC03,MWC04,MWC05,MWC06,MWC07,MWC08,MWC09,MWC10,TMWC10,SRW02,SRW03,SRW04,SRW05,SRW06,SRW07,SRW08,SRW09,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2,n_metals,Atomic_Number,Atomic_Weight,Atomic Radius,Mulliken EN,polarizability(A^3),electron affinity(kJ/mol)
boydwoo_str_m3_o3_o28_pcu_sym_255,0.410619,0.479118,0.450473,0.464028,0.365010,0.583960,0.527963,0.441757,0.169915,0.000000,0.313738,0.398765,0.238095,0.0,0.287930,0.642940,0.642940,0.287930,0.748569,0.716776,0.467770,0.128609,0.697689,0.030697,0.031120,0.693763,0.247863,0.218519,0.289157,0.314465,0.0,0.0,0.297297,0.215686,0.243056,0.166667,0.285714,0.000000,0.000000,0.0,0.000000,0.0000,0.0,0.0000,0.344689,0.333936,0.374391,0.370644,0.308036,0.247148,...,0.288771,0.255020,0.389914,0.391794,0.385873,0.370752,0.398891,0.547142,1.400000e-07,7.000000e-08,0.869444,0.929931,0.286687,0.743404,0.298667,0.771327,0.802663,0.828807,0.848010,0.842038,0.832371,0.824627,0.818142,0.813061,0.450427,0.722518,0.0,0.790172,0.000000,0.838656,0.000000,0.826722,0.000000,0.807365,0.396779,0.202268,0.163142,2.113667e-07,0.355330,0.310532,0.312528,0.358272,0.298246,0.090909,0.303371,0.252930,0.133333,0.841924,0.029197,0.014184
gmof_Al2O6-BDC_B-irmof7_B_No138,0.082168,0.118327,0.098236,0.197062,0.095802,0.487432,0.274687,0.215255,0.011610,0.000000,0.137000,0.204804,0.142857,0.0,0.117509,0.606735,0.606735,0.117509,0.656176,0.534575,0.284005,0.244998,0.513221,0.005254,0.012209,0.506666,0.102564,0.094444,0.138554,0.132075,0.0,0.0,0.081081,0.132353,0.125000,0.000000,0.142857,0.000000,0.000000,0.0,0.000000,0.0000,0.0,0.0000,0.158140,0.137785,0.152425,0.130252,0.092014,0.048847,...,0.357226,0.272727,0.662667,0.000000,0.000000,0.000000,0.000000,0.590217,6.000000e-08,3.000000e-08,0.750000,0.857143,0.129610,0.577251,0.124000,0.618705,0.666556,0.707310,0.736499,0.742055,0.741154,0.740731,0.739897,0.739545,0.291725,0.544969,0.0,0.648658,0.000000,0.723073,0.000000,0.734785,0.000000,0.732141,0.252222,0.085351,0.138377,2.030000e-08,0.137056,0.128664,0.126162,0.169511,0.114662,0.000000,0.112360,0.086723,0.000000,0.422680,0.063869,0.158865
gmof_Uio66Zr-irmof20_A-irmof8_A_No139,0.117807,0.226642,0.396015,0.365802,0.209176,0.274848,0.096726,0.058777,0.061655,0.000000,0.174190,0.258031,0.214286,0.0,0.149006,0.651147,0.621272,0.149006,0.646173,0.592769,0.322685,0.194624,0.566508,0.008177,0.015905,0.565550,0.115385,0.111111,0.156627,0.169811,0.0,0.0,0.162162,0.117647,0.135417,0.000000,0.214286,0.100000,0.000000,0.0,0.000000,0.0000,0.0,0.0000,0.206649,0.176726,0.198486,0.162316,0.132523,0.096383,...,0.386588,0.287528,0.538039,0.300214,0.333333,0.000000,0.000000,0.610999,7.500000e-08,3.500000e-08,0.900000,0.944444,0.159851,0.629517,0.160000,0.671056,0.716687,0.756018,0.784559,0.788124,0.785955,0.784181,0.782619,0.781427,0.331363,0.600388,0.0,0.698287,0.299575,0.765724,0.368542,0.770228,0.393018,0.762741,0.336396,0.118837,0.225372,3.605000e-08,0.182741,0.169381,0.169987,0.225764,0.150376,0.454545,0.415730,0.364723,0.488889,0.563574,0.239051,0.157447
YOSLIB01_FSR,0.011109,0.010565,0.108731,0.063569,0.105109,0.578144,0.526960,0.441639,0.007752,0.140969,0.074191,0.113726,0.071429,0.0,0.062866,0.501116,0.501116,0.062866,0.736204,0.396722,0.188773,0.388310,0.377559,0.001901,0.007511,0.378208,0.076923,0.066667,0.060241,0.066038,0.0,0.0,0.081081,0.044118,0.052083,0.083333,0.071429,0.000000,0.000000,0.0,0.000000,0.0000,0.0,0.0000,0.096075,0.081677,0.081601,0.061176,0.028770,0.007485,...,0.228840,0.511364,0.000000,0.000000,0.000000,0.000000,0.000000,0.434161,4.000000e-08,2.000000e-08,0.666667,0.800000,0.053516,0.454321,0.064000,0.490956,0.541315,0.582379,0.613068,0.621970,0.624983,0.627238,0.628894,0.630374,0.213342,0.416216,0.0,0.525828,0.000000,0.608055,0.000000,0.629601,0.000000,0.633444,0.182925,0.043343,0.194635,4.350000e-09,0.045685,0.061889,0.055777,0.060004,0.050125,0.000000,0.494382,0.436748,0.261111,0.838488,0.056569,0.278014
SACCAD_FSR,0.139065,0.130483,0.206300,0.309442,0.215572,0.578142,0.526962,0.438559,0.047573,0.026432,0.121845,0.174644,0.071429,0.0,0.109914,0.558498,0.550266,0.109914,0.814610,0.509278,0.275061,0.284310,0.488665,0.004345,0.011680,0.482933,0.141026,0.122222,0.108434,0.113208,0.0,0.0,0.108108,0.088235,0.093750,0.125000,0.071429,0.000000,0.000000,0.0,0.000000,0.0000,0.0,0.0000,0.132147,0.127756,0.125866,0.081345,0.052083,0.029942,...,0.167734,0.322222,0.510988,0.310832,0.421875,0.000000,0.000000,0.454000,7.000000e-08,3.000000e-08,1.000000,1.000000,0.093443,0.559945,0.112000,0.594051,0.640260,0.676221,0.703830,0.707428,0.706289,0.704990,0.703971,0.703132,0.271359,0.526605,0.0,0.623652,0.471896,0.692485,0.591790,0.700326,0.637619,0.694782,0.313095,0.072393,0.164094,1.580000e-08,0.086294,0.110749,0.104914,0.086255,0.096491,0.000000,0.303371,0.252930,0.133333,0.841924,0.029197,0.014184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
QOBZUD_FSR,0.159228,0.128743,0.332295,0.297183,0.195961,0.759244,0.850456,0.634459,0.049665,0.061674,0.265723,0.296008,0.000000,0.0,0.258265,0.582732,0.582732,0.258265,0.872057,0.688493,0.392899,0.121207,0.658045,0.032498,0.045405,0.715672,0.307692,0.266667,0.301205,0.264151,0.0,0.0,0.081081,0.323529,0.270833,0.166667,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.0,0.0000,0.191560,0.219371,0.202463,0.169412,0.102679,0.078117,...,0.227658,0.252525,0.209123,0.210407,0.356881,0.222694,0.138889,0.392254,1.800000e-07,9.000000e-08,0.900000,0.947368,0.264402,0.722974,0.256000,0.740635,0.767372,0.787859,0.803502,0.795254,0.784532,0.775720,0.768657,0.762957,0.403799,0.700392,0.0,0.758977,0.000000,0.802171,0.000000,0.787671,0.000000,0.766298,0.355434,0.162699,0.095818,1.506000e-07,0.223350,0.247557,0.233732,0.179261,0.255639,0.000000,0.505618,0.456406,0.238889,0.800687,0.043796,0.000000
boydwoo_str_m5_Al_o13_o27_sra_sym_16,0.270484,0.231007,0.097786,0.553784,0.354804,0.343260,0.516240,0.429053,0.061837,0.000000,0.237762,0.328712,0.142857,0.0,0.220645,0.640234,0.640234,0.220645,0.714675,0.663157,0.402132,0.149293,0.643439,0.015169,0.021128,0.637890,0.179487,0.161111,0.210843,0.242925,0.0,0.0,0.277027,0.136029,0.171875,0.083333,0.232143,0.000000,0.000000,0.0,0.000000,0.1375,0.0,0.1375,0.271361,0.250326,0.273002,0.270484,0.203283,0.128345,...,0.328490,0.291749,0.391787,0.352045,0.386578,0.238095,0.199056,0.569840,1.100000e-07,5.500000e-08,0.837500,0.911111,0.240664,0.693765,0.226000,0.725158,0.763869,0.795680,0.819057,0.817218,0.810630,0.805431,0.800858,0.797402,0.390300,0.669126,0.0,0.747323,0.000000,0.805329,0.000000,0.802353,0.000000,0.789436,0.343030,0.236922,0.371113,1.057750e-07,0.272843,0.232085,0.233068,0.294956,0.239662,0.000000,0.112360,0.086723,0.000000,0.422680,0.063869,0.158865
NIMWIR_FSR,0.024456,0.041811,0.064723,0.142504,0.173167,0.578143,0.615112,0.441633,0.011946,0.057269,0.083450,0.127465,0.071429,0.0,0.067783,0.521616,0.521616,0.067783,0.649086,0.422070,0.206011,0.359167,0.403293,0.002223,0.007950,0.399725,0.076923,0.066667,0.066265,0.075472,0.0,0.0,0.081081,0.044118,0.062500,0.041667,0.071429,0.000000,0.000000,0.0,0.083333,0.0000,0.0,0.0500,0.091621,0.079027,0.079163,0.053782,0.033344,0.009426,...,0.395339,0.255682,0.000000,0.000000,0.000000,0.000000,0.000000,0.535655,4.000000e-08,2.000000e-08,0.666667,0.800000,0.070144,0.476106,0.072000,0.516598,0.565301,0.607820,0.637477,0.646408,0.648244,0.650299,0.651242,0.652537,0.226163,0.438784,0.0,0.551273,0.000000,0.632129,0.000000,0.651439,0.000000,0.653774,0.194411,0.058070,0.257462,5.800000e-09,0.055838,0.071661,0.065073,0.083255,0.057644,0.000000,0.505618,0.456406,0.238889,0.800687,0.043796,0.000000
OSUNAT_FSR,0.027635,0.029027,0.101021,0.265624,0.177919,0.583864,0.526959,0.441636,0.023728,0.057269,0.096115,0.134986,0.000000,0.0,0.075127,0.529884,0.529884,0.075127,0.496093,0.422602,0.214427,0.375069,0.401222,0.002935,0.008355,0.394227,0.000000,0.000000,0.082329,0.084906,0.0,0.0,0.108108,0.068627,0.062500,0.027778,0.119048,0.033333,0.222222,0.0,0.000000,0.0000,0.0,0.0000,0.098738,0.053206,0.117754,0.059583,0.031893,0.019236,...,0.238217,0.268939,0.223492,0.410256,0.000000,0.000000,0.000000,0.678721,4.333333e-08,2.000000e-08,0.833333,0.904762,0.085860,0.468845,0.082667,0.524675,0.561790,0.614823,0.635106,0.652721,0.647075,0.656123,0.650997,0.658080,0.234419,0.433645,0.0,0.561830,0.000000,0.646318,0.000000,0.664410,0.000000,0.664690,0.202970,0.064627,0.293476,9.816666e-09,0.067682,0.087948,0.081009,0.095443,0.058480,0.000000,0.719101,0.683676,0.600000,0.378007,0.343066,0.146099


In [10]:
# Hold out 20 % of the rows as a test set; random_state fixes the shuffle so the split
# is reproducible.
# NOTE(review): data_transformed comes from model.fit_transform(data_qmof), i.e. the
# preprocessing was fitted on all rows before this split, so test rows influenced the
# fitted parameters. Consider splitting first and fitting on the training rows only.
train, test = train_test_split(data_transformed, test_size=0.2, random_state=42)

In [11]:
# Sanity check: (rows, columns) of each split.
train.shape, test.shape

((5970, 1018), (1493, 1018))

In [12]:
# Let pandas render up to 100 columns before truncating, then show the training split.
pd.set_option("display.max_columns", 100)
train

Unnamed: 0,pld,lcd,a,b,c,alpha,beta,gamma,volume,spacegroupNumber,ABC,ABCGG,nAcid,nBase,SpAbs_A,SpMax_A,SpDiam_A,SpAD_A,SpMAD_A,LogEE_A,VE1_A,VE2_A,VE3_A,VR1_A,VR2_A,VR3_A,nAromAtom,nAromBond,nAtom,nHeavyAtom,nSpiro,nBridgehead,nHetero,nH,nC,nN,nO,nS,nP,nF,nCl,nBr,nI,nX,ATS0dv,ATS1dv,ATS2dv,ATS3dv,ATS4dv,ATS5dv,...,JGI4,JGI5,JGI6,JGI7,JGI8,JGI9,JGI10,JGT10,Diameter,Radius,TopoShapeIndex,PetitjeanIndex,Vabc,VAdjMat,MWC01,MWC02,MWC03,MWC04,MWC05,MWC06,MWC07,MWC08,MWC09,MWC10,TMWC10,SRW02,SRW03,SRW04,SRW05,SRW06,SRW07,SRW08,SRW09,SRW10,TSRW10,MW,AMW,WPath,WPol,Zagreb1,Zagreb2,mZagreb1,mZagreb2,n_metals,Atomic_Number,Atomic_Weight,Atomic Radius,Mulliken EN,polarizability(A^3),electron affinity(kJ/mol)
boydwoo_str_m5_Al_o1_o4_sra_sym_103,0.144681,0.134608,0.099578,0.286691,0.201016,0.580615,0.534957,0.432328,0.026953,0.000000,0.133642,0.206239,0.142857,0.000000,0.118076,0.604166,0.604166,0.118076,0.670207,0.533678,0.277267,0.239129,0.511327,0.005487,0.012885,0.511001,0.076923,0.066667,0.132530,0.132075,0.0,0.000000,0.126126,0.117647,0.107639,0.000000,0.202381,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.179182,0.147633,0.161663,0.149132,0.122520,0.060459,...,0.344853,0.361689,0.379358,0.000000,0.000000,0.000000,0.000000,0.563311,6.000000e-08,3.000000e-08,0.750000,0.857143,0.126495,0.575930,0.122667,0.613723,0.661707,0.702054,0.731793,0.737356,0.736850,0.736528,0.735894,0.735666,0.289988,0.543547,0.0,0.646108,0.000000,0.720340,0.00000,0.732019,0.000000,0.729469,0.251615,0.087876,0.161208,2.061667e-08,0.130288,0.124864,0.122178,0.165260,0.122807,0.000000,0.112360,0.086723,0.000000,0.422680,0.063869,0.158865
gmof_Zn2O8N2-DPAC_A-irmof7_A_No1105,0.485215,0.566520,0.401228,0.409167,0.476341,0.577087,0.528019,0.441317,0.172057,0.000000,0.167676,0.228442,0.095238,0.000000,0.148002,0.578607,0.578607,0.148002,0.707163,0.580097,0.348260,0.224702,0.567345,0.006866,0.013418,0.545642,0.145299,0.129630,0.140562,0.163522,0.0,0.000000,0.135135,0.088235,0.138889,0.027778,0.142857,0.000000,0.0,0.095238,0.000000,0.0,0.000000,0.066667,0.210303,0.183592,0.195022,0.183193,0.136243,0.082276,...,0.310576,0.251134,0.371309,0.197293,0.268141,0.224593,0.157293,0.541440,8.666667e-08,4.333333e-08,0.802381,0.889703,0.146293,0.621223,0.154667,0.654366,0.693807,0.727353,0.750627,0.751592,0.746491,0.743385,0.739702,0.737669,0.317158,0.591601,0.0,0.680649,0.000000,0.743461,0.00000,0.745466,0.000000,0.735724,0.276890,0.106636,0.205999,3.993333e-08,0.152284,0.156352,0.149624,0.177261,0.147034,0.090909,0.303371,0.252930,0.133333,0.841924,0.029197,0.014184
boydwoo_str_m4_Al_o17_o25_acs_sym_95,0.180585,0.209831,0.308425,0.269242,0.442703,0.586116,0.526087,0.816238,0.084813,0.000000,0.210640,0.272545,0.142857,0.000000,0.187116,0.791130,0.791130,0.187116,0.803968,0.646908,0.320506,0.155420,0.586413,0.027434,0.053080,0.665612,0.102564,0.105556,0.178715,0.196541,0.0,0.000000,0.126126,0.129902,0.178819,0.000000,0.184524,0.000000,0.0,0.000000,0.041667,0.0,0.000000,0.025000,0.222399,0.217143,0.237034,0.199247,0.140204,0.108144,...,0.453727,0.342762,0.611015,0.603917,0.282152,0.516396,0.629630,0.708465,1.000000e-07,5.000000e-08,0.833333,0.909091,0.174082,0.675817,0.202667,0.724820,0.778441,0.822831,0.857702,0.863703,0.863889,0.863658,0.863669,0.863675,0.377966,0.649836,0.0,0.766286,0.000000,0.856988,0.00000,0.872751,0.000000,0.869917,0.325253,0.129123,0.188667,5.393333e-08,0.210660,0.227470,0.252103,0.192512,0.172254,0.181818,0.112360,0.086723,0.000000,0.422680,0.063869,0.158865
UVUWEP_FSR,0.044646,0.115070,0.216436,0.319392,0.202759,0.674168,0.561955,0.632461,0.045866,0.004405,0.145213,0.196820,0.071429,0.000000,0.134647,0.580558,0.574646,0.134647,0.812668,0.538382,0.287535,0.250485,0.513762,0.007458,0.016001,0.520766,0.173077,0.155556,0.129518,0.136792,0.0,0.000000,0.094595,0.102941,0.125000,0.083333,0.071429,0.050000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.140752,0.135255,0.142481,0.110476,0.070354,0.044721,...,0.203073,0.371944,0.474613,0.097499,0.011565,0.054878,0.082305,0.452531,8.000000e-08,4.000000e-08,0.791667,0.883117,0.120160,0.585466,0.136000,0.618705,0.662948,0.698102,0.725054,0.727937,0.726056,0.724335,0.722927,0.721859,0.294536,0.553971,0.0,0.646801,0.235948,0.714917,0.30605,0.721701,0.335828,0.715324,0.294937,0.089813,0.178531,2.747500e-08,0.121827,0.135179,0.130810,0.099381,0.120927,0.000000,0.505618,0.456406,0.238889,0.800687,0.043796,0.000000
boydwoo_str_m3_o12_o19_pcu_sym_169,0.161454,0.186949,0.212498,0.175937,0.103219,0.551201,0.321590,0.374313,0.020972,0.000000,0.118580,0.192708,0.095238,0.000000,0.093497,0.511651,0.511651,0.093497,0.454142,0.493189,0.266243,0.273743,0.487269,0.004021,0.009644,0.470827,0.000000,0.000000,0.084337,0.119497,0.0,0.000000,0.243243,0.019608,0.048611,0.083333,0.261905,0.066667,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.205155,0.127756,0.175947,0.113427,0.116843,0.079511,...,0.243066,0.240260,0.574074,0.000000,0.000000,0.000000,0.000000,0.683360,6.000000e-08,3.000000e-08,0.750000,0.857143,0.109758,0.533363,0.101333,0.569275,0.603032,0.642150,0.662795,0.669553,0.665147,0.666049,0.662887,0.663524,0.258797,0.498931,0.0,0.600217,0.000000,0.665033,0.00000,0.670800,0.000000,0.664277,0.229805,0.088439,0.329173,1.995000e-08,0.087986,0.102063,0.089420,0.195387,0.103592,0.000000,0.303371,0.252930,0.133333,0.841924,0.029197,0.014184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SUJREV_FSR,0.094003,0.090175,0.305691,0.295653,0.322737,0.578144,0.526959,0.441635,0.081708,0.264317,0.165031,0.219652,0.071429,0.000000,0.154080,0.547543,0.547543,0.154080,0.712472,0.542574,0.285168,0.244117,0.520717,0.012153,0.019828,0.533630,0.153846,0.133333,0.174699,0.160377,0.0,0.000000,0.081081,0.176471,0.156250,0.083333,0.071429,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.147530,0.141037,0.130485,0.099328,0.074653,0.068904,...,0.268986,0.180195,0.367556,0.044379,0.081633,0.073171,0.000000,0.411599,7.500000e-08,4.000000e-08,0.708333,0.828571,0.156310,0.585466,0.156000,0.609882,0.643350,0.675580,0.696748,0.699578,0.695315,0.694117,0.691369,0.690784,0.303100,0.554887,0.0,0.638844,0.000000,0.697628,0.00000,0.698962,0.000000,0.689715,0.264254,0.101005,0.142215,4.922500e-08,0.129442,0.151466,0.144754,0.116257,0.149749,0.000000,0.303371,0.252930,0.133333,0.841924,0.029197,0.014184
COWRIR_FSR,0.042181,0.095206,0.401058,0.387969,0.289401,0.578143,0.526962,0.572850,0.112291,0.057269,0.182664,0.244133,0.142857,0.000000,0.163604,0.601237,0.601237,0.163604,0.720885,0.601716,0.335631,0.187518,0.579181,0.009739,0.018303,0.584324,0.153846,0.133333,0.162651,0.179245,0.0,0.000000,0.162162,0.117647,0.145833,0.000000,0.214286,0.100000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.210892,0.150705,0.201694,0.158655,0.104497,0.059372,...,0.214775,0.346591,0.515556,0.330769,0.307692,0.356098,0.665021,0.676490,1.100000e-07,5.000000e-08,1.000000,1.000000,0.171183,0.639136,0.168000,0.674533,0.712919,0.744957,0.768221,0.767918,0.762720,0.758908,0.755403,0.753004,0.332374,0.610640,0.0,0.700295,0.000000,0.764071,0.00000,0.764192,0.000000,0.752406,0.291565,0.124198,0.202951,4.915000e-08,0.172589,0.172638,0.166003,0.201950,0.160401,0.000000,0.269663,0.224989,0.188889,0.790378,0.049270,0.190071
boydwoo_str_m3_o5_o22_nbo_sym_100,0.208717,0.373788,0.411771,0.379289,0.223143,0.690981,0.877699,0.743917,0.072399,0.048458,0.118899,0.186130,0.142857,0.000000,0.094238,0.576908,0.576908,0.094238,0.508999,0.475544,0.245352,0.306190,0.456942,0.004279,0.009668,0.446942,0.000000,0.000000,0.090361,0.113208,0.0,0.041667,0.162162,0.044118,0.072917,0.000000,0.142857,0.000000,0.0,0.071429,0.000000,0.0,0.333333,0.150000,0.158696,0.119130,0.136614,0.117752,0.092166,0.046190,...,0.450729,0.119048,0.333333,0.000000,0.000000,0.000000,0.000000,0.763121,5.000000e-08,2.500000e-08,0.708333,0.828571,0.137392,0.515937,0.104000,0.564739,0.611255,0.656064,0.684078,0.693231,0.692891,0.694843,0.694418,0.695610,0.264124,0.481651,0.0,0.598098,0.000000,0.678428,0.00000,0.694547,0.000000,0.694451,0.229340,0.177722,0.586363,1.702500e-08,0.147208,0.117264,0.122178,0.181136,0.091165,0.090909,0.303371,0.252930,0.133333,0.841924,0.029197,0.014184
GETWEJ_FSR,0.030842,0.042997,0.113375,0.079831,0.058477,0.578140,0.851515,0.441659,0.005776,0.083700,0.121845,0.174644,0.071429,0.136364,0.109914,0.558498,0.550266,0.109914,0.814610,0.509278,0.275061,0.284310,0.488665,0.004345,0.011680,0.482933,0.076923,0.066667,0.096386,0.113208,0.0,0.000000,0.135135,0.058824,0.083333,0.166667,0.071429,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.144348,0.141850,0.139723,0.087395,0.057044,0.032245,...,0.167734,0.322222,0.510988,0.310832,0.421875,0.000000,0.000000,0.454000,7.000000e-08,3.000000e-08,1.000000,1.000000,0.095980,0.559945,0.112000,0.594051,0.640260,0.676221,0.703830,0.707428,0.706289,0.704990,0.703971,0.703132,0.271359,0.526605,0.0,0.623652,0.471896,0.692485,0.59179,0.700326,0.637619,0.694782,0.313095,0.072388,0.201635,1.580000e-08,0.086294,0.110749,0.104914,0.086255,0.096491,0.000000,0.887640,0.866592,0.200000,0.652921,0.036496,0.147518


In [13]:
# Persist the two splits and the fitted preprocessing model.
# The output directory is created first: to_csv / joblib.dump do not create missing
# parent directories, so this cell would otherwise fail on a fresh checkout.
Path("../qmof_datasets").mkdir(parents=True, exist_ok=True)

train.to_csv("../qmof_datasets/train.csv")
test.to_csv("../qmof_datasets/test.csv")
# Despite the "scaler" file name, the whole fitted PreprocessingModel object is pickled.
joblib.dump(model, "../qmof_datasets/scaler.pkl")

['../qmof_datasets/scaler.pkl']