In [1]:
import numpy as np
import pandas as pd

from rdkit import Chem


import sklearn
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import neighbors, svm
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC, SVR
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV


import matplotlib
#matplotlib.use('GTKAgg')
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import ListedColormap
%matplotlib inline
matplotlib.rcParams.update({'font.size': 18})

In [4]:
# Load the endpoint tables, one CSV per measured property (presumably ADME/Tox
# endpoints, judging by the file names). Paths are relative to the directory
# the notebook kernel was started in.
# NOTE(review): only data_T_half is used in the cells visible below.
data_AUC = pd.read_csv('adme/data/AUC_final.csv')
data_CL_max = pd.read_csv('adme/data/CL_max_final.csv')
data_Cmax = pd.read_csv('adme/data/Cmax_final.csv')
data_F = pd.read_csv('adme/data/F_final.csv')
data_IC_50 = pd.read_csv('adme/data/IC_50_final.csv')
data_LD_50 = pd.read_csv('adme/data/LD_50_final.csv')
data_PPB = pd.read_csv('adme/data/PPB_final.csv')
data_T_half = pd.read_csv('adme/data/T_1_2_final.csv')
data_Vdss = pd.read_csv('adme/data/Vdss_final.csv')


In [5]:
# Select the half-life table as the working frame. This binds a second name to
# the same DataFrame object; it does not copy it.
df = data_T_half

In [6]:
# Display the full half-life table (pandas truncates the rendering).
df

Unnamed: 0,chembl_id,organism,canonical_smiles,T1/2 /hr
0,CHEMBL99828,Rattus norvegicus,Cc1cc(-c2ccc(S(C)(=O)=O)cc2)c(-c2ccc(F)cc2)s1,19.40000
1,CHEMBL9946,Rattus norvegicus,CCCCN(CC)c1nc(C)nc2c1c(C)cn2-c1c(C)cc(C)cc1C,1.50000
2,CHEMBL99214,Rattus norvegicus,C[C@@H]1[C@H](OC(=O)N[C@@H](CSc2ccccc2)[C@H](O...,2.53300
3,CHEMBL99066,Rattus norvegicus,CC(C)c1ccccc1Sc1ccc(/C=C/C(=O)N2CCCC(C(=O)O)C2...,0.40000
4,CHEMBL98936,Rattus norvegicus,O=C(N[C@@H](Cc1ccccc1)C[C@H](O)[C@H](Cc1ccccc1...,0.69000
...,...,...,...,...
17232,CHEMBL101083,Rattus norvegicus,CC1(c2ccc(OCCN3CCCCC3)cc2)c2ccc(O)cc2CCN1c1ccc...,7.10000
17233,CHEMBL1007,Rattus norvegicus,CC(C)C[C@H](NC(=O)CNC(=O)[C@H](Cc1ccc(O)cc1)NC...,2.63000
17234,CHEMBL100379,Rattus norvegicus,C[C@H](NC(=O)/C=C/c1ccc(F)cc1)c1ccc(F)c(N2CCOC...,3.50000
17235,CHEMBL100367,Rattus norvegicus,O=C(O)C1CCCN(C(=O)/C=C/c2ccc(Sc3ccc4c(c3)OCCO4...,2.30000


In [7]:
# Keep only the rows whose half-life is strictly below 1 hour.
# NOTE(review): `df` is rebound, so the unfiltered table is only reachable as
# data_T_half from here on. The original row labels are kept (no reset_index),
# so the index has gaps and does not match frames built with a fresh 0..n-1 index.
df = df[df['T1/2 /hr'] < 1]

In [8]:
# Display the filtered table to check the result of the half-life cut.
df

Unnamed: 0,chembl_id,organism,canonical_smiles,T1/2 /hr
3,CHEMBL99066,Rattus norvegicus,CC(C)c1ccccc1Sc1ccc(/C=C/C(=O)N2CCCC(C(=O)O)C2...,0.40000
4,CHEMBL98936,Rattus norvegicus,O=C(N[C@@H](Cc1ccccc1)C[C@H](O)[C@H](Cc1ccccc1...,0.69000
7,CHEMBL982,Mus musculus,C=C1CC[C@@]2(O)[C@H]3Cc4ccc(O)c5c4[C@@]2(CCN3C...,0.33330
8,CHEMBL9806,Mus musculus,CS(=O)(=O)Nc1ccc(Nc2c3ccccc3nc3ccccc23)cc1,0.91670
9,CHEMBL98,Mus musculus,O=C(CCCCCCC(=O)Nc1ccccc1)NO,0.38000
...,...,...,...,...
17219,CHEMBL10257,Rattus norvegicus,COc1ccc(C#Cc2ccc(S(=O)(=O)NC(CC#Cc3ccccc3)C(=O...,0.70000
17222,CHEMBL102299,Mus musculus,Cc1ccccc1NC(=O)Nc1ccc(CC(=O)N[C@@H](CC(C)C)C(=...,0.80000
17226,CHEMBL101874,Bos taurus,CC[C@H](C)[C@H](NC(=O)[C@@H](NC(=O)[C@H](CC(C)...,0.16670
17230,CHEMBL1014,Rattus norvegicus,CCOc1nc2cccc(C(=O)OC(C)OC(=O)OC3CCCCC3)c2n1Cc1...,0.08200


In [9]:
from rdkit.Chem import Descriptors

def generate(smiles, verbose=False):
    """Compute MolWt and NumRotatableBonds for every SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``MolWt`` and ``NumRotatableBonds``. Empty input gives
        an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    # MolLogP is currently disabled for this feature group.
    columnNames = ["MolWt", "NumRotatableBonds"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.MolWt(mol),
                     Descriptors.NumRotatableBonds(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_1(smiles, verbose=False):
    """Compute NumHAcceptors, NumHDonors and NumHeteroatoms per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``NumHAcceptors``, ``NumHDonors``, ``NumHeteroatoms``.
        Empty input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["NumHAcceptors", "NumHDonors", "NumHeteroatoms"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.NumHAcceptors(mol),
                     Descriptors.NumHDonors(mol),
                     Descriptors.NumHeteroatoms(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_2(smiles, verbose=False):
    """Compute MolMR, HeavyAtomCount and HeavyAtomMolWt per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``MolMR``, ``HeavyAtomCount``, ``HeavyAtomMolwt``.
        Empty input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["MolMR", "HeavyAtomCount", "HeavyAtomMolwt"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.MolMR(mol),
                     Descriptors.HeavyAtomCount(mol),
                     Descriptors.HeavyAtomMolWt(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_3(smiles, verbose=False):
    """Compute NOCount and NumValenceElectrons per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``NOCount`` and ``NumValenceElectrons``. Empty input
        gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    # NHOHCount is currently disabled for this feature group.
    columnNames = ["NOCount", "NumValenceElectrons"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.NOCount(mol),
                     Descriptors.NumValenceElectrons(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_4(smiles, verbose=False):
    """Compute RingCount, qed and MaxAbsEStateIndex per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``RingCount``, ``qed``, ``MaxAbsEStateIndex``. Empty
        input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["RingCount", "qed", "MaxAbsEStateIndex"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.RingCount(mol),
                     Descriptors.qed(mol),
                     Descriptors.MaxAbsEStateIndex(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_5(smiles, verbose=False):
    """Compute TPSA, LabuteASA and PEOE_VSA1 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``TPSA``, ``LabuteASA``, ``PEOE_VSA1``. Empty input
        gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["TPSA", "LabuteASA", "PEOE_VSA1"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.TPSA(mol),
                     Descriptors.LabuteASA(mol),
                     Descriptors.PEOE_VSA1(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)



def generate_7(smiles, verbose=False):
    """Compute FractionCSP3, MaxPartialCharge and MinPartialCharge per SMILES.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``FractionCSP3``, ``MaxPartialCharge``,
        ``MinPartialCharge``. Empty input gives an empty frame with the same
        columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["FractionCSP3", "MaxPartialCharge", "MinPartialCharge"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.FractionCSP3(mol),
                     Descriptors.MaxPartialCharge(mol),
                     Descriptors.MinPartialCharge(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_8(smiles, verbose=False):
    """Compute FpDensityMorgan1, FpDensityMorgan2 and FpDensityMorgan3 per SMILES.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``FpDensityMorgan1``, ``FpDensityMorgan2``,
        ``FpDensityMorgan3``. Empty input gives an empty frame with the same
        columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["FpDensityMorgan1", "FpDensityMorgan2", "FpDensityMorgan3"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.FpDensityMorgan1(mol),
                     Descriptors.FpDensityMorgan2(mol),
                     Descriptors.FpDensityMorgan3(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_9(smiles, verbose=False):
    """Compute Chi3n and Chi3v per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``Chi3n`` and ``Chi3v``. Empty input gives an empty
        frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    # Chi4v is currently disabled for this feature group.
    columnNames = ["Chi3n", "Chi3v"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.Chi3n(mol),
                     Descriptors.Chi3v(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_10(smiles, verbose=False):
    """Compute Kappa3, PEOE_VSA10 and SMR_VSA4 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``Kappa3``, ``PEOE_VSA10``, ``SMR_VSA4``. Empty input
        gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["Kappa3", "PEOE_VSA10", "SMR_VSA4"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.Kappa3(mol),
                     Descriptors.PEOE_VSA10(mol),
                     Descriptors.SMR_VSA4(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_11(smiles, verbose=False):
    """Compute PEOE_VSA12, PEOE_VSA2 and PEOE_VSA5 per SMILES string.

    NOTE(review): this file defines a second function named ``generate_11``
    further down. That later definition replaces this one when the cell runs,
    so a call to ``generate_11(...)`` does not compute this descriptor set.
    One of the two probably needs a distinct name.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``PEOE_VSA12``, ``PEOE_VSA2``, ``PEOE_VSA5``. Empty
        input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["PEOE_VSA12", "PEOE_VSA2", "PEOE_VSA5"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.PEOE_VSA12(mol),
                     Descriptors.PEOE_VSA2(mol),
                     Descriptors.PEOE_VSA5(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_11(smiles, verbose=False):
    """Compute SMR_VSA10, PEOE_VSA9 and PEOE_VSA6 per SMILES string.

    NOTE(review): this is the second definition named ``generate_11`` in this
    file. It replaces the earlier one (PEOE_VSA12 / PEOE_VSA2 / PEOE_VSA5), so
    this is the descriptor set a call to ``generate_11(...)`` produces.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``SMR_VSA10``, ``PEOE_VSA9``, ``PEOE_VSA6``. Empty
        input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["SMR_VSA10", "PEOE_VSA9", "PEOE_VSA6"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.SMR_VSA10(mol),
                     Descriptors.PEOE_VSA9(mol),
                     Descriptors.PEOE_VSA6(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)




def generate_14(smiles, verbose=False):
    """Compute SlogP_VSA4, SlogP_VSA5 and SlogP_VSA2 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``SlogP_VSA4``, ``SlogP_VSA5``, ``SlogP_VSA2``. Empty
        input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["SlogP_VSA4", "SlogP_VSA5", "SlogP_VSA2"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.SlogP_VSA4(mol),
                     Descriptors.SlogP_VSA5(mol),
                     Descriptors.SlogP_VSA2(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_15(smiles, verbose=False):
    """Compute SlogP_VSA7 and EState_VSA1 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``SlogP_VSA7`` and ``Estate_VSA1`` (the column label
        uses a lowercase "s", unlike the RDKit descriptor name). Empty input
        gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["SlogP_VSA7", "Estate_VSA1"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.SlogP_VSA7(mol),
                     Descriptors.EState_VSA1(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_16(smiles, verbose=False):
    """Compute EState_VSA10, EState_VSA2 and EState_VSA3 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``Estate_VSA10``, ``Estate_VSA2``, ``Estate_VSA3``
        (column labels use a lowercase "s", unlike the RDKit descriptor
        names). Empty input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["Estate_VSA10", "Estate_VSA2", "Estate_VSA3"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.EState_VSA10(mol),
                     Descriptors.EState_VSA2(mol),
                     Descriptors.EState_VSA3(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_17(smiles, verbose=False):
    """Compute EState_VSA4, EState_VSA7 and EState_VSA8 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``Estate_VSA4``, ``Estate_VSA7``, ``Estate_VSA8``
        (column labels use a lowercase "s", unlike the RDKit descriptor
        names). Empty input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["Estate_VSA4", "Estate_VSA7", "Estate_VSA8"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.EState_VSA4(mol),
                     Descriptors.EState_VSA7(mol),
                     Descriptors.EState_VSA8(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)

def generate_18(smiles, verbose=False):
    """Compute EState_VSA9 and VSA_EState1 per SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with a default 0..n-1 index
        and the columns ``EState_VSA9`` and ``VSA_Estate1`` (the second label
        uses a lowercase "s", unlike the RDKit descriptor name). Empty input
        gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If RDKit cannot parse one of the SMILES strings.
    """
    columnNames = ["EState_VSA9", "VSA_Estate1"]

    # Collect plain rows and build the array once; stacking inside the loop
    # would copy the whole table on every iteration.
    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None.
            raise ValueError("Could not parse SMILES: %r" % (elem,))
        rows.append([Descriptors.EState_VSA9(mol),
                     Descriptors.VSA_EState1(mol)])

    # Force a 2-D (n_molecules, n_descriptors) shape so that zero or one input
    # molecule still lines up with the column names.
    baseData = np.array(rows).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)



def AromaticAtoms(m):
    """Return how many atoms of molecule ``m`` are flagged as aromatic.

    ``m`` must provide ``GetNumAtoms()`` and ``GetAtomWithIdx(i)``, and each
    atom must provide ``GetIsAromatic()``. The result is an ``int`` (0 for a
    molecule without atoms).
    """
    atom_total = m.GetNumAtoms()
    aromatic_flags = [m.GetAtomWithIdx(idx).GetIsAromatic() for idx in range(atom_total)]
    # list.count compares with ==, i.e. it counts the flags equal to True.
    return aromatic_flags.count(True)

In [10]:
# Feature group: MolWt, NumRotatableBonds (one row per filtered compound).
df1 = generate(df.canonical_smiles)

In [11]:
# Feature group: NumHAcceptors, NumHDonors, NumHeteroatoms.
df2 = generate_1(df.canonical_smiles)

In [12]:
# Feature group: MolMR, HeavyAtomCount, HeavyAtomMolwt.
df3 = generate_2(df.canonical_smiles)

In [13]:
# Feature group: NOCount, NumValenceElectrons.
df4 = generate_3(df.canonical_smiles)

In [14]:
# Feature group: RingCount, qed, MaxAbsEStateIndex.
df5 = generate_4(df.canonical_smiles)

In [15]:
# Feature group: TPSA, LabuteASA, PEOE_VSA1.
df6 = generate_5(df.canonical_smiles)

In [16]:
# Feature group: FractionCSP3, MaxPartialCharge, MinPartialCharge.
# NOTE(review): the numbering skips df7; no generate_6 is defined in the
# visible cells.
df8 = generate_7(df.canonical_smiles)

In [17]:
# Feature group: FpDensityMorgan1, FpDensityMorgan2, FpDensityMorgan3.
df9 = generate_8(df.canonical_smiles)

In [18]:
# Feature group: Chi3n, Chi3v.
df10 = generate_9(df.canonical_smiles)

In [19]:
# Feature group: Kappa3, PEOE_VSA10, SMR_VSA4.
df11 = generate_10(df.canonical_smiles)

In [20]:
# Feature group: SMR_VSA10, PEOE_VSA9, PEOE_VSA6.
# NOTE(review): generate_11 is defined twice above; this call runs the later
# definition, so the PEOE_VSA12 / PEOE_VSA2 / PEOE_VSA5 group is never computed.
df12 = generate_11(df.canonical_smiles)

In [21]:
# Disabled cell: no generate_12 is defined in the visible part of this notebook,
# so enabling this line as-is would raise NameError.
# df13 = generate_12(df.canonical_smiles)

In [22]:
# Disabled cell: no generate_13 is defined in the visible part of this notebook,
# so enabling this line as-is would raise NameError.
# df14 = generate_13(df.canonical_smiles)

In [23]:
# Feature group: SlogP_VSA4, SlogP_VSA5, SlogP_VSA2.
df15 = generate_14(df.canonical_smiles)

In [24]:
# Feature group: SlogP_VSA7, Estate_VSA1.
df16 = generate_15(df.canonical_smiles)

In [25]:
# Feature group: Estate_VSA10, Estate_VSA2, Estate_VSA3.
df17 = generate_16(df.canonical_smiles)

In [26]:
# Feature group: Estate_VSA4, Estate_VSA7, Estate_VSA8.
df18 = generate_17(df.canonical_smiles)

In [27]:
# Feature group: EState_VSA9, VSA_Estate1.
df19 = generate_18(df.canonical_smiles)

In [28]:
# Parse every filtered SMILES string into an RDKit molecule.
mol_list= []
for element in df.canonical_smiles:
  mol = Chem.MolFromSmiles(element)
  mol_list.append(mol)

# Aromatic proportion = aromatic atom count / heavy atom count, per molecule.
# NOTE(review): this divides by HeavyAtomCount, so a molecule reporting zero
# heavy atoms would raise ZeroDivisionError.
desc_AromaticProportion = [AromaticAtoms(element)/Descriptors.HeavyAtomCount(element) for element in mol_list]
# NOTE(review): this frame is not among the frames concatenated into X in the
# following cell.
df_desc_AromaticProportion = pd.DataFrame(desc_AromaticProportion, columns=['AromaticProportion'])


In [29]:
# Assemble the feature matrix by placing the descriptor frames side by side.
# axis=1 aligns rows on the index; every frame here was built from the same
# SMILES column, so they share one index.
# NOTE(review): df7, df13 and df14 are absent from the list (never created /
# disabled above), and df_desc_AromaticProportion is not included either.
X = pd.concat([df1,df2,df3,df4,df5,df6,df8,df9,df10,df11,df12,df15,df16,df17,df18,df19], axis=1)
X

Unnamed: 0,MolWt,NumRotatableBonds,NumHAcceptors,NumHDonors,NumHeteroatoms,MolMR,HeavyAtomCount,HeavyAtomMolwt,NOCount,NumValenceElectrons,...,SlogP_VSA7,Estate_VSA1,Estate_VSA10,Estate_VSA2,Estate_VSA3,Estate_VSA4,Estate_VSA7,Estate_VSA8,EState_VSA9,VSA_Estate1
0,454.548,7.0,5,1,8,123.7242,32.0,428.340,7,168,...,0.0,16.810522,24.809920,18.139322,35.763241,10.458935,24.265468,13.847474,0.000000,0.000000
1,567.664,11.0,9,3,11,146.9692,40.0,534.400,10,212,...,0.0,36.478095,14.695602,25.235636,24.511733,17.547725,60.663671,15.617556,18.947452,22.165829
2,339.435,2.0,4,2,4,93.9306,25.0,314.235,4,132,...,0.0,11.016041,10.213055,17.895319,5.749512,55.827261,6.066367,11.478845,4.736863,6.287456
3,363.442,4.0,4,2,6,107.4402,26.0,346.306,5,130,...,0.0,10.023291,8.417797,0.000000,5.687386,39.436392,60.663671,10.038883,4.983979,25.137508
4,264.325,8.0,3,3,5,72.7029,19.0,244.165,5,104,...,0.0,0.000000,9.589074,11.814359,12.841643,31.370673,30.331835,5.316789,5.207253,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5791,459.523,6.0,4,2,7,125.0983,33.0,438.355,6,166,...,0.0,22.034437,18.318862,11.316305,5.563451,16.876415,30.331835,28.403832,4.736863,32.633011
5792,522.646,10.0,4,4,9,146.9569,38.0,484.342,9,204,...,0.0,12.011146,24.284676,42.646864,25.073786,29.655933,45.036680,15.950366,0.000000,0.000000
5793,813.010,21.0,10,7,17,216.6869,58.0,748.498,17,322,...,0.0,89.368446,33.561760,31.098277,12.114750,5.563451,26.837579,41.868689,9.473726,10.260239
5794,610.671,10.0,11,1,12,164.3702,45.0,576.399,12,232,...,0.0,18.414698,9.589074,11.667418,36.019909,54.357914,66.088140,25.608112,18.947452,23.894188


In [32]:
# Regression target: the half-life column. It is selected by name rather than
# by position, so a reordered or extended input table cannot silently swap in
# a different column as the target.
y = df['T1/2 /hr']

In [33]:
# Hold out 33% of the compounds for testing; random_state=42 makes the shuffle
# reproducible.
# NOTE(review): X carries a fresh 0..n-1 index while y keeps the row labels of
# the filtered frame, so the two are paired by row position here, not by label.
# Confirm before joining X and y by index anywhere else.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [34]:
# Standardise the features with statistics learned from the training split
# only, then apply that same transform to the test split.
X_train_SS = StandardScaler().fit(X_train)
# NOTE(review): X_train / X_test are overwritten with the scaled arrays, so
# re-running this cell without re-running the split would fit the scaler on
# already-scaled data.
X_train = X_train_SS.transform(X_train)
X_test = X_train_SS.transform(X_test)

# Support vector regression with an RBF kernel and fixed hyper-parameters
# (C and gamma are hard-coded here, not tuned in this cell).
regressor=SVR(kernel='rbf',verbose=False, C = 1, gamma = 0.2)
regressor.fit(X_train, y_train)

# Predict half-lives for the held-out compounds.
y_pred_test = regressor.predict(X_test)

# Report test-set error and goodness of fit.
print('Mean squared error (MSE): %.2f'
      % mean_squared_error(y_test, y_pred_test))
print('Coefficient of determination (R^2): %.2f'
      % r2_score(y_test, y_pred_test))

Mean squared error (MSE): 0.06
Coefficient of determination (R^2): 0.19
