In [1]:
import numpy as np
import pandas as pd

import deepchem as dc

import sklearn
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

from sklearn import metrics
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from functools import reduce

import warnings
warnings.filterwarnings('ignore')

import pickle
from sklearn.feature_selection import VarianceThreshold
import matplotlib.pyplot as plt
import seaborn as sns

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading some PyTorch models, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'torch'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


# 导入模型

In [2]:
# Load the pickled model stored in rf_maccs_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('rf_maccs_model.pkl', 'rb') as f:
    rf_maccs_model = pickle.load(f)

In [3]:
# Load the pickled model stored in rf_rdkit_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('rf_rdkit_model.pkl', 'rb') as f:
    rf_rdkit_model = pickle.load(f)

In [4]:
# Load the pickled model stored in svm_maccs_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('svm_maccs_model.pkl', 'rb') as f:
    svm_maccs_model = pickle.load(f)

In [5]:
# Load the pickled model stored in xgb_maccs_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('xgb_maccs_model.pkl', 'rb') as f:
    xgb_maccs_model = pickle.load(f)

# 导入数据

In [6]:
# Labelled compounds; later cells read its 'smiles' and 'class' columns.
data = pd.read_csv('../data/GABAA.csv',encoding='gb18030')
# Compounds to be predicted; later cells read its 'smiles' and
# 'Volatile_compounds' columns. Both files are decoded as GB18030.
plant = pd.read_csv('../data/AD_results(0.6).csv',encoding='gb18030')

In [8]:
plant

Unnamed: 0,SMILES,InDomain,Volatile_compounds,Analysis_method,Cite,smiles,final_pred
0,CCCCCO,True,1-Pentanol,GC-MS,"Morteza-Semnani K, Saeedi M, Akbarzadeh M. Che...",CCCCCO,1
1,CCCCCCCCCCCCCCCC(=O)O,True,Palmitic Acid,GC-MS,"Morteza-Semnani K, Saeedi M, Akbarzadeh M. Che...",CCCCCCCCCCCCCCCC(=O)O,1
2,CCCCCCCCCCCCCCCC(=O)OC,True,Methyl palmitate,GC-MS,"Morteza-Semnani K, Saeedi M, Akbarzadeh M. Che...",CCCCCCCCCCCCCCCC(=O)OC,1
3,CC=CC(=O)C1=C(CCCC1(C)C)C,True,"(Z)-1-(2,6,6-Trimethyl-1-cyclohexen-1-yl)-2-bu...",GC-MS,"Morteza-Semnani K, Saeedi M, Akbarzadeh M. Che...",CC=CC(=O)C1=C(CCCC1(C)C)C,1
4,CC1(C2CCC1(C(C2)OC=O)C)C,True,Bornyl formate,GC-MS,"El-Sayed Z I A. Chemical composition, antimicr...",CC1(C2CCC1(C(C2)OC=O)C)C,1
...,...,...,...,...,...,...,...
2391,CC1CCCC(=C1)C,False,"1,3-Dimethylcyclohexene",GC-MS,"Liang J, Shao Y, Wu H, et al. Chemical constit...",CC1CCCC(=C1)C,1
2392,CN1C=CC=CC1=O,False,1-Methyl-2-pyridone,GC-MS,"Kim S S, Oh H J, Baik J S, et al. Chemical com...",CN1C=CC=CC1=O,0
2393,CC1=CCC2C1C3C2(CCC3C(C)C)C,False,alpha-Bourbonene,GC-MS,"Kim S S, Oh H J, Baik J S, et al. Chemical com...",CC1=CCC2C1C3C2(CCC3C(C)C)C,1
2394,CC1=C2CC(C(CC2OC1=O)(C)C=C)C(=C)C(=O)OC,False,Deoxysericealactone,GC-MS,"Chou S T, Lai C C, Lai C P, et al. Chemical co...",CC1=C2CC(C(CC2OC1=O)(C)C=C)C(=C)C(=O)OC,1


# 特征提取

In [7]:
def featurizer(featname):
    """Featurize ``data['smiles']`` and return a seeded 80/20 train/test split.

    The molecules in the module-level ``data`` frame are converted to the
    requested feature set, low-variance columns are dropped with
    ``VarianceThreshold`` and the result is split with
    ``dc.splits.RandomSplitter`` (``frac_train=0.8``, ``seed=100``).

    Parameters
    ----------
    featname : str
        ``"MACCS"`` for ``dc.feat.MACCSKeysFingerprint`` or ``"RDkit"`` for
        ``dc.feat.RDKitDescriptors``.

    Returns
    -------
    tuple
        ``(data_train, data_test, label_train, label_test)``: the ``X`` and
        ``y`` arrays of the train and test splits.

    Raises
    ------
    ValueError
        If ``featname`` is not a supported feature-set name. (Previously this
        fell through and failed with an UnboundLocalError at ``return``.)
    """
    # Validate first so a typo fails fast, before any expensive featurization.
    if featname not in ("MACCS", "RDkit"):
        raise ValueError(
            "Unknown feature set {!r}; expected 'MACCS' or 'RDkit'".format(featname)
        )

    if featname == "MACCS":
        feat = dc.feat.MACCSKeysFingerprint()
    else:
        feat = dc.feat.RDKitDescriptors()
    features = feat.featurize(data['smiles'])

    # Drop columns whose variance is below .98 * (1 - .98).
    vt = VarianceThreshold(threshold=(.98 * (1 - .98)))
    X_new = vt.fit_transform(features)

    dataset = dc.data.NumpyDataset(X_new, data['class'])
    splitter = dc.splits.RandomSplitter()
    # The fixed seed makes every call return the same row split.
    train_dataset, test_dataset = splitter.train_test_split(
        dataset=dataset, frac_train=0.8, seed=100
    )

    return train_dataset.X, test_dataset.X, train_dataset.y, test_dataset.y

# 定义模型

In [8]:
def SelectModel(modelname):
    """Return the pre-loaded base model registered under ``modelname``.

    Parameters
    ----------
    modelname : str
        One of ``"rf_maccs"``, ``"rf_rdkit"``, ``"svm_maccs"``, ``"xgb_maccs"``.

    Returns
    -------
    object
        The corresponding module-level model object (``rf_maccs_model`` etc.).

    Raises
    ------
    ValueError
        If ``modelname`` is not a known model name. (Previously this fell
        through and failed with an UnboundLocalError at ``return``.)
    """
    if modelname == "rf_maccs":
        return rf_maccs_model
    if modelname == "rf_rdkit":
        return rf_rdkit_model
    if modelname == "svm_maccs":
        return svm_maccs_model
    if modelname == "xgb_maccs":
        return xgb_maccs_model
    raise ValueError(
        "Unknown model name {!r}; expected one of "
        "'rf_maccs', 'rf_rdkit', 'svm_maccs', 'xgb_maccs'".format(modelname)
    )

In [9]:
def get_oof(clf,n_folds,X_train,y_train,X_test):
    """Produce level-1 stacking features from an already fitted classifier.

    ``X_train`` is walked fold by fold (shuffled ``KFold``, ``random_state=42``)
    and ``clf.predict_proba`` fills the matching rows of ``oof_train``. The
    probabilities for ``X_test`` are accumulated once per fold and divided by
    ``n_folds``.

    NOTE(review): ``clf`` is not refit inside the loop (the per-fold ``fit`` was
    disabled), so every fold is scored by the same model. If that model was
    trained on these rows, the "out-of-fold" values are in-sample; confirm.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba``.
    n_folds : int
        Number of KFold splits.
    X_train, X_test : array-like supporting ``.shape`` and integer-array indexing.
    y_train : array-like
        Only used to count the distinct classes.

    Returns
    -------
    tuple
        ``(oof_train, oof_test)`` with shapes ``(n_train, n_classes)`` and
        ``(n_test, n_classes)``.
    """
    n_classes = len(np.unique(y_train))
    kf = KFold(n_splits=n_folds, random_state=42, shuffle=True)
    oof_train = np.zeros((X_train.shape[0], n_classes))
    oof_test = np.zeros((X_test.shape[0], n_classes))

    # The classifier does not change between folds, so its test-set
    # probabilities are loop-invariant: compute them once.
    test_proba = clf.predict_proba(X_test)

    for _, holdout_index in kf.split(X_train):
        oof_train[holdout_index] = clf.predict_proba(X_train[holdout_index])
        # Same accumulate-then-divide arithmetic as before.
        oof_test += test_proba

    oof_test = oof_test / float(n_folds)
    return oof_train, oof_test

In [10]:
# Stacking, level 1: every pre-trained base model turns its own feature set into
# class-probability columns that become the training set of the level-2 model.
modelist = ['rf_maccs','rf_rdkit','svm_maccs','xgb_maccs']
# Feature set each base model expects.
model_features = {
    'rf_maccs': 'MACCS',
    'rf_rdkit': 'RDkit',
    'svm_maccs': 'MACCS',
    'xgb_maccs': 'MACCS',
}
newfeature_list = []
newtestdata_list = []
# featurizer() is expensive and returns the same split for a given feature set
# (the split is seeded), so compute each feature set only once.
split_cache = {}

for modelname in modelist:
    clf_first = SelectModel(modelname)

    featname = model_features[modelname]
    if featname not in split_cache:
        split_cache[featname] = featurizer(featname)
    data_train, data_test, label_train, label_test = split_cache[featname]

    oof_train_, oof_test_ = get_oof(clf=clf_first, n_folds=5, X_train=data_train,
                                    y_train=label_train, X_test=data_test)
    newfeature_list.append(oof_train_)
    newtestdata_list.append(oof_test_)


In [11]:
# Combine the level-1 outputs column-wise (one block of class-probability
# columns per base model).
newfeature = np.concatenate(newfeature_list, axis=1)
newtestdata = np.concatenate(newtestdata_list, axis=1)

# Level 2: train the meta-classifier on the level-1 outputs.
# random_state is fixed so the fitted forest is reproducible across runs.
clf_second1 = RandomForestClassifier(random_state=42)
clf_second1.fit(newfeature, label_train)

# 定义特征

In [9]:
featurizer_maccs = dc.feat.MACCSKeysFingerprint()
featurizer_rdkit = dc.feat.RDKitDescriptors()

In [10]:
data_features_maccs = featurizer_maccs.featurize(data['smiles'])
data_features_rdkit = featurizer_rdkit.featurize(data['smiles'])

In [10]:
plant_features_maccs = featurizer_maccs.featurize(plant['smiles'])
plant_features_rdkit = featurizer_rdkit.featurize(plant['smiles'])

# 获得方差过滤后的特征maccs

In [11]:
# Initialise the VarianceThreshold selector.
# .98 * (1 - .98) is the variance of a 0/1 column that takes the same value in
# 98% of the rows, so columns at least that constant are dropped.
vt_maccs = VarianceThreshold(threshold = (.98 * (1 - .98)))

# Fit the selector on the MACCS features of `data` and keep the surviving columns.
data_maccs_new = vt_maccs.fit_transform(data_features_maccs)

In [12]:
data_maccs_mask = vt_maccs.get_support(indices=True)

In [13]:
plant_maccs_features = plant_features_maccs[:, data_maccs_mask]

In [14]:
data_maccs_new.shape

(488, 129)

In [15]:
plant_maccs_features.shape

(2396, 129)

# 获得方差过滤后的特征rdkit

In [16]:
# Initialise the VarianceThreshold selector (same threshold as for MACCS).
# NOTE(review): the .98 * (1 - .98) value is derived for 0/1 columns; RDKit
# descriptors are presumably continuous and unscaled, so confirm the threshold
# is meaningful here.
vt_rdkit = VarianceThreshold(threshold = (.98 * (1 - .98)))

# Fit the selector on the RDKit features of `data` and keep the surviving columns.
data_rdkit_new = vt_rdkit.fit_transform(data_features_rdkit)

In [17]:
data_rdkit_mask = vt_rdkit.get_support(indices=True)

In [18]:
plant_rdkit_features = plant_features_rdkit[:, data_rdkit_mask]

In [19]:
data_rdkit_new.shape

(488, 164)

In [20]:
plant_rdkit_features.shape

(2396, 164)

# iPlant

In [21]:
# Predict the plant compounds with each pre-trained base model, feeding each
# model the variance-filtered feature set it is named after (MACCS or RDKit).
pred_rf_maccs = rf_maccs_model.predict(plant_maccs_features)
pred_rf_rdkit = rf_rdkit_model.predict(plant_rdkit_features)
pred_svm_maccs = svm_maccs_model.predict(plant_maccs_features)
pred_xgb_maccs = xgb_maccs_model.predict(plant_maccs_features)

In [22]:
# Voting: collect the four prediction vectors and add them element-wise, giving
# for every compound the number of models that predicted class 1.
pred_count = [pred_rf_maccs, pred_rf_rdkit, pred_svm_maccs, pred_xgb_maccs]
num_ones = np.sum(pred_count, axis=0)

In [51]:
# Consensus rule: a compound is labelled 1 only when every base model voted 1
# (unanimity, not a majority vote). The required count is taken from
# pred_count rather than a hard-coded 4, so it follows the list of voters.
n_voters = len(pred_count)
final_prediction = [1 if num == n_voters else 0 for num in num_ones]

In [53]:
final_prediction

list

In [56]:
# Assemble the result table: compound name, SMILES and consensus prediction.
df = pd.DataFrame(
    dict(
        name=plant['Volatile_compounds'],
        smiles=plant['smiles'],
        final_pred=final_prediction,
    )
)

0      1
1      1
2      1
3      1
4      1
      ..
456    0
457    0
458    1
459    1
460    1
Name: final_pred, Length: 461, dtype: int64

In [64]:
# Count the predicted classes among the first 461 rows of df.
# NOTE(review): 461 is hard-coded; presumably the number of in-domain
# compounds. Confirm, or derive it from the data.
final_pred_counts = df['final_pred'][:461].value_counts()

# Extract the counts of 1s and 0s. .get(..., 0) returns 0 instead of raising
# KeyError when a class never occurs. Dedicated names are used so the
# per-compound vote array `num_ones` is not overwritten by a scalar, which
# would break a re-run of the consensus cell.
n_pred_ones = final_pred_counts.get(1, 0)
n_pred_zeros = final_pred_counts.get(0, 0)

print("Number of 1s:", n_pred_ones)
print("Number of 0s:", n_pred_zeros)

Number of 1s: 418
Number of 0s: 43


In [58]:
# Write the consensus predictions to CSV.
# NOTE(review): the input file is 'AD_results(0.6).csv' but this output name
# says 0.7; confirm the intended label.
df.to_csv('pred_result_AD(0.7).csv')

In [1]:
import numpy as np
import pandas as pd

import deepchem as dc

import sklearn
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

from sklearn import metrics
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from functools import reduce

import warnings
warnings.filterwarnings('ignore')

import pickle
from sklearn.feature_selection import VarianceThreshold
import matplotlib.pyplot as plt
import seaborn as sns

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading some PyTorch models, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'torch'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


# 导入模型

In [2]:
# Load the pickled model stored in rf_maccs_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('rf_maccs_model.pkl', 'rb') as f:
    rf_maccs_model = pickle.load(f)

In [3]:
# Load the pickled model stored in rf_rdkit_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('rf_rdkit_model.pkl', 'rb') as f:
    rf_rdkit_model = pickle.load(f)

In [4]:
# Load the pickled model stored in svm_maccs_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('svm_maccs_model.pkl', 'rb') as f:
    svm_maccs_model = pickle.load(f)

In [5]:
# Load the pickled model stored in xgb_maccs_model.pkl.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files that come from a trusted source.
with open('xgb_maccs_model.pkl', 'rb') as f:
    xgb_maccs_model = pickle.load(f)

# 导入数据

In [6]:
# Labelled compounds; later cells read its 'smiles' and 'class' columns.
data = pd.read_csv('../data/GABAA.csv',encoding='gb18030')
# Compounds to be predicted; later cells read its 'smiles' and
# 'Volatile_compounds' columns. Both files are decoded as GB18030.
plant = pd.read_csv('../data/AD_results(0.6).csv',encoding='gb18030')

In [7]:
data

Unnamed: 0,name,origin,label,smiles,class
0,gamma-Aminobutyric acid,10.3390/molecules24152678,GABAA agonist,C(CC(=O)O)CN,1
1,Menthol,10.1111/bph.12602,GABAA agonist,CC1CCC(C(C1)O)C(C)C,1
2,phenobarbital,10.1002/ana.24967,GABAA agonist,CCC1(C(=O)NC(=O)NC1=O)C2=CC=CC=C2,1
3,isoguvacine oxide,10.1002/chir.530070608,GABAA agonist,C1CNCC2C1(O2)C(=O)O,1
4,Clomethiazole,10.1016/s0014-2999(02)02233-1,GABAA agonist,CC1=C(CCCl)SC=N1,1
...,...,...,...,...,...
483,chlorogenic acid,10.1021/jf0303971,GABAA inhibition,C1C(C(C(CC1(C(=O)O)O)OC(=O)C=CC2=CC(=C(C=C2)O)...,0
484,maltol,10.1021/jf0303971,GABAA inhibition,CC1=C(C(=O)C=CO1)O,0
485,Theobromine,10.1021/jf0303971,GABAA inhibition,CN1C=NC2=C1C(=O)NC(=O)N2C,0
486,"2,3,5-trimethylpyrazine",10.1021/jf0303971,GABAA inhibition,CN1C2=C(C(=O)N(C1=O)C)NC=N2,0


In [8]:
# MACCS fingerprints for the labelled compounds and for the plant compounds.
featurizer_maccs = dc.feat.MACCSKeysFingerprint()
data_features_maccs = featurizer_maccs.featurize(data['smiles'])
plant_features_maccs = featurizer_maccs.featurize(plant['smiles'])
# Initialise the VarianceThreshold selector.
vt_maccs = VarianceThreshold(threshold = (.98 * (1 - .98)))

# Fit the selector on the labelled compounds, then keep the same columns for
# the plant compounds so both matrices share one feature layout.
data_maccs_new = vt_maccs.fit_transform(data_features_maccs)
data_maccs_mask = vt_maccs.get_support(indices=True)
plant_maccs_features = plant_features_maccs[:, data_maccs_mask]

dataset = dc.data.NumpyDataset(data_maccs_new,data['class'])

# Fixed seed so the 80% training subset is the same on every run; without it
# each execution of this cell draws a different subset.
splitter = dc.splits.RandomSplitter()
train_dataset, test_dataset = splitter.train_test_split(dataset=dataset,frac_train=0.8,seed=100)

data_train = train_dataset.X
# The plant compounds take the role of the "test" matrix; the held-out 20%
# split (test_dataset) is not used below.
data_test = plant_maccs_features
label_train = train_dataset.y

In [11]:
X_train=data_train

In [13]:
y_train=label_train

In [14]:
X_test=data_test

In [16]:
# Sizes of the two matrices and the number of distinct labels.
ntrain, ntest = X_train.shape[0], X_test.shape[0]
classnum = np.unique(y_train).size
# Shuffled 5-fold splitter with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# Zero-initialised probability buffers, one column per class.
oof_train = np.zeros(shape=(ntrain, classnum))
oof_test = np.zeros(shape=(ntest, classnum))

In [17]:
ntrain

390

In [18]:
ntest

2396

In [23]:
oof_train[0][0]

0.0

In [24]:
kf.split(X_train)

<generator object _BaseKFold.split at 0x0000025896F4C5F0>

In [26]:
clf = xgb_maccs_model

In [29]:
X_test

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 1, 1, 0],
       [0, 0, 0, ..., 1, 1, 0]])

In [32]:
# Scratch re-run of the get_oof loop with `clf` bound to xgb_maccs_model.
# NOTE(review): clf is not refit per fold (the fit call is commented out), so
# every iteration scores with the same pre-trained model and kf_X_train /
# kf_y_train are never used.
for i,(train_index, test_index) in enumerate(kf.split(X_train)):
        kf_X_train = X_train[train_index] # data
        kf_y_train = y_train[train_index] # labels

        kf_X_test = X_train[test_index]  # validation rows of this fold

        #clf.fit(kf_X_train, kf_y_train)
        oof_train[test_index] = clf.predict_proba(kf_X_test)
        print(kf_X_test)
        oof_test += clf.predict_proba(X_test)

[[0 0 0 ... 0 1 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 ...
 [0 0 0 ... 1 1 0]
 [0 1 0 ... 0 1 0]
 [0 0 0 ... 1 1 0]]
[[0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 ...
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]]
[[0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 ...
 [0 1 0 ... 0 1 0]
 [0 0 0 ... 1 0 0]
 [1 1 0 ... 1 1 0]]
[[0 0 0 ... 1 0 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 0 1 0]
 ...
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]]
[[0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]
 ...
 [0 0 0 ... 1 0 0]
 [1 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 0]]


In [39]:
oof_train[test_index].shape

(78, 2)

In [34]:
oof_test = oof_test/float(5)

In [36]:
# NOTE(review): this repeats the division done in the previous cell. When the
# cells are each run once, in order, oof_test ends up divided by 25 rather
# than 5; confirm which of the two cells should be kept.
oof_test = oof_test/float(5)

In [37]:
oof_test

array([[0.09501219, 0.90498781],
       [0.09796995, 0.90203005],
       [0.10022324, 0.89977676],
       ...,
       [0.28692394, 0.71307606],
       [0.33634263, 0.66365737],
       [0.11645609, 0.88354391]])

In [None]:
oof_train[test_index] = clf.predict_proba(kf_X_test)

# Feature extraction & Data splitting

In [19]:
def featurizer(featname, seed=100):
    """Build level-1 inputs: training rows from ``data``, prediction rows from ``plant``.

    Both the module-level ``data`` and ``plant`` frames are featurized with the
    requested feature set. ``VarianceThreshold`` is fitted on ``data`` only, and
    the surviving column indices are reused for ``plant``. ``data`` is then
    split 80/20 with ``dc.splits.RandomSplitter`` and only the training part is
    returned.

    Parameters
    ----------
    featname : str
        ``"MACCS"`` for ``dc.feat.MACCSKeysFingerprint`` or ``"RDkit"`` for
        ``dc.feat.RDKitDescriptors``.
    seed : int, optional
        Seed passed to the splitter. With an unseeded split every call returns
        a different training subset, so outputs of several calls cannot be
        combined row-wise (as stacking does) or paired with one label vector.

    Returns
    -------
    tuple
        ``(data_train, data_test, label_train)``: training features, filtered
        ``plant`` features, and training labels.

    Raises
    ------
    ValueError
        If ``featname`` is not a supported feature-set name. (Previously this
        fell through and failed with an UnboundLocalError at ``return``.)
    """
    # Validate first so a typo fails fast, before any expensive featurization.
    if featname not in ("MACCS", "RDkit"):
        raise ValueError(
            "Unknown feature set {!r}; expected 'MACCS' or 'RDkit'".format(featname)
        )

    if featname == "MACCS":
        feat = dc.feat.MACCSKeysFingerprint()
    else:
        feat = dc.feat.RDKitDescriptors()

    data_features = feat.featurize(data['smiles'])
    plant_features = feat.featurize(plant['smiles'])

    # Fit the variance filter on the labelled compounds only, then apply the
    # same column selection to the plant compounds.
    vt = VarianceThreshold(threshold=(.98 * (1 - .98)))
    data_selected = vt.fit_transform(data_features)
    kept_columns = vt.get_support(indices=True)
    plant_selected = plant_features[:, kept_columns]

    dataset = dc.data.NumpyDataset(data_selected, data['class'])
    splitter = dc.splits.RandomSplitter()
    # The held-out 20% is not returned; callers predict on `plant` instead.
    train_dataset, _ = splitter.train_test_split(
        dataset=dataset, frac_train=0.8, seed=seed
    )

    return train_dataset.X, plant_selected, train_dataset.y

# 定义模型

In [20]:
def SelectModel(modelname):
    """Return the pre-loaded base model registered under ``modelname``.

    Parameters
    ----------
    modelname : str
        One of ``"rf_maccs"``, ``"rf_rdkit"``, ``"svm_maccs"``, ``"xgb_maccs"``.

    Returns
    -------
    object
        The corresponding module-level model object (``rf_maccs_model`` etc.).

    Raises
    ------
    ValueError
        If ``modelname`` is not a known model name. (Previously this fell
        through and failed with an UnboundLocalError at ``return``.)
    """
    if modelname == "rf_maccs":
        return rf_maccs_model
    if modelname == "rf_rdkit":
        return rf_rdkit_model
    if modelname == "svm_maccs":
        return svm_maccs_model
    if modelname == "xgb_maccs":
        return xgb_maccs_model
    raise ValueError(
        "Unknown model name {!r}; expected one of "
        "'rf_maccs', 'rf_rdkit', 'svm_maccs', 'xgb_maccs'".format(modelname)
    )

In [33]:
def get_oof(clf,n_folds,X_train,y_train,X_test):
    """Produce level-1 stacking features from an already fitted classifier.

    ``X_train`` is walked fold by fold (shuffled ``KFold``, ``random_state=42``)
    and ``clf.predict_proba`` fills the matching rows of ``oof_train``. The
    probabilities for ``X_test`` are accumulated once per fold and divided by
    ``n_folds``.

    NOTE(review): ``clf`` is not refit inside the loop (the per-fold ``fit`` was
    disabled), so every fold is scored by the same model. If that model was
    trained on these rows, the "out-of-fold" values are in-sample; confirm.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba``.
    n_folds : int
        Number of KFold splits.
    X_train, X_test : array-like supporting ``.shape`` and integer-array indexing.
    y_train : array-like
        Only used to count the distinct classes.

    Returns
    -------
    tuple
        ``(oof_train, oof_test)`` with shapes ``(n_train, n_classes)`` and
        ``(n_test, n_classes)``.
    """
    n_classes = len(np.unique(y_train))
    kf = KFold(n_splits=n_folds, random_state=42, shuffle=True)
    oof_train = np.zeros((X_train.shape[0], n_classes))
    oof_test = np.zeros((X_test.shape[0], n_classes))

    # The classifier does not change between folds, so its test-set
    # probabilities are loop-invariant: compute them once.
    test_proba = clf.predict_proba(X_test)

    for _, holdout_index in kf.split(X_train):
        oof_train[holdout_index] = clf.predict_proba(X_train[holdout_index])
        # Same accumulate-then-divide arithmetic as before.
        oof_test += test_proba

    oof_test = oof_test / float(n_folds)
    return oof_train, oof_test

In [22]:
# Stacking, level 1: every pre-trained base model turns its own feature set into
# class-probability columns that become the training set of the level-2 model.
modelist = ['rf_maccs','rf_rdkit','svm_maccs','xgb_maccs']
# Feature set each base model expects.
model_features = {
    'rf_maccs': 'MACCS',
    'rf_rdkit': 'RDkit',
    'svm_maccs': 'MACCS',
    'xgb_maccs': 'MACCS',
}
newfeature_list = []
newtestdata_list = []
# Featurize each feature set once instead of once per model: it avoids
# repeating the expensive featurization and guarantees that all models sharing
# a feature set are scored on exactly the same training rows.
# NOTE(review): the rows of the stacked matrix only line up with label_train if
# featurizer() returns the same training split for every feature set; verify
# that its split is seeded.
split_cache = {}

for modelname in modelist:
    clf_first = SelectModel(modelname)

    featname = model_features[modelname]
    if featname not in split_cache:
        split_cache[featname] = featurizer(featname)
    data_train, data_test, label_train = split_cache[featname]

    oof_train_, oof_test_ = get_oof(clf=clf_first, n_folds=5, X_train=data_train,
                                    y_train=label_train, X_test=data_test)
    newfeature_list.append(oof_train_)
    newtestdata_list.append(oof_test_)

In [24]:
# Combine the level-1 outputs column-wise (one block of class-probability
# columns per base model).
newfeature = np.concatenate(newfeature_list, axis=1)
newtestdata = np.concatenate(newtestdata_list, axis=1)

# Level 2: train the meta-classifier on the level-1 outputs.
# random_state is fixed so the fitted forest, and therefore the exported
# probabilities, are reproducible across runs.
clf_second1 = RandomForestClassifier(random_state=42)
clf_second1.fit(newfeature, label_train)

In [25]:
pred_proba = clf_second1.predict_proba(newtestdata)

In [26]:
# Keep only the second predict_proba column for every compound.
# NOTE(review): this is the probability of class 1 only if the meta-classifier's
# classes_ are ordered [0, 1]; confirm.
test_pred_list = pred_proba[:, 1].tolist()

In [27]:
test_pred_array = np.array(test_pred_list)

In [28]:
test_pred_array

array([0.94, 0.95, 0.96, ..., 0.96, 0.91, 0.99])

In [29]:
# Assemble the result table: compound name, SMILES and stacked-model score.
df = pd.DataFrame(
    dict(
        name=plant['Volatile_compounds'],
        smiles=plant['smiles'],
        final_pred=test_pred_array,
    )
)

In [30]:
df

Unnamed: 0,name,smiles,final_pred
0,1-Pentanol,CCCCCO,0.94
1,Palmitic Acid,CCCCCCCCCCCCCCCC(=O)O,0.95
2,Methyl palmitate,CCCCCCCCCCCCCCCC(=O)OC,0.96
3,"(Z)-1-(2,6,6-Trimethyl-1-cyclohexen-1-yl)-2-bu...",CC=CC(=O)C1=C(CCCC1(C)C)C,0.98
4,Bornyl formate,CC1(C2CCC1(C(C2)OC=O)C)C,0.91
...,...,...,...
2391,"1,3-Dimethylcyclohexene",CC1CCCC(=C1)C,0.98
2392,1-Methyl-2-pyridone,CN1C=CC=CC1=O,0.16
2393,alpha-Bourbonene,CC1=CCC2C1C3C2(CCC3C(C)C)C,0.96
2394,Deoxysericealactone,CC1=C2CC(C(CC2OC1=O)(C)C=C)C(=C)C(=O)OC,0.91


In [31]:
df.to_csv('iplant(0.6).csv')