# Machine learning models for association inference scores

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import json
import multiprocessing as mp

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import re
import operator

In [2]:
def trans_float32(data, index_col):
    df = data.set_index(index_col)
    df = df.astype('float32').reset_index()
    return df

# Read all intermediate tables

In [3]:
# Food related
food_feature_matched = pd.read_csv('../data/food_feature_matched.csv')
# food_disease_matched = pd.read_csv('/home/mw/project/特征中间表/food_disease_matched.csv', index_col=0)
match_relation = pd.read_csv('/home/mw/project/data/中间表/foodname_match_relation.csv')

# Disease related
disease_mirna_total = pd.read_csv('../data/disease_mirna_sim.csv')
disease_gene_total = pd.read_csv('../data/disease_simiparity_gene_direct.csv')
disease_name_total = pd.read_csv('../data/disease_go_sim.csv')
# Label Related
disease_food_infer = pd.read_csv('../data//disease_food_infer_4.csv')

In [4]:
# disease_mirna_total
# disease_gene_total
# disease_name_total
# disease_food_infer

In [5]:
# disease_food_infer = pd.read_csv('/home/mw/input/combine_disease9321/disease_food_infer_direevi.csv')
disease_food_infer = disease_food_infer[disease_food_infer['food_lower'].isin(list(match_relation['food_disease']))]
disease_food_infer.index = disease_food_infer['food_lower']
# disease_food_infer.T[list(food_feature_matched.columns)]

In [6]:
q1 = pd.DataFrame(list(disease_food_infer['food_lower'].values),columns=['food_disease'])
match_relation = pd.merge(q1,match_relation,on='food_disease',how='inner')

In [7]:
food_feature_matched = pd.merge(match_relation,food_feature_matched,on='food_id',how='inner')
# food_feature_matched = pd.merge(match_relation,food_feature_matched,on='food_id',how='inner').drop(['food_disease','food_id'],axis=1)
print(food_feature_matched.shape)
food_feature_matched = food_feature_matched.drop(list(food_feature_matched.sum()[food_feature_matched.sum() == 0].index),axis=1)
print(food_feature_matched.shape)
food_feature_matched = food_feature_matched.fillna(0)

(579, 215)
(579, 198)


## Processing food name

In [8]:
disease_food_infer = disease_food_infer.T[list(food_feature_matched['food_disease'].values)].T
food_feature_matched = food_feature_matched.drop(['long','food_id'],axis=1)
disease_food_infer = disease_food_infer.drop(['food_lower'],axis=1)
food_feature_matched.index = food_feature_matched['food_disease']
food_feature_matched = food_feature_matched.drop(['food_disease'],axis=1)

## Processing disease name

In [9]:

def disease_matched(disease_mirna,disease_food):
    disease_mirna.index = disease_mirna.columns[1:]
    item1 = disease_mirna.columns[1:]
    item2 = disease_food.columns
    item3 = list(set(item1)&set(item2))
    item4 = disease_mirna[item3]
    print(item4.shape)
    item5 = item4.T[item3]

    disease_food[item3]



    return item5,disease_food[item3]
            

In [10]:
disease_food = disease_food_infer 
disease_mirna, disease_food_mirna = disease_matched(disease_mirna_total,disease_food)
disease_gene, disease_food_gene = disease_matched(disease_gene_total,disease_food)
disease_name, disease_food_name = disease_matched(disease_name_total,disease_food)

(997, 320)
(3404, 670)
(2109, 2109)


In [11]:
disease_name.shape

(2109, 2109)

In [12]:
# trea_disease_food.head()

In [13]:
# disease_food 
# disease_food_mirna
# disease_food_gene
# disease_food_name
# disease_mirna_total 
# disease_gene_total
# disease_name_total

disease_mirna_total                            
disease_gene_total                            
disease_name_total                            
disease_food_infer

## Create X Y

In [14]:


def generate_X_Y(disease_gene,food_feature_matched,disease_food_gene):
    try:
        columns = list(food_feature_matched.drop(['food_lower'],axis=1).columns)
    except:
        columns = list(food_feature_matched.columns)
    try:
        columns.extend(list(disease_gene.drop(['disease'],axis=1).columns))
    except:
        columns.extend(list(disease_gene.columns))

    # print(k1)
    # columns.extend(list(disease_gene.columns))
    
    # try:
    #     columns.remove(['food_lower'])
    # except:
    #     pass
    print(len(columns),len(disease_gene.columns),len(food_feature_matched.columns))
    disease_food_gene['food_lower'] = disease_food_gene.index
    Y_before = disease_food_gene.melt(id_vars=['food_lower'], var_name='disease', value_name='inference score')
    X = Y_before
    
    
    food_feature_matched['food_lower'] = food_feature_matched.index
    print(X['food_lower'].head())
    print("-----")
    print(food_feature_matched['food_lower'].head())
    X = pd.merge(X,food_feature_matched,on='food_lower',how='inner')
    disease_gene['disease'] = disease_gene.index
    X = pd.merge(X,disease_gene,on='disease',how='inner')
    x = X[columns]
    x = x.reset_index(drop=True)
    y = X[['inference score']]
    y = y.reset_index(drop=True)
    return x,y


In [15]:
# x,y = generate_X_Y(disease_gene,food_feature_matched,disease_food_gene)

## function

In [16]:
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.preprocessing import label_binarize
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_curve, auc

from sklearn import metrics


In [17]:
def machine_learning_model(model,disease_gene,food_feature_matched,disease_food_gene,multi_label):
    x,y = generate_X_Y(disease_gene,food_feature_matched,disease_food_gene)
    ## Splitting boxes
    k=multi_label
    Y_multiple = y[y['inference score']>0][['inference score']]
    equal_frequency_cut = pd.qcut(Y_multiple['inference score'], q=k-1, duplicates="drop", labels = range(1, k))
    Y_multiple['inference score2'] = equal_frequency_cut.values
    Y_multiple = pd.concat([Y_multiple,y],axis=1)
    Y_multiple['inference score'] = Y_multiple['inference score2'].cat.add_categories(0).fillna(0)
    Y_multiple = Y_multiple.iloc[:,:1]
    kf = KFold(n_splits=5,shuffle=False)
    del y

    gene_auc_total = []
    gene_f1_total = []
    for train_index , test_index in kf.split(x):
        X_train, y_train = x.iloc[train_index], Y_multiple.iloc[train_index][['inference score']]
        X_test, y_test = x.iloc[test_index], Y_multiple.iloc[test_index][['inference score']]
    # X_train, X_test, y_train, y_test = model_selection.train_test_split(x_,Y_multiple_[['inference score']], test_size = 0.2, random_state = 1234)
        tree = model
        tree = tree.fit(X_train, y_train)
        tree_pred = tree.predict(X_test) 
        f1 = metrics.f1_score(y_test, tree_pred,average='micro')
        class_names = np.unique(y_train)
        y_binarize = label_binarize(y_test, classes=class_names)
        y_fit=label_binarize(tree_pred, classes = class_names)
        fpr, tpr, _= metrics.roc_curve(y_binarize.ravel(),y_fit.ravel())
        auc = metrics.auc(fpr, tpr)
        gene_auc_total.append(auc)
        gene_f1_total.append(f1)
    return np.mean(gene_auc_total),np.mean(gene_f1_total)


In [18]:
# def machine_learning_model(model,disease_gene,food_feature_matched,disease_food_gene,multi_label):
#     x,y = generate_X_Y(disease_gene,food_feature_matched,disease_food_gene)
#     ## 分箱
#     k=multi_label
#     print(y[y['inference score']>0][['inference score']].shape,y[y['inference score']==0][['inference score']].shape)
#     Y_multiple = y[y['inference score']>0][['inference score']]
#     # y[y['inference score']>0][['inference score']] = 0
#     # equal_frequency_cut = pd.qcut(Y_multiple['inference score'], q=k-1, duplicates="drop", labels = range(1, k))
#     Y_multiple['inference score2'] = 1
#     Y_multiple = pd.concat([Y_multiple,y],axis=1)
#     Y_multiple['inference score'] = Y_multiple['inference score2'].fillna(0)
#     Y_multiple = Y_multiple.iloc[:,:1]
#     kf = KFold(n_splits=5,shuffle=False)
    

#     gene_auc_total = []
#     gene_f1_total = []
#     for train_index , test_index in kf.split(x):
#         X_train, y_train = x.iloc[train_index], Y_multiple.iloc[train_index][['inference score']]
#         X_test, y_test = x.iloc[test_index], Y_multiple.iloc[test_index][['inference score']]
#     # X_train, X_test, y_train, y_test = model_selection.train_test_split(x_,Y_multiple_[['inference score']], test_size = 0.2, random_state = 1234)
#         tree = model
#         tree = tree.fit(X_train, y_train)
#         tree_pred = tree.predict(X_test) 
#         # f1 = metrics.f1_score(y_test, tree_pred,average='micro')
#         # class_names = np.unique(y_train)
#         # y_binarize = label_binarize(y_test, classes=class_names)
#         # y_fit=label_binarize(tree_pred, classes = class_names)
#         # fpr, tpr, _= metrics.roc_curve(y_binarize.ravel(),y_fit.ravel())
#         # auc = metrics.auc(fpr, tpr)
#         gene_auc_total.append(metrics.roc_auc_score(tree_pred,y_test))
#         gene_f1_total.append(metrics.f1_score(tree_pred,y_test))
        
#         # gene_f1_total.append(metrics.f1_score(tree_pred，y_test))
#     return np.mean(gene_auc_total),np.mean(gene_f1_total)


In [19]:
def machine_learning_model_parameter(disease_gene,food_feature_matched,disease_food_gene,multi_label):
    x,y = generate_X_Y(disease_gene,food_feature_matched,disease_food_gene)
    ## Splitting boxes
    k=multi_label
    Y_multiple = y[y['inference score']>0][['inference score']]
    # equal_frequency_cut = pd.qcut(Y_multiple['inference score'], q=k-1, duplicates="drop", labels = range(1, k))
    Y_multiple['inference score2'] = 1
    Y_multiple = pd.concat([Y_multiple,y],axis=1)
    Y_multiple['inference score'] = Y_multiple['inference score2'].fillna(0)
    Y_multiple = Y_multiple.iloc[:,:1]
    dec_tree = RandomForestClassifier()
    # kf = KFold(n_splits=5,shuffle=False)
    pipe = Pipeline(steps=[('dec_tree', dec_tree)])
    criterion = ['gini', 'entropy']
    max_depth = [2,4,6,8,10,12]
    parameters = dict(dec_tree__criterion=criterion,
                      dec_tree__max_depth=max_depth)
    clf_GS = GridSearchCV(pipe, parameters)
    clf_GS.fit(x, Y_multiple[['inference score']])
    # calibrated_forest = CalibratedClassifierCV(estimator=RandomForestClassifier(n_estimators=10))
    # param_grid = {'estimator__max_depth': [2, 4, 6, 8]}
    # search = GridSearchCV(calibrated_forest, param_grid, cv=5)
    # search.fit(x, Y_multiple[['inference score']])
    print('Best Criterion:', clf_GS.best_estimator_.get_params()['dec_tree__criterion'])
    print('Best max_depth:', clf_GS.best_estimator_.get_params()['dec_tree__max_depth'])


## Gene

In [22]:
# machine_learning_model(forest_model,disease_gene,food_feature_matched,disease_food_gene,12)

In [30]:
# dec_tree = RandomForestClassifier(max_depth=4,criterion='gini')
dec_tree = DecisionTreeClassifier(random_state=1, criterion="entropy", max_depth=2)
# dec_tree = RandomForestClassifier()
# tree_model = DecisionTreeClassifier(random_state=1, criterion="gini", max_depth=20,splitter='random',min_samples_split=20)
machine_learning_model(dec_tree,disease_gene,food_feature_matched,disease_food_gene,3)

864 670 196
0        abiyuch
1        acerola
2          acorn
3    adzuki bean
4           agar
Name: food_lower, dtype: object
-----
food_disease
abiyuch            abiyuch
acerola            acerola
acorn                acorn
adzuki bean    adzuki bean
agar                  agar
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(0.7094871662449, 0.6126495549932001)

In [21]:
dec_tree = RandomForestClassifier(max_depth=4,criterion='gini')
# dec_tree = RandomForestClassifier()
# tree_model = DecisionTreeClassifier(random_state=1, criterion="gini", max_depth=20,splitter='random',min_samples_split=20)
machine_learning_model(dec_tree,disease_gene,food_feature_matched,disease_food_gene,3)

864 670 196
0        abiyuch
1        acerola
2          acorn
3    adzuki bean
4           agar
Name: food_lower, dtype: object
-----
food_disease
abiyuch            abiyuch
acerola            acerola
acorn                acorn
adzuki bean    adzuki bean
agar                  agar
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(0.7177813355368486, 0.6237084473824648)

## mirna

In [31]:
dec_tree = RandomForestClassifier(max_depth=4,criterion='gini')
dec_tree = DecisionTreeClassifier(random_state=1, criterion="entropy", max_depth=2)
machine_learning_model(dec_tree,disease_mirna,food_feature_matched,disease_food_mirna,3)

515 320 196
0        abiyuch
1        acerola
2          acorn
3    adzuki bean
4           agar
Name: food_lower, dtype: object
-----
food_disease
abiyuch            abiyuch
acerola            acerola
acorn                acorn
adzuki bean    adzuki bean
agar                  agar
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(0.6918009299307959, 0.5890679065743946)

## name

In [34]:
print(disease_name.shape, food_feature_matched.shape,disease_food_name.shape)

(2109, 2109) (579, 195) (579, 2109)


In [35]:
machine_learning_model(dec_tree,disease_name,food_feature_matched.iloc[500:,:],disease_food_name.iloc[500:,:],3)

2303 2109 196
0         sunflower
1       sweet basil
2      sweet cherry
3    sweet marjoram
4      sweet orange
Name: food_lower, dtype: object
-----
food_disease
sunflower              sunflower
sweet basil          sweet basil
sweet cherry        sweet cherry
sweet marjoram    sweet marjoram
sweet orange        sweet orange
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(0.662152332767181, 0.5495364436895745)

In [32]:
name_auc = []
name_f1 = []
for item in range(0,10):
    item1 = item*50
    item2 = (item+1)*50
    print(item1,item2)
    print(food_feature_matched.iloc[item1:item2,:].shape,disease_food_name.iloc[item1:item2,:].shape)
    # dec_tree = RandomForestClassifier(max_depth=4,criterion='gini')
    dec_tree = DecisionTreeClassifier(random_state=1, criterion="entropy", max_depth=2)
    machine_learning_model(dec_tree,disease_name,food_feature_matched.iloc[item1:item2,:],disease_food_name.iloc[item1:item2,:],3)
    item3,item4 = machine_learning_model(dec_tree,disease_name,food_feature_matched.iloc[item1:item2,:],disease_food_name.iloc[item1:item2,:],3)
    name_auc.append(item3)
    name_f1.append(item4)

0 50
(50, 196) (50, 2109)
2303 2109 196
0        abiyuch
1        acerola
2          acorn
3    adzuki bean
4           agar
Name: food_lower, dtype: object
-----
food_disease
abiyuch            abiyuch
acerola            acerola
acorn                acorn
adzuki bean    adzuki bean
agar                  agar
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0        abiyuch
1        acerola
2          acorn
3    adzuki bean
4           agar
Name: food_lower, dtype: object
-----
food_disease
abiyuch            abiyuch
acerola            acerola
acorn                acorn
adzuki bean    adzuki bean
agar                  agar
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


50 100
(50, 196) (50, 2109)
2303 2109 196
0       black radish
1       black radish
2       black radish
3       black raisin
4    black raspberry
Name: food_lower, dtype: object
-----
food_disease
black radish          black radish
black radish          black radish
black radish          black radish
black raisin          black raisin
black raspberry    black raspberry
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0       black radish
1       black radish
2       black radish
3       black raisin
4    black raspberry
Name: food_lower, dtype: object
-----
food_disease
black radish          black radish
black radish          black radish
black radish          black radish
black raisin          black raisin
black raspberry    black raspberry
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


100 150
(50, 196) (50, 2109)
2303 2109 196
0        carrot
1        carrot
2    cashew nut
3       cassava
4       catfish
Name: food_lower, dtype: object
-----
food_disease
carrot            carrot
carrot            carrot
cashew nut    cashew nut
cassava          cassava
catfish          catfish
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0        carrot
1        carrot
2    cashew nut
3       cassava
4       catfish
Name: food_lower, dtype: object
-----
food_disease
carrot            carrot
carrot            carrot
cashew nut    cashew nut
cassava          cassava
catfish          catfish
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


150 200
(50, 196) (50, 2109)
2303 2109 196
0     coriander
1          corn
2          corn
3     cornbread
4    cottonseed
Name: food_lower, dtype: object
-----
food_disease
coriander      coriander
corn                corn
corn                corn
cornbread      cornbread
cottonseed    cottonseed
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0     coriander
1          corn
2          corn
3     cornbread
4    cottonseed
Name: food_lower, dtype: object
-----
food_disease
coriander      coriander
corn                corn
corn                corn
cornbread      cornbread
cottonseed    cottonseed
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


200 250
(50, 196) (50, 2109)
2303 2109 196
0           fireweed
1           flatfish
2           flaxseed
3    florida pompano
4    french plantain
Name: food_lower, dtype: object
-----
food_disease
fireweed                  fireweed
flatfish                  flatfish
flaxseed                  flaxseed
florida pompano    florida pompano
french plantain    french plantain
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0           fireweed
1           flatfish
2           flaxseed
3    florida pompano
4    french plantain
Name: food_lower, dtype: object
-----
food_disease
fireweed                  fireweed
flatfish                  flatfish
flaxseed                  flaxseed
florida pompano    florida pompano
french plantain    french plantain
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


250 300
(50, 196) (50, 2109)
2303 2109 196
0            honey
1     horned melon
2      horseradish
3    hot chocolate
4    hyacinth bean
Name: food_lower, dtype: object
-----
food_disease
honey                    honey
horned melon      horned melon
horseradish        horseradish
hot chocolate    hot chocolate
hyacinth bean    hyacinth bean
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0            honey
1     horned melon
2      horseradish
3    hot chocolate
4    hyacinth bean
Name: food_lower, dtype: object
-----
food_disease
honey                    honey
horned melon      horned melon
horseradish        horseradish
hot chocolate    hot chocolate
hyacinth bean    hyacinth bean
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


300 350
(50, 196) (50, 2109)
2303 2109 196
0       malabar plum
1    malabar spinach
2       mamey sapote
3       mammee apple
4              mango
Name: food_lower, dtype: object
-----
food_disease
malabar plum          malabar plum
malabar spinach    malabar spinach
mamey sapote          mamey sapote
mammee apple          mammee apple
mango                        mango
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0       malabar plum
1    malabar spinach
2       mamey sapote
3       mammee apple
4              mango
Name: food_lower, dtype: object
-----
food_disease
malabar plum          malabar plum
malabar spinach    malabar spinach
mamey sapote          mamey sapote
mammee apple          mammee apple
mango                        mango
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


350 400
(50, 196) (50, 2109)
2303 2109 196
0     oyster mushroom
1         pacific cod
2     pacific herring
3    pacific rockfish
4              papaya
Name: food_lower, dtype: object
-----
food_disease
oyster mushroom      oyster mushroom
pacific cod              pacific cod
pacific herring      pacific herring
pacific rockfish    pacific rockfish
papaya                        papaya
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0     oyster mushroom
1         pacific cod
2     pacific herring
3    pacific rockfish
4              papaya
Name: food_lower, dtype: object
-----
food_disease
oyster mushroom      oyster mushroom
pacific cod              pacific cod
pacific herring      pacific herring
pacific rockfish    pacific rockfish
papaya                        papaya
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


400 450
(50, 196) (50, 2109)
2303 2109 196
0      potato bread
1    prairie turnip
2      prickly pear
3      prickly pear
4           pummelo
Name: food_lower, dtype: object
-----
food_disease
potato bread        potato bread
prairie turnip    prairie turnip
prickly pear        prickly pear
prickly pear        prickly pear
pummelo                  pummelo
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0      potato bread
1    prairie turnip
2      prickly pear
3      prickly pear
4           pummelo
Name: food_lower, dtype: object
-----
food_disease
potato bread        potato bread
prairie turnip    prairie turnip
prickly pear        prickly pear
prickly pear        prickly pear
pummelo                  pummelo
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


450 500
(50, 196) (50, 2109)
2303 2109 196
0       sea cucumber
1          sea trout
2           semolina
3             sesame
4    sesbania flower
Name: food_lower, dtype: object
-----
food_disease
sea cucumber          sea cucumber
sea trout                sea trout
semolina                  semolina
sesame                      sesame
sesbania flower    sesbania flower
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


2303 2109 196
0       sea cucumber
1          sea trout
2           semolina
3             sesame
4    sesbania flower
Name: food_lower, dtype: object
-----
food_disease
sea cucumber          sea cucumber
sea trout                sea trout
semolina                  semolina
sesame                      sesame
sesbania flower    sesbania flower
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [1]:
# dec_tree = RandomForestClassifier(max_depth=2,criterion='gini')
# machine_learning_model(dec_tree,disease_name,food_feature_matched,disease_food_name,3)
# x,y = generate_X_Y(disease_name,food_feature_matched,disease_food_name)
name_f1.append(0.549)
name_auc.append(0.662)
print(np.mean(name_auc),np.mean(name_f1))

2303 2109 195
0        abiyuch
1        acerola
2          acorn
3    adzuki bean
4           agar
Name: food_lower, dtype: object
-----
food_disease
abiyuch            abiyuch
acerola            acerola
acorn                acorn
adzuki bean    adzuki bean
agar                  agar
Name: food_lower, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [22]:
# disease_name,food_feature_matched,disease_food_name
# machine_learning_model_parameter(disease_mirna,food_feature_matched,disease_food_mirna,12)