In [1]:
import pandas as pd
import numpy as np
import scipy as sp

In [2]:
# Load the training set; this frame holds both the feature columns and the
# five target columns that are split apart in the following cells.
df = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# Peek at the column names sitting at positions 668..672 (the target columns).
df.columns[np.arange(668, 673)]

Index([u'IsFoodGood', u'IsServiceGood', u'IsAmbianceGood', u'IsDealsGood',
       u'IsPriceGood'],
      dtype='object')

In [4]:
# Feature matrix: every column of `df` except the five at positions 668..672.
df_X = df.loc[:, df.columns.delete(range(668, 673))]

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB

In [6]:
# Names of the target columns (positions 668..672 of the training frame),
# kept as a plain Python list so they can be iterated and used as dict keys.
labels = df.columns[range(668, 673)].tolist()

In [7]:
# Fit one independent decision tree per target column, all on the same
# feature matrix. The result maps target name -> fitted classifier.
#
# random_state is pinned so that Restart & Run All rebuilds the same trees:
# scikit-learn permutes candidate features at each split, so without a seed
# ties between equally good splits can be resolved differently on each run.
label_classifier_DTC = {
    label: DecisionTreeClassifier(random_state=0).fit(df_X, df[label])
    for label in labels
}

In [8]:
# Fit one independent random forest per target column, all on the same
# feature matrix. The result maps target name -> fitted classifier.
#
# random_state is pinned because a forest is stochastic by construction
# (bootstrap samples and random feature subsets); without a seed the reported
# scores change from run to run.
label_classifier_RF = {
    label: RandomForestClassifier(random_state=0).fit(df_X, df[label])
    for label in labels
}

In [9]:
# One Bernoulli naive Bayes model per target column, keyed by target name.
# Every model is trained on the same feature matrix `df_X`.
label_classifier_BNB = dict(
    (target, BernoulliNB().fit(df_X, df[target]))
    for target in labels
)

In [10]:
# Load the held-out set; it is split into features and targets in the next cell.
df_test = pd.read_csv(filepath_or_buffer='test.csv')

In [11]:
# Select the test features BY NAME, in exactly the column order the
# classifiers were fitted on. The previous positional drop (columns 668..672)
# silently assumed test.csv has the same layout as train.csv; selecting by
# name gives identical output when the layouts match and raises a KeyError
# (instead of mis-aligning features) when a training column is missing.
test_X = df_test[df_X.columns]

# Ground-truth targets, in the same order as `labels` so they line up with
# the per-label predictions assembled later.
test_y = df_test.loc[:, labels]

In [12]:
# Decision-tree predictions on the test features, keyed by target name.
label_result_DTC = {
    target: label_classifier_DTC[target].predict(test_X)
    for target in labels
}

In [13]:
# Random-forest predictions on the test features, keyed by target name.
label_result_RF = {
    target: label_classifier_RF[target].predict(test_X)
    for target in labels
}

In [14]:
# Naive-Bayes predictions on the test features, keyed by target name.
label_result_BNB = {
    target: label_classifier_BNB[target].predict(test_X)
    for target in labels
}

In [15]:
# Assemble the per-label decision-tree predictions into one frame whose
# columns follow the same fixed order used for `test_y`.
result_DTC = pd.DataFrame(label_result_DTC).loc[
    :,
    ['IsFoodGood', 'IsServiceGood', 'IsAmbianceGood', 'IsDealsGood', 'IsPriceGood']
]

In [16]:
# Assemble the per-label random-forest predictions into one frame whose
# columns follow the same fixed order used for `test_y`.
result_RF = pd.DataFrame(label_result_RF).loc[
    :,
    ['IsFoodGood', 'IsServiceGood', 'IsAmbianceGood', 'IsDealsGood', 'IsPriceGood']
]

In [17]:
# Assemble the per-label naive-Bayes predictions into one frame whose
# columns follow the same fixed order used for `test_y`.
result_BNB = pd.DataFrame(label_result_BNB).loc[
    :,
    ['IsFoodGood', 'IsServiceGood', 'IsAmbianceGood', 'IsDealsGood', 'IsPriceGood']
]

In [18]:
# Empty score table with the four report columns; the cells below add one
# row per classifier.
df_report = pd.DataFrame(
    columns=['algorithm', 'precision', 'recall', 'harmonic mean']
)

In [19]:
import classify_helper as helper

In [20]:
# Score the decision-tree predictions against the ground truth.
# NOTE(review): the helper lives in classify_helper (not shown here);
# presumably it returns one record matching df_report's columns - confirm.
report_DTC = helper.report_precision_and_recall(
    np.array(test_y),      # true labels as a plain array
    np.array(result_DTC),  # predicted labels, same column order as test_y
    'DecisionTree',
)

In [21]:
# Add the decision-tree scores as a new row; row labels are renumbered.
# NOTE(review): re-running this cell appends a duplicate row, and
# DataFrame.append no longer exists in pandas >= 2.0 (pd.concat replaces it).
df_report = df_report.append(other=report_DTC, ignore_index=True)

In [22]:
# Score the random-forest predictions against the ground truth.
# NOTE(review): the helper lives in classify_helper (not shown here);
# presumably it returns one record matching df_report's columns - confirm.
report_RF = helper.report_precision_and_recall(
    np.array(test_y),     # true labels as a plain array
    np.array(result_RF),  # predicted labels, same column order as test_y
    'RandomForest',
)

In [23]:
# Add the random-forest scores as a new row; row labels are renumbered.
# NOTE(review): re-running this cell appends a duplicate row, and
# DataFrame.append no longer exists in pandas >= 2.0 (pd.concat replaces it).
df_report = df_report.append(other=report_RF, ignore_index=True)

In [24]:
# Score the naive-Bayes predictions against the ground truth.
# NOTE(review): the helper lives in classify_helper (not shown here);
# presumably it returns one record matching df_report's columns - confirm.
report_BNB = helper.report_precision_and_recall(
    np.array(test_y),      # true labels as a plain array
    np.array(result_BNB),  # predicted labels, same column order as test_y
    'NaiveBayes',
)

In [25]:
# Add the naive-Bayes scores as a new row; row labels are renumbered.
# NOTE(review): re-running this cell appends a duplicate row, and
# DataFrame.append no longer exists in pandas >= 2.0 (pd.concat replaces it).
df_report = df_report.append(other=report_BNB, ignore_index=True)

In [26]:
# Display the collected score table (one row per classifier appended above).
df_report

Unnamed: 0,algorithm,precision,recall,harmonic mean
0,DecisionTree,0.59699,0.588861,0.592897
1,RandomForest,0.662968,0.549065,0.600664
2,NaiveBayes,0.611327,0.562704,0.586008


In [27]:
import pickle
# Persist the score table to disk as a pickle (binary mode is required).
with open('df_report_br.pkl', 'wb') as report_file:
    pickle.dump(df_report, report_file)