# Data Loading

In [1]:
import pandas as pd
import os
import keras
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers.merge import concatenate
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import  Input, Dense, Conv2D, Dropout, Flatten, MaxPooling2D, LSTM
from keras.datasets import cifar10
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json

Using TensorFlow backend.


In [2]:
def load_data(dataset_path='./npy_files'):
    X = np.load(os.path.join(dataset_path,'X.npy'))
    y = np.load(os.path.join(dataset_path,'y.npy'))
    folds = np.load(os.path.join(dataset_path,'folds.npy'))
    feature_class = np.load(os.path.join(dataset_path, 'feature_class.npy'))
    facs = pd.read_csv('save_data/npy_files/facs_multi.csv',index_col=None)

    # rescale [0,255] -> [0,2]    
    X = X.astype('float32')/255*2


    return X, y, folds, feature_class, facs

In [3]:
X, y, folds, feature_class, facs = load_data('./save_data/npy_files/')
del facs['Unnamed: 0']
del facs['10']
del facs['26']
del facs['0']

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
feature_class_single = np.zeros((1307, 4040))
for index, inst in enumerate(feature_class):
    feature_class_single[index] = inst[0]

In [83]:
X_train, X_test, facs_train_X, facs_test_X = \
        train_test_split(feature_class_single, facs, test_size=0.4)

In [84]:
feature_train, feature_test, facs_train, facs_test = \
        train_test_split(feature_class_single, facs, test_size=0.4)

In [53]:
facs_test_X.shape

(262, 15)

In [85]:
X_cv = X.reshape((1307, -1))

## Each Accuracy Function

In [74]:
def each_accuracy_calculation(pred, test):
    each_accuracy = {}
    pred_df = pd.DataFrame(pred, columns=facs.columns.astype(int))
    for col_index, au_code in enumerate(pred_df.columns):
        each_accuracy[int(au_code)] = 0
        for index, elem in enumerate(pred_df[au_code]):
            if elem == test.iloc[index][col_index]:
                each_accuracy[int(au_code)] += 1
    
    for key in each_accuracy:
        each_accuracy[key] /= test.shape[0]
        each_accuracy[key] *= 100
    return each_accuracy

In [103]:
each_accuracy_calculation(decision_tree_raw_pred, facs_test_X)

{1: 97.51434034416826,
 2: 97.131931166348,
 4: 92.35181644359464,
 5: 94.83747609942638,
 6: 93.88145315487571,
 7: 93.88145315487571,
 9: 99.61759082217974,
 12: 97.89674952198853,
 15: 97.70554493307839,
 17: 92.92543021032505,
 20: 98.66156787762907,
 23: 97.70554493307839,
 24: 96.5583173996176,
 25: 97.131931166348,
 27: 98.2791586998088}

# Classifiers

## Decision Tree

### Raw Value

In [87]:
from sklearn.tree import DecisionTreeClassifier

decision_tree_raw = DecisionTreeClassifier()
decision_tree_raw = decision_tree_raw.fit(X_train, facs_train_X)

In [88]:
from sklearn.metrics import accuracy_score

accuracy_score(decision_tree_raw.predict(X_test), facs_test_X)

0.7476099426386233

In [89]:
decision_tree_raw_pred = decision_tree_raw.predict(X_test)

In [99]:
each_accuracy_calculation(decision_tree_raw_pred, facs_test_X)

{1: 97.51434034416826,
 2: 97.131931166348,
 4: 92.35181644359464,
 5: 94.83747609942638,
 6: 93.88145315487571,
 7: 93.88145315487571,
 9: 99.61759082217974,
 12: 97.89674952198853,
 15: 97.70554493307839,
 17: 92.92543021032505,
 20: 98.66156787762907,
 23: 97.70554493307839,
 24: 96.5583173996176,
 25: 97.131931166348,
 27: 98.2791586998088}

## CV

In [76]:
from sklearn.model_selection import cross_val_score

In [104]:
decision_tree_raw_cv = DecisionTreeClassifier()
cross_val_score(decision_tree_raw_cv, X_cv, facs.values)

array([0.21100917, 0.2706422 , 0.27356322])

### Feature value

In [90]:
decision_tree_feature = DecisionTreeClassifier()
decision_tree_feature = decision_tree_feature.fit(feature_train, facs_train)

In [91]:
accuracy_score(decision_tree_feature.predict(feature_test), facs_test)

0.762906309751434

In [92]:
decision_tree_feature_pred = decision_tree_feature.predict(feature_test)

In [100]:
each_accuracy_calculation(decision_tree_feature_pred, facs_test)

{1: 97.70554493307839,
 2: 97.131931166348,
 4: 94.64627151051626,
 5: 95.98470363288719,
 6: 95.02868068833652,
 7: 93.11663479923517,
 9: 98.8527724665392,
 12: 98.66156787762907,
 15: 96.74952198852772,
 17: 94.07265774378585,
 20: 99.04397705544933,
 23: 95.79349904397706,
 24: 95.79349904397706,
 25: 96.74952198852772,
 27: 98.8527724665392}

## Random Forest

### Raw Value

In [93]:
from sklearn.ensemble import RandomForestClassifier

In [94]:
random_forest_raw = RandomForestClassifier()
random_forest_raw = random_forest_raw.fit(X_train, facs_train_X)
accuracy_score(random_forest_raw.predict(X_test), facs_test_X)

0.6175908221797323

In [95]:
random_forest_raw_pred = random_forest_raw.predict(X_test)

### Feature value

In [96]:
random_forest_feature = RandomForestClassifier()
random_forest_feature = random_forest_feature.fit(feature_train, facs_train)
accuracy_score(random_forest_feature.predict(feature_test), facs_test)

0.6252390057361377

In [97]:
random_forest_feature_pred = random_forest_feature.predict(X_test)

In [None]:
each_accuracy_calculation()