In [5]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

import sys
sys.path.insert(0, '../src/')

#load the data for one experiment, parameter: experiment_number
from read_data import read_exp 

#apply PCA, parameters: n_components,data
from pca import PCA_calcV,PCA_transform

#plot the pictures, parameters: images, nb_rows, nb_cols
from data_vis import plot_image
from data_vis import plot_image_grayscale

#decision tree model, parameters: (x_train,y_train,x_test,y_test)
#Note: the input data should be 2D -- (number of pictures, pixel values per picture)
#Use reshape to flatten the original data
from decision_tree import build_DT



In [6]:
def rgb2gray(rgb):
    """Convert RGB(A) image data to grayscale with a weighted channel sum.

    Parameters
    ----------
    rgb : array-like
        Image data whose last axis holds the colour channels. Only the
        first three channels are used, so a trailing alpha channel is
        ignored. Any number of leading axes is accepted: a single
        (height, width, channels) image as well as a
        (n_images, height, width, channels) batch.

    Returns
    -------
    numpy.ndarray
        Float array with the channel axis removed, i.e. the same leading
        shape as ``rgb``.
    """
    # Ellipsis instead of a fixed ``[:, :, :, :3]`` so the function is not
    # restricted to 4-D batches; for 4-D input the result is unchanged.
    channels = np.asarray(rgb)[..., :3]
    # Weighted sum over the last axis (R, G, B weights in that order).
    return np.dot(channels, [0.2989, 0.5870, 0.1140])




def _flatten_images(images):
    """Reshape (n_images, ...) image data to 2-D (n_images, n_features)."""
    # Derive the feature count from the array itself instead of assuming a
    # fixed image size; computing it explicitly (rather than using -1) also
    # keeps empty inputs working.
    n_features = int(np.prod(images.shape[1:]))
    return np.reshape(images, (images.shape[0], n_features))


def model_metrics (model_name, model_func,if_gray_scale, if_pca, pca_dim = 2):
    """Evaluate one classifier on experiments 1 to 6 and tabulate its scores.

    For every experiment the data is loaded with ``read_exp``, optionally
    converted to grayscale, flattened to one row per image, optionally
    projected with PCA, and then passed to ``model_func``.

    Parameters
    ----------
    model_name : str
        Label stored in the ``classifier`` column of the result.
    model_func : callable
        Called as ``model_func(x_train, y_train, x_test, y_test)``; must
        return the pair ``(y_train_pred, y_test_pred)``.
    if_gray_scale : bool
        When true, images are converted with ``rgb2gray`` before flattening.
    if_pca : bool
        When true, ``PCA_calcV`` is computed on the training data only and
        ``PCA_transform`` is applied to both the training and the test data.
    pca_dim : int, default 2
        Number of components passed to ``PCA_calcV``; unused when
        ``if_pca`` is false.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with the columns ``classifier``, ``exp_num``,
        ``train_acc``, ``test_acc``, ``train_f1`` and ``test_f1``.
    """
    columns = ['classifier','exp_num','train_acc','test_acc','train_f1','test_f1']

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and grew the frame
    # quadratically.
    records = []

    for exp_num in range(1, 7):
        #load data
        x_train, y_train, x_test, y_test = read_exp(exp_num)

        if if_gray_scale:
            x_train = rgb2gray(x_train)
            x_test = rgb2gray(x_test)

        # the models expect 2D input: one flattened row per picture
        x_train = _flatten_images(x_train)
        x_test = _flatten_images(x_test)

        #apply pca (fitted on the training set only)
        if if_pca:
            vecs = PCA_calcV(pca_dim, x_train)
            x_train = PCA_transform(vecs, x_train)
            x_test = PCA_transform(vecs, x_test)

        #build model
        y_train_pred, y_test_pred = model_func(x_train, y_train, x_test, y_test)

        #calculate metrics
        # NOTE(review): micro-averaged F1 equals accuracy for single-label
        # targets, which matches the identical columns in the recorded
        # output -- confirm whether 'macro' was intended.
        records.append({
            'classifier': model_name,
            'exp_num': exp_num,
            'train_acc': accuracy_score(y_train, y_train_pred),
            'test_acc': accuracy_score(y_test, y_test_pred),
            'train_f1': f1_score(y_train, y_train_pred, average='micro'),
            'test_f1': f1_score(y_test, y_test_pred, average='micro'),
        })

    return pd.DataFrame(records, columns=columns)

In [7]:
# Score the decision tree on grayscale images projected onto 2 PCA components.
temp = model_metrics(
    'decision tree',
    build_DT,
    if_gray_scale=True,
    if_pca=True,
    pca_dim=2,
)
temp

Experiment: 1
The data are x_train,y_train,x_test,y_test
Experiment: 2
The data are x_train,y_train,x_test,y_test
Experiment: 3
The data are x_train,y_train,x_test,y_test
Experiment: 4
The data are x_train,y_train,x_test,y_test
Experiment: 5
The data are x_train,y_train,x_test,y_test
Experiment: 6
The data are x_train,y_train,x_test,y_test


Unnamed: 0,classifier,exp_num,train_acc,test_acc,train_f1,test_f1
0,decision tree,1,1.0,1.0,1.0,1.0
1,decision tree,2,0.992886,0.5,0.992886,0.5
2,decision tree,3,1.0,1.0,1.0,1.0
3,decision tree,4,1.0,1.0,1.0,1.0
4,decision tree,5,1.0,0.70082,1.0,0.70082
5,decision tree,6,1.0,0.808108,1.0,0.808108


In [2]:
# Inspect the raw return value of read_exp for experiment 3.
# NOTE(review): as the last expression of the cell this displays the whole
# returned tuple of arrays; showing only the shapes of the returned parts
# would keep the output short -- confirm whether the full dump is needed.
read_exp(3)

Experiment: 3
The data are x_train,y_train,x_test,y_test


(array([[[[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
 
         [[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
 
         [[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
 
         ...,
 
         [[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
 
         [[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
 
         [[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
   