In [1]:
import numpy as np
import pandas as pd
import cv2
from skimage import color, exposure, transform

In [2]:
from skimage import io
import os
import glob

In [3]:
import warnings
# Install a blanket "ignore" filter: no warning of any category is shown by
# the cells that follow.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries used below.
warnings.filterwarnings("ignore")

In [4]:
# Side length, in pixels, that every image is resized to (img_size x img_size)
# before prediction; the same value is passed to cnn() when the model is built.
img_size = 200

In [5]:
def get_class(img_path):
    """Return the integer class id encoded at the start of an image file name.

    The file name (last path component) is expected to look like
    ``<class>_<rest>``; the text before the first underscore is parsed as an
    integer.

    Parameters
    ----------
    img_path : str
        Path to the image. Forward slashes and backslashes are both accepted
        as separators, so paths built by glob/os.path.join on Windows work.

    Returns
    -------
    int
        The class id taken from the file name prefix.

    Raises
    ------
    ValueError
        If the text before the first underscore is not an integer.
    """
    # Normalise Windows separators first: splitting on '/' alone would treat
    # a backslash-separated path as one long "file name".
    file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
    return int(file_name.split('_', 1)[0])

In [6]:
from keras.optimizers import SGD
from cnn import cnn

Using TensorFlow backend.


In [7]:
# Build the network from the local `cnn` module for square inputs of side
# img_size. The second argument (58) is presumably the number of output
# classes -- TODO confirm against cnn().
model = cnn(img_size, 58)

# Optimiser: stochastic gradient descent with Nesterov momentum and a small
# learning-rate decay.
lr = 0.01
sgd = SGD(lr=lr, momentum=0.9, decay=1e-6, nesterov=True)

# NOTE(review): the cells shown here never call fit() or load weights on
# `model` before it is used for feature extraction -- confirm this is intended.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Print the layer-by-layer architecture (layer types, output shapes, parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 200, 200)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 198, 198)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 99, 99)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 99, 99)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 99, 99)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 64, 97, 97)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 64, 48, 48)        0         
__________

In [9]:
from keras.models import Model

In [10]:
# Feature extractor: a sub-model that shares `model`'s input and stops at the
# layer named 'flatten_1', so predict() returns that layer's flattened
# activations rather than the output of a fully-connected layer.
# The feature arrays in this notebook are allocated with 67712 columns for
# these vectors (not 4096).
fc_layer = Model(inputs=model.input,
                outputs=model.get_layer('flatten_1').output)

In [11]:
# Pre-allocate the training feature matrix, filled row by row in the next
# cell: 4170 rows (one per training image) by 67712 columns (one per feature
# value). Both sizes are hard-coded here.
features=np.zeros(shape=(4170,67712))

In [12]:
# Extract one feature vector and one label per training image.
root_dir = 'chn/train_images'
labels = []

# Sort before shuffling: glob's order depends on the file system, so a seeded
# shuffle is only reproducible when it starts from a fixed order.
all_img_paths = sorted(glob.glob(os.path.join(root_dir, '*')))
# Local seeded generator (same seed value the classifiers in this notebook
# use) so the row order, and therefore the fitted models, are repeatable.
np.random.RandomState(100).shuffle(all_img_paths)

# Take the row count from the files actually found instead of a hard-coded
# number; the column count is reused from the pre-allocated array.
features = np.zeros(shape=(len(all_img_paths), features.shape[1]))

for i, img_path in enumerate(all_img_paths):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None for missing or undecodable files; fail with
        # the offending path instead of an opaque error inside cv2.resize.
        raise IOError('Could not read image: ' + img_path)
    img = cv2.resize(img, (img_size, img_size))
    # imread yields BGR; swap the channel order to RGB. (COLOR_RGB2BGR is the
    # same conversion code, so the pixel values are unchanged by this rename.)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Move the colour axis to the front: (H, W, C) -> (C, H, W).
    img = np.rollaxis(img, -1)
    # Add a leading batch axis of size 1 for predict().
    img = np.expand_dims(img, axis=0)
    features[i] = fc_layer.predict(img)
    labels.append(get_class(img_path))

In [13]:
# Inverse-frequency weight per class: total number of samples divided by the
# number of samples in that class, keyed by class id. The total comes from
# len(labels) so it always matches the data that was actually loaded.
# NOTE(review): `weights` is not passed to any estimator in the cells shown.
weights = dict(len(labels) / pd.Series(labels).value_counts())

In [14]:
# Column names for the feature table: "f_0", "f_1", ..., "f_67711" -- one per
# feature value. Built in one pass; a `for` over range() advances by itself,
# so no manual counter is needed.
feature_col = ["f_" + str(idx) for idx in range(67712)]

In [15]:
# Wrap the extracted feature matrix in a DataFrame with one named column per feature.
train_features = pd.DataFrame(features, columns=feature_col)
# From here on feature_col is a NumPy array of names rather than a list.
feature_col = np.array(feature_col)

print('Training Features Shape:', train_features.shape)

Training Features Shape: (4170, 67712)


In [16]:
# Preview the first five rows of the training feature table.
train_features.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_67702,f_67703,f_67704,f_67705,f_67706,f_67707,f_67708,f_67709,f_67710,f_67711
0,11.598362,-0.0,1.402852,0.782177,-0.0,0.592898,-0.0,-0.0,-0.0,5.907177,...,0.416431,8.404563,3.609438,9.47194,-0.0,-0.0,0.472691,18.07176,6.395085,-0.0
1,15.457705,-0.0,1.255111,2.472802,-0.0,1.364558,-0.0,-0.0,-0.0,8.699785,...,2.207372,11.659542,5.516461,10.099456,-0.0,-0.0,0.451292,22.230301,7.521552,-0.0
2,8.755809,-0.0,0.144327,0.668798,-0.0,0.50448,-0.0,-0.0,-0.0,4.203548,...,0.248999,4.416994,2.7547,6.235694,0.698579,-0.0,1.981976,10.901007,3.142677,-0.0
3,18.097448,-0.0,1.4197,2.432855,-0.0,1.586319,-0.0,-0.0,-0.0,10.220373,...,2.239665,12.368978,5.060144,9.938421,-0.0,-0.0,0.669937,22.822136,7.763309,-0.0
4,10.592823,-0.0,0.549146,1.956214,-0.0,1.165373,-0.0,-0.0,-0.0,5.830818,...,2.310171,14.157263,7.132648,12.166813,-0.0,-0.0,1.198488,24.739262,8.082868,-0.0


In [17]:
from sklearn.ensemble import RandomForestClassifier

In [18]:
# Random forest of 300 trees with a fixed seed, trained on the CNN feature table.
rf = RandomForestClassifier(random_state=100, n_estimators=300)

# Kept as the cell's last expression so the fitted estimator is echoed.
rf.fit(train_features, labels)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=None,
            oob_score=False, random_state=100, verbose=0, warm_start=False)

In [19]:
# Pre-allocate the test feature matrix, filled row by row in the next cell:
# 1994 rows (one per test image) by 67712 columns (one per feature value).
# Both sizes are hard-coded here.
test_features=np.zeros(shape=(1994,67712))

In [20]:
# Extract one feature vector and one label per test image.
root_dir = 'chn/test_images'
labels_test = []

# Sort before shuffling: glob's order depends on the file system, so a seeded
# shuffle is only reproducible when it starts from a fixed order.
all_img_paths = sorted(glob.glob(os.path.join(root_dir, '*')))
# Local seeded generator so the row order is repeatable between runs.
np.random.RandomState(100).shuffle(all_img_paths)

# Re-allocate as a plain array sized from the files actually found. This also
# keeps the cell re-runnable after `test_features` has been rebound to a
# DataFrame further down (.shape[1] works for both types).
test_features = np.zeros(shape=(len(all_img_paths), test_features.shape[1]))

for i, img_path in enumerate(all_img_paths):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None for missing or undecodable files; fail with
        # the offending path instead of an opaque error inside cv2.resize.
        raise IOError('Could not read image: ' + img_path)
    img = cv2.resize(img, (img_size, img_size))
    # imread yields BGR; swap the channel order to RGB. (COLOR_RGB2BGR is the
    # same conversion code, so the pixel values are unchanged by this rename.)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Move the colour axis to the front: (H, W, C) -> (C, H, W).
    img = np.rollaxis(img, -1)
    # Add a leading batch axis of size 1 for predict().
    img = np.expand_dims(img, axis=0)
    test_features[i] = fc_layer.predict(img)
    labels_test.append(get_class(img_path))

In [21]:
# Wrap the test feature matrix in a DataFrame that uses the same column names
# as the training table, so the fitted classifiers see matching features.
# (feature_col is used as-is; converting it to an array again is unnecessary.)
test_features = pd.DataFrame(data=test_features, columns=feature_col)

# Label the printed shape as the test set's.
print('Test Features Shape:', test_features.shape)

Training Features Shape: (1994, 67712)


In [22]:
# Predict a class id for every row of the test feature table with the random forest.
preds = rf.predict(test_features)

In [23]:
from sklearn.metrics import accuracy_score

In [24]:
# Fraction of test images classified correctly by the random forest.
# Arguments follow the documented (y_true, y_pred) order; accuracy is
# symmetric, so the value itself is the same either way.
accuracy_score(labels_test, preds)

0.5376128385155466

In [27]:
from sklearn.ensemble import GradientBoostingClassifier

In [30]:
# Gradient-boosting classifier with 10 boosting stages and a fixed seed.
gb = GradientBoostingClassifier(n_estimators = 10, random_state = 100)

In [31]:
# Train the gradient-boosting model on the same features and labels as the random forest.
gb.fit(train_features, labels)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=10,
              n_iter_no_change=None, presort='auto', random_state=100,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

In [34]:
# Predict a class id for every row of the test feature table with gradient boosting.
preds_gb = gb.predict(test_features)

In [36]:
# Fraction of test images classified correctly by the gradient-boosting model.
# Arguments follow the documented (y_true, y_pred) order; accuracy is
# symmetric, so the value itself is the same either way.
accuracy_score(labels_test, preds_gb)

0.3650952858575727