In [1]:
import numpy as np
import pandas as pd
import cv2
from skimage import color, exposure, transform

In [2]:
from skimage import io
import os
import glob

In [3]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices from the
# libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [4]:
from alexnet import alexnet

Using TensorFlow backend.


In [5]:
def get_class(img_path):
    """Return the integer class id encoded at the start of an image file name.

    The class id is the text before the first underscore in the last path
    component, so a path ending in ``12_foo.png`` yields ``12``.

    Both forward slashes and backslashes are treated as path separators, so
    paths returned by ``glob`` / ``os.path.join`` on Windows are parsed the
    same way as POSIX paths.

    Args:
        img_path: Path (or bare file name) of the image.

    Returns:
        The class id as an ``int``.

    Raises:
        ValueError: If the text before the first underscore is not an integer.
    """
    # Normalise separators first; splitting only on '/' would leave the parent
    # directory glued to the file name for backslash-separated paths.
    file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
    return int(file_name.split('_')[0])

In [6]:
# Side length (in pixels) images are resized to, and the number of classes
# handed to the network constructor.
img_size, num_classes = 227, 58

In [7]:
# Build the network for square 3-channel inputs of side img_size and
# num_classes outputs, then print its layer summary.
# NOTE(review): no weights are loaded in the visible cells; presumably
# alexnet() takes care of that — TODO confirm, since features taken from an
# untrained network would carry little class information.
net = alexnet((img_size,img_size,3),num_classes)
net.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 227, 227, 96)      34944     
_________________________________________________________________
activation_1 (Activation)    (None, 227, 227, 96)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 113, 113, 96)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 113, 113, 128)     307328    
_________________________________________________________________
activation_2 (Activation)    (None, 113, 113, 128)     0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 128)       0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 58, 58, 128)       0         
__________

In [8]:
from keras.models import Model

In [9]:
# Feature extractor: same input as `net`, but truncated at the layer named
# 'dense_1' (the first fully-connected layer, giving a 1 x 4096 vector per image).
fc1_output = net.get_layer('dense_1').output
fc_layer = Model(inputs=net.input, outputs=fc1_output)

In [10]:
# Zero-filled training feature matrix: 4170 rows (images) by 4096 columns.
features = np.zeros((4170, 4096))

In [11]:
# Extract one fully-connected feature vector (and its label) per training image.
root_dir = 'chn/train_images'

# glob order depends on the filesystem; sort first and shuffle with a fixed
# seed so the row order (and anything fitted on it) is reproducible.
all_img_paths = sorted(glob.glob(os.path.join(root_dir, '*')))
if not all_img_paths:
    raise FileNotFoundError('No images found under ' + root_dir)
np.random.RandomState(0).shuffle(all_img_paths)

labels = []
feature_rows = []
for img_path in all_img_paths:
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None for unreadable / non-image files; skip them
        # instead of crashing inside cv2.resize.
        print('Skipping unreadable image:', img_path)
        continue
    img = cv2.resize(img, (img_size, img_size))
    # imread yields BGR; swap to RGB. This is the same channel swap the
    # previous COLOR_RGB2BGR flag performed, named for the actual direction.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    fc_out = fc_layer.predict(np.expand_dims(img, axis=0))
    # Features and labels are appended together so rows always stay aligned.
    feature_rows.append(np.ravel(fc_out))
    labels.append(get_class(img_path))

# Size the matrix from the images actually read rather than a hard-coded row
# count, so extra files cannot overflow it and missing files cannot leave
# all-zero rows behind.
features = np.asarray(feature_rows, dtype=np.float64)

In [12]:
# Inverse-frequency class weights: total sample count / per-class count.
# The total comes from len(labels) so it cannot drift from the data actually
# loaded; Series.value_counts replaces the deprecated top-level pd.value_counts.
label_counts = pd.Series(labels).value_counts()
weights = (len(labels) / label_counts).to_dict()

In [13]:
# Column names f_0 ... f_4095, one per feature dimension.
feature_col = ["f_" + str(idx) for idx in range(4096)]

In [14]:
# Wrap the training feature matrix in a DataFrame with the f_<k> column names.
train_features = pd.DataFrame(features, columns=feature_col)
# From here on the column names are kept as a NumPy array.
feature_col = np.array(feature_col)

print('Training Features Shape:', train_features.shape)

Training Features Shape: (4170, 4096)


In [15]:
# Preview the first rows of the training feature table.
# NOTE(review): in the recorded output the first rows are very close to one
# another column by column — worth confirming the network carries trained
# weights and that inputs are preprocessed the way the model expects.
train_features.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4086,f_4087,f_4088,f_4089,f_4090,f_4091,f_4092,f_4093,f_4094,f_4095
0,0.147305,-0.089977,-0.534327,-0.815233,-2.325764,0.684446,0.169858,0.044565,-0.2176,0.45053,...,-0.793185,-0.58465,1.130687,0.607453,-1.18567,-1.144166,0.5086,-1.403604,-0.215452,1.235707
1,0.035624,-0.249447,-0.394788,-0.933148,-2.275973,0.651047,0.296212,0.069433,0.00816,0.585722,...,-0.802366,-0.420069,1.17267,0.578164,-1.244327,-1.427108,0.591066,-1.496932,-0.174917,1.060907
2,0.142312,-0.11697,-0.586533,-0.966759,-2.714195,0.811704,0.22273,0.074293,-0.213224,0.515086,...,-0.916016,-0.623277,1.336095,0.661686,-1.314371,-1.415309,0.60678,-1.667519,-0.26952,1.369858
3,0.047218,-0.155637,-0.229608,-0.640501,-1.429647,0.369578,0.188506,0.087002,-0.003211,0.392455,...,-0.472585,-0.329089,0.759969,0.36398,-0.819202,-0.861127,0.407569,-0.918252,-0.109248,0.745959
4,0.101424,-0.170166,-0.475565,-0.9861,-2.602593,0.766033,0.298426,0.083459,-0.088367,0.481212,...,-0.836984,-0.501946,1.300516,0.647024,-1.278921,-1.455074,0.687796,-1.601824,-0.242419,1.210108


In [16]:
from sklearn.ensemble import RandomForestClassifier

In [17]:
# 100-tree random forest with a fixed seed; per-class weights come from `weights`.
rf = RandomForestClassifier(
    class_weight=weights,
    n_estimators=100,
    random_state=100,
)

# fit() returns the estimator, so the cell displays the fitted model's repr.
rf.fit(train_features, labels)

RandomForestClassifier(bootstrap=True,
            class_weight={28: 9.349775784753364, 54: 12.87037037037037, 3: 16.03846153846154, 5: 21.49484536082474, 55: 25.74074074074074, 35: 26.73076923076923, 7: 27.43421052631579, 30: 27.8, 16: 29.366197183098592, 11: 30.217391304347824, 17: 32.07692307692308, 14: 32.578125, 26: 33.095238095238095, 0: 35.33...21.25, 51: 521.25, 18: 521.25, 57: 695.0, 19: 1042.5, 33: 1042.5, 25: 2085.0, 9: 2085.0, 53: 2085.0},
            criterion='gini', max_depth=None, max_features='auto',
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=None, oob_score=False,
            random_state=100, verbose=0, warm_start=False)

In [18]:
# Zero-filled test feature matrix: 1994 rows (images) by 4096 columns.
test_features = np.zeros((1994, 4096))

In [19]:
# Extract one fully-connected feature vector (and its label) per test image.
root_dir = 'chn/test_images'

# glob order depends on the filesystem; sort first and shuffle with a fixed
# seed so the row order is reproducible between runs.
all_img_paths = sorted(glob.glob(os.path.join(root_dir, '*')))
if not all_img_paths:
    raise FileNotFoundError('No images found under ' + root_dir)
np.random.RandomState(0).shuffle(all_img_paths)

labels_test = []
test_rows = []
for img_path in all_img_paths:
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None for unreadable / non-image files; skip them
        # instead of crashing inside cv2.resize.
        print('Skipping unreadable image:', img_path)
        continue
    img = cv2.resize(img, (img_size, img_size))
    # imread yields BGR; swap to RGB. This is the same channel swap the
    # previous COLOR_RGB2BGR flag performed, named for the actual direction.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    fc_out = fc_layer.predict(np.expand_dims(img, axis=0))
    # Features and labels are appended together so rows always stay aligned.
    test_rows.append(np.ravel(fc_out))
    labels_test.append(get_class(img_path))

# Size the matrix from the images actually read rather than a hard-coded row
# count, so extra files cannot overflow it and missing files cannot leave
# all-zero rows behind.
test_features = np.asarray(test_rows, dtype=np.float64)

In [20]:
# Wrap the test feature matrix in a DataFrame using the existing feature
# column names.
test_features = pd.DataFrame(data=test_features, columns=feature_col)

# Label the output as the test set (it previously said "Training").
print('Test Features Shape:', test_features.shape)

Training Features Shape: (1994, 4096)


In [21]:
# Predict a class id for every row of the test feature table.
preds = rf.predict(test_features)

In [22]:
from sklearn.metrics import accuracy_score

In [23]:
# Fraction of test images whose predicted class matches the true label.
# scikit-learn metrics take (y_true, y_pred); accuracy is symmetric, so the
# value is unchanged, but the conventional order avoids surprises if this is
# ever swapped for an asymmetric metric.
accuracy_score(labels_test, preds)

0.2728184553660983

In [24]:
# Show the predicted class ids.
preds

array([17, 54, 54, ...,  5,  4, 55])

In [25]:
# Show the true test labels as an array, for side-by-side comparison with preds.
np.array(labels_test)

array([17,  3, 39, ..., 15,  7, 15])