Link to the dataset: http://www.nlpr.ia.ac.cn/pal/trafficdata/recognition.html

# Chinese Traffic Sign Recognition. Part II

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import cv2
from skimage import color, exposure, transform

In [2]:
from skimage import io
import os
import glob

In [3]:
from sklearn.metrics import accuracy_score

In [19]:
from sklearn.ensemble import RandomForestClassifier

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
from keras.optimizers import SGD
from cnn import cnn

Using TensorFlow backend.


In [6]:
from keras.models import Model

In [7]:
import warnings
warnings.filterwarnings("ignore")

## Import CNN model for feature extraction

In [8]:
# Side length in pixels: every image is resized to img_size x img_size before
# being passed to the CNN, and the same value is given to cnn() when building it.
img_size = 200

In [9]:
# Build the network from the local `cnn` module and compile it with SGD
# (Nesterov momentum). 58 is presumably the number of sign classes -- confirm
# against cnn().
# NOTE(review): no trained weights are loaded anywhere in this notebook; unless
# cnn() restores them itself, the features extracted below come from a randomly
# initialised network -- confirm.
model = cnn(img_size, 58)

lr = 0.01
sgd = SGD(lr=lr, momentum=0.9, decay=1e-6, nesterov=True)

model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Print the layer-by-layer summary (layer types, output shapes, parameter counts)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 200, 200)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 198, 198)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 99, 99)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 99, 99)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 99, 99)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 64, 97, 97)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 64, 48, 48)        0         
__________

In [11]:
# Truncated copy of the network: same input as `model`, but its output is the
# output of the 'flatten_1' layer, so predict() returns that layer's activations.
flatten_output = model.get_layer('flatten_1').output
layer = Model(inputs=model.input, outputs=flatten_output)

## Load training and test data with features extracted by CNN

In [12]:
def get_class(img_path):
    """Return the integer class id encoded at the start of an image file name.

    The file name (last path component) is expected to look like
    ``<class>_<anything>``; the text before the first underscore is parsed
    as an int.

    Parameters
    ----------
    img_path : str
        Path to the image. Both '/' and '\\' are accepted as separators, so
        paths produced by glob on Windows are handled as well.

    Returns
    -------
    int
        The class id, e.g. 28 for ``.../028_0001.png``.

    Raises
    ------
    ValueError
        If the leading part of the file name is not an integer.
    """
    # Normalise separators first: splitting on '/' alone leaves the directory
    # name attached to the file name for Windows-style paths.
    file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
    return int(file_name.split('_', 1)[0])

In [13]:
# Pre-allocated training feature matrix: one row per training image (4170) and
# one column per element of the 'flatten_1' output (67712).
# Stored as float32 instead of the default float64: scikit-learn's forests cast
# their input to float32 internally, so this halves the memory footprint
# (~1.1 GB instead of ~2.3 GB) without changing what the classifier sees.
# Assumes layer.predict() returns float32 (the Keras default) -- confirm.
features = np.zeros(shape=(4170, 67712), dtype=np.float32)

In [14]:
# Run every training image through the truncated CNN (`layer`) and store the
# resulting feature vector in `features`; the class id parsed from each file
# name goes into `labels` at the same position.
root_dir = 'chn/train_images'
labels = []

# glob order depends on the filesystem, so sort first and shuffle with a fixed
# seed: the row order (and therefore the forest's bootstrap samples) is then
# identical on every run.
all_img_paths = sorted(glob.glob(os.path.join(root_dir, '*')))
np.random.RandomState(100).shuffle(all_img_paths)

# `features` has a fixed number of rows; fail early with a clear message rather
# than hitting an IndexError or leaving all-zero rows behind.
if len(all_img_paths) != features.shape[0]:
    raise ValueError('Found {} files in {} but `features` has {} rows'.format(
        len(all_img_paths), root_dir, features.shape[0]))

for i, img_path in enumerate(all_img_paths):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals an unreadable/non-image file by returning None
        raise IOError('Could not read image: {}'.format(img_path))
    img = cv2.resize(img, (img_size, img_size))
    # cv2.imread yields BGR; swap to RGB. (COLOR_RGB2BGR, used previously, is
    # the same channel swap -- this constant just names the actual direction.)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # roll color axis to axis 0: (H, W, C) -> (C, H, W)
    img = np.rollaxis(img, -1)
    # add the batch dimension expected by predict()
    img = np.expand_dims(img, axis=0)
    features[i] = layer.predict(img)
    labels.append(get_class(img_path))

In [15]:
# Per-class weight = (total number of training samples) / (samples in that class),
# so rarer classes receive larger weights. The total is taken from `labels`
# rather than hard-coded, and Series.value_counts() replaces the deprecated
# top-level pd.value_counts().
label_counts = pd.Series(labels).value_counts()
weights = (len(labels) / label_counts).to_dict()

In [16]:
# One column name per CNN feature: "f_0", "f_1", ..., "f_67711".
# (The previous loop also did `i += 1`, which had no effect because `range`
# reassigns the loop variable on every iteration.)
feature_col = ["f_" + str(idx) for idx in range(67712)]

In [17]:
# Wrap the extracted training features in a DataFrame, one named column per feature
train_features = pd.DataFrame(features, columns=feature_col)

# Keep the column names around as a NumPy array from here on
feature_col = np.array(feature_col)

print('Training Features Shape:', train_features.shape)

Training Features Shape: (4170, 67712)


In [18]:
# Preview the first rows of the training feature table
train_features.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_67702,f_67703,f_67704,f_67705,f_67706,f_67707,f_67708,f_67709,f_67710,f_67711
0,10.16429,2.641722,0.128619,2.302093,2.8544,-0.0,-0.0,7.431935,6.443797,-0.0,...,6.986444,-0.0,0.818436,2.875782,6.168141,2.275142,4.842099,2.264616,-0.0,-0.0
1,6.032022,1.678901,-0.0,0.983069,1.691319,0.473639,0.403501,1.313415,2.885647,-0.0,...,2.906425,0.037887,0.420077,1.395919,1.071897,0.942936,2.166385,1.51092,-0.0,-0.0
2,3.435413,0.893833,-0.0,0.865357,0.750154,0.010185,-0.0,2.146124,2.128632,-0.0,...,1.912038,-0.0,-0.0,0.743355,1.252568,0.400162,1.185081,0.761855,-0.0,-0.0
3,5.420577,1.201374,-0.0,0.9371,2.005418,0.433577,-0.0,3.812572,4.083014,-0.0,...,5.892561,-0.0,0.264951,1.606039,4.843041,1.951752,4.375151,1.849182,-0.0,-0.0
4,1.838523,0.432497,0.00529,0.303998,0.498672,0.106922,-0.0,1.229022,1.178539,-0.0,...,5.888288,-0.0,0.700047,1.956574,4.839325,1.749833,3.770623,1.613146,-0.0,-0.0


In [20]:
# Pre-allocated test feature matrix: one row per test image (1994) and one
# column per element of the 'flatten_1' output (67712).
# float32 instead of the default float64: scikit-learn's forests cast their
# input to float32 internally, so this halves memory use without changing the
# predictions. Assumes layer.predict() returns float32 (Keras default) -- confirm.
test_features = np.zeros(shape=(1994, 67712), dtype=np.float32)

In [21]:
# Run every test image through the truncated CNN (`layer`) and store the
# resulting feature vector in `test_features`; the class id parsed from each
# file name goes into `labels_test` at the same position.
root_dir = 'chn/test_images'
labels_test = []

# glob order depends on the filesystem, so sort first and shuffle with a fixed
# seed to get the same row order on every run.
all_img_paths = sorted(glob.glob(os.path.join(root_dir, '*')))
np.random.RandomState(100).shuffle(all_img_paths)

# `test_features` has a fixed number of rows; fail early with a clear message
# rather than hitting an IndexError or leaving all-zero rows behind.
if len(all_img_paths) != test_features.shape[0]:
    raise ValueError('Found {} files in {} but `test_features` has {} rows'.format(
        len(all_img_paths), root_dir, test_features.shape[0]))

for i, img_path in enumerate(all_img_paths):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals an unreadable/non-image file by returning None
        raise IOError('Could not read image: {}'.format(img_path))
    img = cv2.resize(img, (img_size, img_size))
    # cv2.imread yields BGR; swap to RGB. (COLOR_RGB2BGR, used previously, is
    # the same channel swap -- this constant just names the actual direction.)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # roll color axis to axis 0: (H, W, C) -> (C, H, W)
    img = np.rollaxis(img, -1)
    # add the batch dimension expected by predict()
    img = np.expand_dims(img, axis=0)
    test_features[i] = layer.predict(img)
    labels_test.append(get_class(img_path))

In [22]:
# Create a DataFrame from the extracted test features, using the same column
# names as the training table so both frames line up feature-for-feature.
test_features = pd.DataFrame(data=test_features, columns=feature_col)
feature_col = np.array(feature_col)

# This cell reports the *test* matrix; the label previously said "Training".
print('Test Features Shape:', test_features.shape)

Training Features Shape: (1994, 67712)


## Train Random Forest classifier

In [23]:
# Baseline: 300-tree random forest on the CNN features, with a fixed
# random_state so the fitted forest is repeatable for a given input.
rf = RandomForestClassifier(random_state=100, n_estimators=300)
rf.fit(train_features, labels)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=None,
            oob_score=False, random_state=100, verbose=0, warm_start=False)

In [24]:
# Predict a class label for every row of the test feature table with the baseline forest
preds = rf.predict(test_features)

In [25]:
# Fraction of test images whose predicted class matches the true class.
# Arguments follow the scikit-learn convention (y_true, y_pred); accuracy is
# symmetric, so the value is the same as with the previous swapped order.
accuracy_score(labels_test, preds)

0.5025075225677031

## Train Random Forest classifier with weights

In [27]:
# Second forest: same size and seed as `rf`, plus the per-class weights in `weights`
rf2 = RandomForestClassifier(
    n_estimators=300,
    random_state=100,
    class_weight=weights,
)

In [28]:
# Fit the class-weighted forest on the same training features and labels as `rf`
rf2.fit(train_features, labels)

RandomForestClassifier(bootstrap=True,
            class_weight={28: 9.349775784753364, 54: 12.87037037037037, 3: 16.03846153846154, 5: 21.49484536082474, 55: 25.74074074074074, 35: 26.73076923076923, 7: 27.43421052631579, 30: 27.8, 16: 29.366197183098592, 11: 30.217391304347824, 17: 32.07692307692308, 14: 32.578125, 26: 33.095238095238095, 0: 35.33...21.25, 51: 521.25, 18: 521.25, 57: 695.0, 19: 1042.5, 33: 1042.5, 25: 2085.0, 9: 2085.0, 53: 2085.0},
            criterion='gini', max_depth=None, max_features='auto',
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=300, n_jobs=None, oob_score=False,
            random_state=100, verbose=0, warm_start=False)

In [29]:
# Predict a class label for every row of the test feature table with the class-weighted forest
preds2 = rf2.predict(test_features)

In [None]:
# Test accuracy of the class-weighted forest.
# Arguments follow the scikit-learn convention (y_true, y_pred); accuracy is
# symmetric, so the value is the same as with the previous swapped order.
accuracy_score(labels_test, preds2)