In [1]:
%run ../DataClass.ipynb

In [2]:
X_train = np.load('./0/data/X_train.npy')
X_test = np.load('./0/data/X_test.npy')
X_val = np.load('./0/data/X_val.npy')
y_train = np.load('./0/data/y_train.npy')
y_test = np.load('./0/data/y_test.npy')
y_val = np.load('./0/data/y_val.npy')

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RandomizedSearchCV
from pprint import pprint

# Tuning RF

In [18]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 20, stop = 200, num = 5)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(1, 45, num = 3)]
# Minimum number of samples required to split a node
min_samples_split = [5, 10]


# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'random_state':[0]}

print(random_grid)

{'n_estimators': [20, 65, 110, 155, 200], 'max_features': ['auto', 'sqrt'], 'max_depth': [1, 23, 45], 'min_samples_split': [5, 10], 'random_state': [0]}


In [19]:
%%time
rf_random = RandomizedSearchCV(estimator = RandomForestClassifier(), param_distributions = random_grid, n_iter = 10, cv = 5, verbose=2)
# Fit the random search model
rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=45, max_features=sqrt, min_samples_split=10, n_estimators=155, random_state=0; total time=  33.8s
[CV] END max_depth=45, max_features=sqrt, min_samples_split=10, n_estimators=155, random_state=0; total time=  34.1s
[CV] END max_depth=45, max_features=sqrt, min_samples_split=10, n_estimators=155, random_state=0; total time=  32.1s
[CV] END max_depth=45, max_features=sqrt, min_samples_split=10, n_estimators=155, random_state=0; total time=  32.0s
[CV] END max_depth=45, max_features=sqrt, min_samples_split=10, n_estimators=155, random_state=0; total time=  32.0s
[CV] END max_depth=45, max_features=auto, min_samples_split=10, n_estimators=65, random_state=0; total time=  13.3s
[CV] END max_depth=45, max_features=auto, min_samples_split=10, n_estimators=65, random_state=0; total time=  13.5s
[CV] END max_depth=45, max_features=auto, min_samples_split=10, n_estimators=65, random_state=0; total time=  13.5s
[CV] E

RandomizedSearchCV(cv=5, estimator=RandomForestClassifier(),
                   param_distributions={'max_depth': [1, 23, 45],
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_split': [5, 10],
                                        'n_estimators': [20, 65, 110, 155, 200],
                                        'random_state': [0]},
                   verbose=2)

In [20]:
rf_random.best_estimator_ , rf_random.best_score_

(RandomForestClassifier(max_depth=45, min_samples_split=5, n_estimators=200,
                        random_state=0),
 0.9921058965102286)

In [21]:
type(rf_random.best_estimator_)

sklearn.ensemble._forest.RandomForestClassifier

In [22]:
RF = rf_random.best_estimator_

In [23]:
RF.fit(X_train,y_train)
y_pred_val = RF.predict(X_val)
y_pred_test = RF.predict(X_test)

print('val accuracy:',accuracy_score(y_val, y_pred_val))
print('test accuracy:',accuracy_score(y_test, y_pred_test))

val accuracy: 0.9940802772162864
test accuracy: 0.9948021946289345


# GB tuning

In [33]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 40, num = 4)]
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(start = 5, stop = 12, num = 4)]



# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'learning_rate' : [0.05, 0.1, 0.15, 0.2],
               'max_features': ['auto', 'sqrt'],
               'max_depth': max_depth,
               'random_state': [0]
              }

pprint(random_grid)

{'learning_rate': [0.05, 0.1, 0.15, 0.2],
 'max_depth': [5, 7, 9, 12],
 'max_features': ['auto', 'sqrt'],
 'n_estimators': [10, 20, 30, 40],
 'random_state': [0]}


In [36]:
%%time
GBDT_random = RandomizedSearchCV(estimator = GradientBoostingClassifier(), param_distributions = random_grid, n_iter = 10, cv = 5, verbose=2)
# Fit the random search model
GBDT_random.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END learning_rate=0.2, max_depth=12, max_features=auto, n_estimators=20, random_state=0; total time=52.2min
[CV] END learning_rate=0.2, max_depth=12, max_features=auto, n_estimators=20, random_state=0; total time=52.3min
[CV] END learning_rate=0.2, max_depth=12, max_features=auto, n_estimators=20, random_state=0; total time=52.3min
[CV] END learning_rate=0.2, max_depth=12, max_features=auto, n_estimators=20, random_state=0; total time=52.2min
[CV] END learning_rate=0.2, max_depth=12, max_features=auto, n_estimators=20, random_state=0; total time=51.9min
[CV] END learning_rate=0.2, max_depth=5, max_features=auto, n_estimators=20, random_state=0; total time=22.1min
[CV] END learning_rate=0.2, max_depth=5, max_features=auto, n_estimators=20, random_state=0; total time=22.0min
[CV] END learning_rate=0.2, max_depth=5, max_features=auto, n_estimators=20, random_state=0; total time=22.0min
[CV] END learning_rate=0.2, max_depth=

RandomizedSearchCV(cv=5, estimator=GradientBoostingClassifier(),
                   param_distributions={'learning_rate': [0.05, 0.1, 0.15, 0.2],
                                        'max_depth': [5, 7, 9, 12],
                                        'max_features': ['auto', 'sqrt'],
                                        'n_estimators': [10, 20, 30, 40],
                                        'random_state': [0]},
                   verbose=2)

In [37]:
GBDT_random.best_estimator_ , GBDT_random.best_score_

(GradientBoostingClassifier(learning_rate=0.2, max_depth=9, max_features='auto',
                            n_estimators=40, random_state=0),
 0.9723225030084237)

In [38]:
GB = GBDT_random.best_estimator_
GB.fit(X_train,y_train)
y_pred_val = GB.predict(X_val)
y_pred_test = GB.predict(X_test)

print('val accuracy:',accuracy_score(y_val, y_pred_val))
print('test accuracy:',accuracy_score(y_test, y_pred_test))

val accuracy: 0.9770430262777938
test accuracy: 0.978775628068149


In [41]:
if not os.path.exists('./model_params/'):
    os.makedirs('./model_params')

with open('./model_params/GB_params_tuning.pkl', 'wb') as f:
        pickle.dump(GBDT_random.best_estimator_, f)
with open('./model_params/RF_params_tuning.pkl', 'wb') as f:
        pickle.dump(rf_random.best_estimator_, f)

# 1-NN

In [7]:
OneNN_model = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

In [12]:
y_pred_val = OneNN_model.predict(X_val)
y_pred_test = OneNN_model.predict(X_test)
print('val accuracy:',accuracy_score(y_val, y_pred_val))
print('test accuracy:',accuracy_score(y_test, y_pred_test))

val accuracy: 0.998556165174704
test accuracy: 0.9988449321397632


In [10]:
if not os.path.exists('./model_params/'):
    os.makedirs('./model_params')
with open('./model_params/OneNN_params_tuning.pkl', 'wb') as f:
        pickle.dump(OneNN_model, f)

# CNN

In [4]:
batch_size = 128
num_classes = 24
epochs = 50

In [5]:
from tensorflow.keras.optimizers import SGD
pixels  = int(sqrt(X_train.shape[1]))
classes = len(np.unique(y_test))

model = Sequential()
model.add(Conv2D(64, kernel_size=(3,3), activation = 'relu', input_shape=(28, 28 ,1) ))
model.add(MaxPooling2D(pool_size = (2, 2)))

model.add(Conv2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))

model.add(Conv2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))

model.add(Flatten())
model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.20))

model.add(Dense(num_classes, activation = 'softmax'))

model.compile(loss = keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
# model.save(f'./model_params/CNN_params_tuning')

In [6]:
from numpy.random import seed
seed(0)
import tensorflow as tf
tf.random.set_seed(0)

In [8]:
#load model
num_labels = classes
pixels = int(sqrt(X_train.shape[1]))
# Reshaping to format which CNN expects (batch, height, width, channels)
trainX_cnn = X_train.reshape(X_train.shape[0], pixels, pixels, 1).astype('float32')
valX_cnn = X_val.reshape(X_val.shape[0], pixels, pixels, 1).astype('float32')
testX_cnn= X_test.reshape(X_test.shape[0], pixels, pixels, 1).astype('float32')

# Normalize images from 0-255 to 0-1
trainX_cnn /= 255
valX_cnn /= 255

train_y_cnn = utils.to_categorical(y_train, num_labels)
val_y_cnn = utils.to_categorical(y_val, num_labels)
test_y_cnn = utils.to_categorical(y_test, num_labels)

trainX_cnn, evalX_cnn, train_y_cnn , eval_y_cnn = train_test_split(trainX_cnn, train_y_cnn, test_size=0.2)

# Learn model
model.fit(trainX_cnn, train_y_cnn, validation_data=(evalX_cnn, eval_y_cnn), epochs=50, batch_size=128)
# model.fit(trainX_cnn, train_y_cnn, validation_data=(evalX_cnn, eval_y_cnn), epochs=5, batch_size=128)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2181735bf40>

In [9]:
# model = keras.models.load_model(f'./0/model/CNN_model')

In [10]:
y_pred_val=np.argmax(model.predict(valX_cnn),axis=1)
y_pred_test=np.argmax(model.predict(testX_cnn),axis=1)

In [11]:
print('val accuracy:',accuracy_score(y_val, y_pred_val))
print('test accuracy:',accuracy_score(y_test, y_pred_test))

val accuracy: 0.8904129367600346
test accuracy: 0.8228414669361825


In [12]:
model 

<keras.engine.sequential.Sequential at 0x218172b3eb0>

In [13]:
from keras.wrappers.scikit_learn import KerasClassifier

In [14]:
model_sklearn = KerasClassifier(build_fn=model,
                                 batch_size=128,
                                 epochs=5,
                                 )

In [15]:
model_c = CalibratedClassifierCV(base_estimator=model_sklearn,method='sigmoid', cv='prefit')


In [16]:
model_c.fit(X_val, y_val)

NotFittedError: This KerasClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.