In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.resnet50 import ResNet50, preprocess_input
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import OneHotEncoder
import numpy as np
# Baseline to beat: the stock ResNet50 with its ImageNet classification head.
model_to_beat = ResNet50(weights='imagenet')

# NOTE(review): 'val_x_resnet50.npy' is not one of the files written by
# make_validation_set() in this notebook (it saves 'val_x' and
# 'val_ResNet50_x') -- confirm where this file comes from.
x = np.load('val_x_resnet50.npy')
y = np.load('val_y.npy')
pred = model_to_beat.predict(x)

# One-hot matrix with a single 1 per row, at the arg-max class of that row.
row_ids = np.arange(len(pred))
tmp = np.zeros_like(pred)
tmp[row_ids, pred.argmax(1)] = 1

# One-hot encode the labels as well (as a single-column 2-D array, which is
# what OneHotEncoder expects) so both sides have the same representation.
tmp_y = OneHotEncoder().fit_transform(y.reshape((y.shape[0], 1)))

print(accuracy_score(tmp_y, tmp))

In [None]:
def top_n_accuracy(x, y, top=500):
    """Return the fraction of samples whose label is among the ``top`` best-scoring classes.

    Parameters
    ----------
    x : 2-D numpy array (or sequence of 1-D numpy arrays)
        Per-sample class scores; row ``i`` is ranked with ``argsort``.
    y : sequence
        ``y[i]`` is the class index looked up in the ranking of row ``i``.
    top : int, default 500
        How many of the highest-scoring classes count as a hit. Must be >= 1.

    Returns
    -------
    float
        Number of hits divided by ``len(x)``.

    Raises
    ------
    ValueError
        If ``top`` is smaller than 1.
    ZeroDivisionError
        If ``x`` is empty.
    """
    if top < 1:
        # A slice of ``[-0:]`` selects *every* class (reporting 100% accuracy)
        # and a negative value selects an arbitrary tail, so reject both.
        raise ValueError('top must be >= 1, got {!r}'.format(top))

    total_correct = 0
    for i in range(len(x)):
        # argsort is ascending, so the last ``top`` indices are the best
        # classes; their order does not matter for a membership test.
        if y[i] in x[i].argsort()[-top:]:
            total_correct += 1
    return total_correct / len(x)
# Top-N accuracy of the baseline predictions; `top` is left at the function
# default of 500 here.
# NOTE(review): assumes the labels in `y` use the same class indexing as the
# columns of `pred` -- TODO confirm.
top_n_accuracy(pred, y)



In [None]:
import numpy as np
from keras.preprocessing import image
import os
import tarfile
import pickle
from keras.applications.resnet50 import ResNet50, preprocess_input

# Root directory of the ImageNet dataset. Set the IMAGENET_DIR environment
# variable to point somewhere else; otherwise the original local path is used.
# os.path.join(..., '') guarantees a trailing separator, because the code below
# builds file paths by plain string concatenation.
path_to_imagenet = os.path.join(
    os.environ.get('IMAGENET_DIR',
                   '/media/louis/Seagate Backup Plus Drive/Datasets/ImageNet/'),
    '')
def make_validation_set():
    """Build the ImageNet validation arrays and save them in the working directory.

    Reads the ground-truth label file and every image under ``Validation/``
    (both located under the module-level ``path_to_imagenet``), resizes each
    image to 224x224, applies ``preprocess_input`` and writes three files:

    * ``val_x.npy``          -- preprocessed images, float32, (N, 224, 224, 3)
    * ``val_ResNet50_x.npy`` -- squeezed output of ``ResNet50(include_top=False)``
    * ``val_y.npy``          -- integer labels exactly as read from the label file

    Returns nothing.
    """
    val_labels_path = os.path.join(path_to_imagenet, 'ILSVRC2012_validation_ground_truth.txt')
    # Close the file deterministically and skip blank lines (e.g. a trailing
    # empty line) instead of blindly dropping the last line, which would lose a
    # real label whenever the file does not end with a blank line.
    with open(val_labels_path, 'r') as labels_file:
        val_labels = np.array([int(line) for line in labels_file if line.strip()])

    val_data_path = os.path.join(path_to_imagenet, 'Validation')
    # os.listdir() returns entries in arbitrary order. Sort them so that image
    # i lines up with label line i.
    # NOTE(review): assumes file names sort in the same order as the label
    # lines (as with the standard ILSVRC2012_val_XXXXXXXX.JPEG naming) -- confirm.
    all_val_data_path = sorted(os.listdir(val_data_path))
    # One float32 slot per image; the whole set is held in memory at once.
    all_val_data = np.zeros(shape=(len(all_val_data_path), 224, 224, 3), dtype=np.float32)

    for i, file_name in enumerate(all_val_data_path):
        img = image.load_img(os.path.join(val_data_path, file_name), target_size=(224, 224))
        pixels = image.img_to_array(img).astype(np.float32)
        # Preprocess the temporary array rather than a view into the big
        # buffer, so the result never depends on in-place aliasing.
        all_val_data[i] = preprocess_input(pixels)
        if i % 5000 == 0:
            print(i)

    np.save('val_x', all_val_data)
    # Headless ResNet50: turn every image into its feature vector.
    cut_model = ResNet50(include_top=False)
    all_val_data = cut_model.predict(all_val_data)
    all_val_data = np.squeeze(all_val_data)
    np.save('val_ResNet50_x', all_val_data)
    np.save('val_y', val_labels)


# def make_training_set(cut_model):

#     train_data_path = path_to_imagenet + 'Training/'
#     all_train_data = os.listdir(train_data_path)
#     train_data = []
#     train_val = []
#     for i in range(len(all_train_data)):
#         class_images = os.listdir(train_data_path + all_train_data[i])
#         tmp_train_data = []
#         for images in class_images:
#             tmp_train_data.append(image.img_to_array(
#                 image.load_img(train_data_path + all_train_data[i] + '/' + images, 
#                                      target_size=(224, 224))).astype(np.float32))
#             train_val.append(i)
#         tmp_train_data = np.asarray(tmp_train_data) 
#         tmp_train_data = preprocess_input(tmp_train_data).astype(np.float32)
#         tmp_train_data = cut_model.predict(tmp_train_data)
#         tmp_train_data = np.squeeze(tmp_train_data)
#         train_data.append(tmp_train_data)
#         if i % 10 == 0:
#             print(i)
#     train_data = np.concatenate(train_data)
#     print(train_data.shape)
#     np.save('train_ResNet50_x', train_data)
#     train_val = np.asarray(train_val)
#     np.save('train_ResNet50_y', train_val)
        
        
# Runs the full validation-set build; as a side effect this saves the arrays
# 'val_x', 'val_ResNet50_x' and 'val_y' via np.save in the working directory.
make_validation_set()

In [None]:
import pandas as pd
import numpy as np
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

def unison_shuffled_copies(a, b, seed=None):
    """Return shuffled copies of ``a`` and ``b`` that share one permutation.

    Element ``i`` of the first result and element ``i`` of the second result
    come from the same original position, so paired data stays paired.

    Parameters
    ----------
    a, b : numpy arrays of equal length (along the first axis)
    seed : int or None, default None
        ``None`` draws the permutation from NumPy's global random state (the
        original behaviour). Any other value seeds a private generator, which
        makes the shuffle reproducible and leaves the global state untouched.

    Returns
    -------
    tuple
        ``(a[p], b[p])`` for a random permutation ``p``; the inputs are not
        modified.
    """
    assert len(a) == len(b)
    if seed is None:
        p = np.random.permutation(len(a))
    else:
        p = np.random.RandomState(seed).permutation(len(a))
    return a[p], b[p]

# Seed NumPy's global RNG so the shuffle below -- and therefore the training
# subsample every later cell works on -- is identical on each run.
np.random.seed(42)

train_x, train_y = np.load('train_ResNet50_x.npy'), np.load('train_ResNet50_y.npy')
train_x, train_y = unison_shuffled_copies(train_x, train_y)
val_x, val_y = np.load('val_ResNet50_x.npy'), np.load('val_y.npy')
# Keep only the first fifth of the shuffled training data.
train_x, train_y = train_x[:len(train_x)//5], train_y[:len(train_y)//5]
print(train_x.shape, val_x.shape, train_y.shape, val_y.shape)

(256233, 2048) (50000, 2048) (256233,) (50000,)


In [None]:
# Search space for the LightGBM hyper-parameters tuned by `objective`.
# Bounds are written in normalised form; the values are unchanged.
# NOTE(review): the learning-rate range is 1e-4 .. 1.0 -- confirm that
# 1e-5 .. 1e-1 was not the intended range.
space = [
    Integer(2, 200, name='n_estimators'),
    Real(1e-4, 1.0, prior="log-uniform", name='learning_rate'),
    Integer(2, 20, name='num_leaves'),
    Real(1e-9, 1.0, name='reg_alpha'),
    Real(1e-9, 1.0, name='reg_lambda'),
]

# Single estimator instance that `objective` re-parameterises on every call.
clf = LGBMClassifier(verbose=-1)

@use_named_args(space)
def objective(**params):
    """Score one hyper-parameter setting for the optimiser.

    Prints the setting, applies it to the module-level ``clf`` and returns the
    negated mean of the 2-fold cross-validated ``neg_log_loss`` on
    ``train_x`` / ``train_y`` (lower is better).
    """
    print(params)
    clf.set_params(**params)
    fold_scores = cross_val_score(clf, train_x, train_y, cv=2,
                                  scoring='neg_log_loss', verbose=True)
    return -np.mean(fold_scores)

# Minimise `objective` over `space` with a fixed budget of 10 evaluations and
# print the best parameter vector found (in the order the space is declared).
res_gp = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=10,
    random_state=42,
    verbose=True,
)
print(res_gp.x)

In [None]:
# The search result (res_gp.x) is not applied here: the classifier is trained
# with LightGBM's default hyper-parameters. To use the tuned values instead,
# build it as
#   LGBMClassifier(n_estimators=res_gp.x[0], learning_rate=res_gp.x[1],
#                  num_leaves=res_gp.x[2], reg_alpha=res_gp.x[3],
#                  reg_lambda=res_gp.x[4])
best_clf = LGBMClassifier()
best_clf.fit(X=train_x, y=train_y)

In [None]:
# Evaluate the fitted classifier on the held-out validation features.
# `best_clf` is the estimator that was actually fitted; `clf` is only handed to
# cross_val_score (which fits clones of it) and is never fitted itself.
pred = best_clf.predict(val_x)

# predict() returns one class label per sample, so it can be compared with
# val_y directly; no one-hot encoding is needed.
# NOTE(review): this is only meaningful if train_ResNet50_y.npy and val_y.npy
# use the same label IDs -- confirm.
print(accuracy_score(val_y, pred))