# Loading packages

In [1]:
import gc
import os
import pickle
import random
from os import path

import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegressionCV
from tensorflow.keras import layers, losses, optimizers, metrics, callbacks, Model, Input, regularizers
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.utils import image_dataset_from_directory

In [2]:
# Cap how much memory TensorFlow may claim on the first GPU.
GPU_MEMORY_LIMIT_MB = 9048

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)]
        )
    except RuntimeError as err:
        # Logical devices can only be configured before the GPU has been
        # initialized. Re-running this cell in a live kernel would otherwise
        # abort the notebook; report the problem and keep the existing setup.
        print(err)

logical_gpus = tf.config.list_logical_devices('GPU')
print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")

1 Physical GPU, 1 Logical GPUs


In [3]:
# Notebook-wide configuration constants.
SEED = 123  # seed value intended for reproducibility
N_CLASS = 10  # number of target classes (one-hot depth and width of the softmax output layer)
IMG_SIZE = 224  # side length in pixels; images are resized to IMG_SIZE x IMG_SIZE for ResNet50
BATCH_SIZE = 512  # intended batch size for the image input pipeline
AUTOTUNE = tf.data.AUTOTUNE  # tf.data sentinel that lets the runtime tune parallelism/buffering

# Prepare train dataset

In [4]:
# Load the training labels and derive, for every image id, an integer class
# code and the file name of the corresponding PNG.
labels_df = pd.read_csv(path.join(os.getcwd(), '..', 'cifar-10', 'trainLabels.csv'))
labels_df['label'] = pd.Categorical(labels_df['label'])
labels_df['codes'] = labels_df['label'].cat.codes
labels_df['name'] = labels_df['id'].astype(str) + ".png"
# NOTE: the original cell called `labels_df.sort_values(by="name")` without
# keeping the result, which had no effect. Row order is irrelevant here because
# the frame is only used as the right-hand side of a merge keyed on `name`.
labels_df

Unnamed: 0,id,label,codes,name
0,1,frog,6,1.png
1,2,truck,9,2.png
2,3,truck,9,3.png
3,4,deer,4,4.png
4,5,automobile,1,5.png
...,...,...,...,...
49995,49996,bird,2,49996.png
49996,49997,frog,6,49997.png
49997,49998,truck,9,49998.png
49998,49999,automobile,1,49999.png


In [5]:
# Build the table of training images in the exact order Keras will read them.
# `image_dataset_from_directory` pairs an explicit label list with the image
# files in sorted path order, whereas a raw directory listing comes back in
# arbitrary, filesystem-dependent order. Sorting here guarantees that row k of
# `filenames` describes the k-th image of the dataset.
images_dir = path.join(os.getcwd(), '..', 'cifar-10', 'train', 'train_images')
image_names = sorted(os.listdir(images_dir))
filenames = pd.DataFrame(image_names, columns=["name"])
# The inner merge keeps the (sorted) order of the left frame and drops any
# directory entry that has no label.
filenames = filenames.merge(labels_df, on="name")
filenames

Unnamed: 0,name,id,label,codes
0,1.png,1,frog,6
1,10.png,10,cat,3
2,100.png,100,automobile,1
3,1000.png,1000,dog,5
4,10000.png,10000,dog,5
...,...,...,...,...
49995,9995.png,9995,frog,6
49996,9996.png,9996,cat,3
49997,9997.png,9997,truck,9
49998,9998.png,9998,automobile,1


In [6]:
# Stream the training images in a fixed (unshuffled) order so that predictions
# line up row-for-row with `filenames`.
dataset_train = image_dataset_from_directory(
    directory=path.join(os.getcwd(), '..', 'cifar-10', 'train'),
    labels=list(filenames['codes']),
    label_mode='int',
    shuffle=False,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,  # use the shared constant instead of a hard-coded 512
)
# Apply the ResNet50 input preprocessing and one-hot encode the labels.
# The parallel map keeps element order (tf.data is deterministic by default),
# and prefetching overlaps image decoding with model execution.
dataset_train = dataset_train.map(
    lambda x, y: (preprocess_input(x), tf.one_hot(y, N_CLASS)),
    num_parallel_calls=AUTOTUNE,
).prefetch(AUTOTUNE)

Found 50000 files belonging to 1 classes.


In [7]:
def get_model():
    """Build the ResNet50-based classifier used by every ensemble member.

    The ImageNet-pretrained ResNet50 backbone (without its top) is followed by
    global average pooling, dropout, a 2048-unit ReLU dense layer, batch
    normalization, dropout and an N_CLASS-way softmax layer.

    Returns:
        A Keras ``Model`` mapping (IMG_SIZE, IMG_SIZE, 3) images to N_CLASS
        class probabilities.
    """
    resnet = ResNet50(input_shape=(IMG_SIZE, IMG_SIZE, 3), weights="imagenet", include_top=False)

    # Freeze every backbone layer except those whose name starts with "conv5".
    for layer in resnet.layers:
        if not layer.name.startswith('conv5'):
            layer.trainable = False

    x = layers.GlobalAveragePooling2D()(resnet.output)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(2048, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    output = layers.Dense(N_CLASS, activation="softmax")(x)
    model = Model(resnet.input, output)
    return model

# Instantiate once to inspect the architecture.
model = get_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              

In [8]:
# Collect the predictions of each of the 10 trained ensemble members on the
# training set; `results[i]` holds the output of the model restored from
# weights/weights_{i}.h5.
results = []
for i in range(10):
    # Building many models in one process makes Keras' global state grow with
    # every iteration; reset it before constructing the next model.
    tf.keras.backend.clear_session()
    model = get_model()
    model.load_weights(f"weights/weights_{i}.h5")
    results.append(model.predict(dataset_train))
    gc.collect()



In [9]:
# Sanity check: number of per-model prediction arrays collected so far.
len(results)

10

In [10]:
# Place the per-model prediction matrices side by side so that every row
# carries the outputs of all ensemble members for one training image.
train_dataset = np.hstack(results)
train_dataset.shape

(50000, 100)

# Learn ensemble

In [11]:
# Meta-learner: a cross-validated logistic regression fitted on the stacked
# base-model outputs, with the integer class codes as targets.
y = filenames["codes"]
lr = LogisticRegressionCV(n_jobs=-1, Cs=50)
lr.fit(train_dataset, y)

In [13]:
# Persist the fitted meta-learner next to the base-model weights.
# (`pickle` is imported with the other packages at the top of the notebook.)
with open("weights/lr.pickle", "wb") as file:
    pickle.dump(lr, file)

## Loading test dataset

In [17]:
# Read the saved test-set predictions of the 10 base models and join them
# column-wise, mirroring the layout of the training matrix.
results = [np.load(f"results/result_{i}.npy") for i in range(10)]

test_dataset = np.concatenate(results, axis = 1)
test_dataset.shape

(300000, 100)

In [18]:
# Turn the stacked base-model outputs into final predictions with the fitted
# meta-learner. The values are the integer class codes it was fitted on.
final_result = lr.predict(test_dataset)
final_result.shape

(300000,)

In [19]:
# Store the ensemble's predicted class codes for the test set on disk.
np.save("results/final_result", final_result)