In [1]:
# Import required packages
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
import tensorflow_hub as hub

In [2]:
#!pip install opencv-python
#!pip install tensorflow
# !pip install tensorflow_hub

## 1. Load the datasets

For the project, we provide a training set with 50000 images in the directory `../data/images/` with:
- noisy labels for all images provided in `../data/noisy_labels.csv`;
- clean labels for the first 10000 images provided in `../data/clean_labels.csv`. 

In [3]:
# [DO NOT MODIFY THIS CELL]

# load the images
# n_img is the total number of training images; n_clean_noisy evaluates to
# n_img - n_noisy = 10000.
n_img = 50000
n_noisy = 40000
n_clean_noisy = n_img - n_noisy
# Pre-allocate one 32x32x3 slot per image (np.empty defaults to float64 and
# leaves the memory uninitialised until the loop below fills it).
imgs = np.empty((n_img,32,32,3))
for i in range(n_img):
    # Image files are numbered from 1 with five zero-padded digits (00001.png, ...).
    img_fn = f'../data/images/{i+1:05d}.png'
    # OpenCV reads images in BGR channel order; convert so that imgs stores RGB.
    imgs[i,:,:,:]=cv2.cvtColor(cv2.imread(img_fn),cv2.COLOR_BGR2RGB)

# load the labels
# Both label files are parsed as comma-delimited int8 arrays.
clean_labels = np.genfromtxt('../data/clean_labels.csv', delimiter=',', dtype="int8")
noisy_labels = np.genfromtxt('../data/noisy_labels.csv', delimiter=',', dtype="int8")

For illustration, we present a small subset (of size 8) of the images with their clean and noisy labels in `clean_noisy_trainset`. You are encouraged to explore more characteristics of the label noise on the whole dataset. 

In [None]:
# [DO NOT MODIFY THIS CELL]

# Display the first eight images in a 2x4 grid and print their labels.
fig = plt.figure()

# Top row: images 0-3. Pixel values are divided by 255 so that imshow
# receives floats in the [0, 1] range.
ax1 = fig.add_subplot(2,4,1)
ax1.imshow(imgs[0]/255)
ax2 = fig.add_subplot(2,4,2)
ax2.imshow(imgs[1]/255)
ax3 = fig.add_subplot(2,4,3)
ax3.imshow(imgs[2]/255)
ax4 = fig.add_subplot(2,4,4)
ax4.imshow(imgs[3]/255)
# Bottom row: images 4-7 (the names ax1..ax4 are reused for these axes).
ax1 = fig.add_subplot(2,4,5)
ax1.imshow(imgs[4]/255)
ax2 = fig.add_subplot(2,4,6)
ax2.imshow(imgs[5]/255)
ax3 = fig.add_subplot(2,4,7)
ax3.imshow(imgs[6]/255)
ax4 = fig.add_subplot(2,4,8)
ax4.imshow(imgs[7]/255)

# The class-label correspondence
# The integer label k maps to classes[k].
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# print clean labels
# '%5s' right-aligns each class name in a five-character column.
print('Clean labels:')
print(' '.join('%5s' % classes[clean_labels[j]] for j in range(8)))
# print noisy labels
print('Noisy labels:')
print(' '.join('%5s' % classes[noisy_labels[j]] for j in range(8)))

## 2. The predictive model

We consider a baseline model directly on the noisy dataset without any label corrections. RGB histogram features are extracted to fit a logistic regression model.

### 2.1. Baseline Model

In [None]:
# [DO NOT MODIFY THIS CELL]
# RGB histogram dataset construction
# Note: no_bins is the number of bin EDGES passed to np.linspace, so each
# channel is split into len(bins)-1 = 5 bins and every image gets 3*5 = 15
# features.
no_bins = 6
bins = np.linspace(0,255,no_bins) # the range of the rgb histogram
# np.empty defaults to float64, so the integer labels are stored as floats.
target_vec = np.empty(n_img)
feature_mtx = np.empty((n_img,3*(len(bins)-1)))
# The initialisation below and the `i += 1` at the end of the loop body are
# redundant: the for statement rebinds i on every iteration.
i = 0
for i in range(n_img):
    # The target vector consists of noisy labels
    target_vec[i] = noisy_labels[i]
    
    # Use the numbers of pixels in each bin for all three channels as the features
    # (np.histogram returns (counts, edges); [0] keeps the counts only)
    feature1 = np.histogram(imgs[i][:,:,0],bins=bins)[0] 
    feature2 = np.histogram(imgs[i][:,:,1],bins=bins)[0]
    feature3 = np.histogram(imgs[i][:,:,2],bins=bins)[0]
    
    # Concatenate three features
    # axis=None flattens the inputs before joining them into one row vector.
    feature_mtx[i,] = np.concatenate((feature1, feature2, feature3), axis=None)
    i += 1

In [None]:
# [DO NOT MODIFY THIS CELL]
# Train a logistic regression model 
# Fitted on the histogram features against the noisy labels stored in
# target_vec; random_state=0 fixes the estimator's random seed.
clf = LogisticRegression(random_state=0).fit(feature_mtx, target_vec)

For the convenience of evaluation, we write the following function `baseline_model` that does the label prediction. **For your predictive model, feel free to modify the function, but make sure the function takes an RGB image of numpy.array format with dimension $32\times32\times3$  as input, and returns one single label as output.**

In [None]:
# [DO NOT MODIFY THIS CELL]
def baseline_model(image):
    '''
    This is the baseline predictive model that takes in the image and returns a label prediction
    '''
    # Per-channel pixel-count histograms over the notebook-level `bins` edges,
    # built the same way as the rows of the training feature matrix.
    feature1 = np.histogram(image[:,:,0],bins=bins)[0]
    feature2 = np.histogram(image[:,:,1],bins=bins)[0]
    feature3 = np.histogram(image[:,:,2],bins=bins)[0]
    # reshape(1,-1) turns the flat feature vector into a single-row matrix,
    # which is the 2-D input shape clf.predict expects.
    feature = np.concatenate((feature1, feature2, feature3), axis=None).reshape(1,-1)
    # The prediction comes back as an array holding one label.
    return clf.predict(feature)

### 2.2. Model I

We used a ResNet50 model, pre-trained on ImageNet, to predict the labels of the images.

In [None]:
## split train and test noisy data
# Random 80/20 split of ALL images with their noisy labels:
#   X_train / Y_train - 80% used to fit Model I
#   X_val   / Y_val   - 20% held out as validation data
print(noisy_labels.shape)
print(imgs.shape)
n_samples=imgs.shape[0]
# Use a seeded generator so that the split (and therefore every number
# reported for Model I) is identical after "Restart Kernel -> Run All".
split_rng = np.random.default_rng(0)
perm = split_rng.permutation(n_samples)
test_size = int(n_samples * 0.2)
# The first 20% of the shuffled indices form the validation set, the rest the
# training set; the two index sets are disjoint by construction.
val_idx = perm[:test_size]
train_idx = perm[test_size:]
Y_train, X_train = noisy_labels[train_idx], imgs[train_idx]
Y_val, X_val= noisy_labels[val_idx], imgs[val_idx]
print(X_train.shape,Y_train.shape,X_val.shape,Y_val.shape)#train/test split

In [None]:
# #train/test split for clean data (run only one of the noisy-data or clean-data split cells, not both: they define the same variable names, so the later one overwrites the earlier)
# print(clean_labels.shape)
# print(imgs[:10000].shape)
# n_samples=imgs[:10000].shape[0]
# perm = np.random.permutation(n_samples)
# test_size = int(n_samples * 0.2)
# val_idx = perm[:test_size]
# train_idx = perm[test_size:]
# Y_train, X_train = clean_labels[train_idx], imgs[:10000][train_idx]
# Y_val, X_val= clean_labels[val_idx], imgs[:10000][val_idx]
# print(X_train.shape,Y_train.shape,X_val.shape,Y_val.shape)#train/test split

In [None]:
# train a model
# Model I: a frozen ImageNet ResNet50 feature extractor followed by a small
# trainable dense classification head with a 10-way softmax output.
base_model = tf.keras.applications.resnet.ResNet50(weights='imagenet', 
                                                   input_shape=(32, 32, 3),
                                                   include_top=False)#include_top=False means not include the output layer
base_model.trainable = False #Do not train the transfered model again
inputs = tf.keras.Input(shape=(32, 32, 3))
x = tf.keras.applications.resnet.preprocess_input(inputs)#make sure to use the corresponding preprocessing
# training=False keeps the base model in inference mode inside the graph.
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(1024,activation="relu")(x)
x = tf.keras.layers.Dense(512,activation="relu")(x)
x = tf.keras.layers.Dense(128,activation="relu")(x)
outputs = tf.keras.layers.Dense(10,activation="softmax")(x)
model = tf.keras.Model(inputs, outputs)
# Labels are integer class ids, hence the sparse loss/metric. `metrics` is
# given as a list: that is the documented form, and newer Keras versions
# reject a bare metric object.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
# Early stopping: halt once the validation loss has not improved for 3
# consecutive epochs, and roll back to the weights of the best epoch (the same
# policy as the Model II callback) instead of keeping the weights of the last,
# already-degraded epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=3,
                                                  restore_best_weights=True)

In [None]:
def model_I(image):
    '''
    Predict class labels with Model I (the notebook-level Keras `model`).

    Parameters
    ----------
    image : array-like
        Either a single RGB image of shape (32, 32, 3) or a batch of images of
        shape (n, 32, 32, 3).

    Returns
    -------
    A single integer label for a single image, or an array of n integer labels
    for a batch.
    '''
    image = np.asarray(image)
    # Keras' predict() always expects a leading batch axis. A lone 32x32x3 image
    # (which is what the evaluation loop passes) must therefore be wrapped into
    # a batch of one before prediction and unwrapped afterwards.
    is_single = image.ndim == 3
    batch = image[np.newaxis, ...] if is_single else image
    labels = np.argmax(model.predict(batch), axis=-1)
    return labels[0] if is_single else labels

In [None]:
import time

# Fit Model I on the training split and report how long it took.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed intervals; time.time() is wall-clock time and can jump
# if the system clock is adjusted during a long training run.
start = time.perf_counter()
# Up to 100 epochs; the early-stopping callback normally ends training sooner.
history = model.fit(X_train,Y_train, epochs=100, validation_data=(X_val,Y_val),callbacks=[early_stopping])
end = time.perf_counter()

print('Model 1 training takes '+ str(end - start)+ ' seconds')

In [None]:
# Print the layer-by-layer architecture and parameter counts of Model I.
model.summary()

### 2.3. Model II

We used VGG16 in Model II. We trained the VGG16 model based on the predicted labels of Model I. We could see the accuracy went up to 57% in comparison to Model I. 
- Model II takes about 111 seconds to run 
- Early stopping is used to avoid overfitting.

In [None]:
import tensorflow.keras as keras
#from tensorflow.keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers, models

In [None]:
## break down train and test
# The clean labels cover only the leading images; the images after them are
# paired with their noisy labels.
#
# The label lists are bound to NEW names instead of overwriting the
# notebook-level `clean_labels` / `noisy_labels`. Overwriting them made this
# cell non-idempotent (a second run sliced the already-truncated noisy labels
# again and train_test_split failed on mismatched lengths) and silently broke
# earlier cells that index the full label arrays.
n_clean = len(clean_labels)
clean_label_list = list(clean_labels)
noisy_label_list = list(noisy_labels[n_clean:])

# 80/20 splits with a fixed seed, done separately for the clean and the noisy part.
clean_imgs_train_x, test_x, clean_imgs_train_y, test_y  = train_test_split(imgs[:n_clean], clean_label_list, test_size = 0.2, random_state=5)
noisy_imgs_train_x, noisy_test_x, noise_imgs_train_y, noisy_test_y  = train_test_split(imgs[n_clean:], noisy_label_list, test_size = 0.2, random_state=5)

# Training set = clean-label training part + noisy-label training part.
imgs_train_x = np.concatenate((clean_imgs_train_x, noisy_imgs_train_x))
imgs_train_y = np.concatenate((clean_imgs_train_y, noise_imgs_train_y))
# One-hot encode the targets (10 classes) for the categorical cross-entropy loss.
imgs_train_y = tf.one_hot(imgs_train_y, depth = 10)
# Test targets come from the clean-label hold-out only.
imgs_test_y = tf.one_hot(test_y, depth = 10)

In [None]:
## load the VGG16 model and define

def vgg16():
    """
    Build and compile a fresh VGG16-based classifier for 32x32x3 RGB images.

    The network is a frozen ImageNet VGG16 convolutional base followed by a
    small trainable dense head ending in a 10-way softmax. It expects raw RGB
    pixel values in [0, 255] as input and one-hot encoded targets.

    Every call creates a NEW model whose dense head is randomly initialised, so
    the returned object must be kept and reused for both fit() and predict().

    Returns
    -------
    A compiled Keras Sequential model.
    """
    conv_base = VGG16(weights="imagenet", include_top=False, input_shape=(32, 32, 3))
    conv_base.trainable = False ## Not trainable weights

    classifier = models.Sequential([
        # The frozen ImageNet weights were trained on inputs prepared by
        # vgg16.preprocess_input (RGB->BGR, per-channel mean subtraction, no
        # scaling). Feeding them pixels rescaled to [0, 1] instead puts the
        # inputs far outside the range the convolutional filters were trained
        # on, so the matching preprocessing is applied here.
        layers.Lambda(preprocess_input, input_shape=(32, 32, 3), name="vgg16_preprocess"),
        conv_base,
        # adding layers: trainable classification head
        layers.Flatten(),
        layers.Dense(50, activation='relu'),
        layers.Dense(20, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])

    # compile
    classifier.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

    return classifier

## define model II

def model_II(image):
    '''
    Predict class labels with Model II (VGG16).

    The model is obtained by calling the notebook-level `model_vgg`, which must
    be a zero-argument callable returning a Keras model. For meaningful
    predictions that callable has to return the FITTED model: if it is a plain
    model factory, each call yields a freshly built, untrained network.

    Parameters
    ----------
    image : array-like
        Either a single RGB image of shape (32, 32, 3) or a batch of images of
        shape (n, 32, 32, 3).

    Returns
    -------
    A single integer label for a single image, or an array of n integer labels
    for a batch.
    '''
    image = np.asarray(image)
    # Keras' predict() always expects a leading batch axis. A lone 32x32x3 image
    # (which is what the evaluation loop passes) must therefore be wrapped into
    # a batch of one before prediction and unwrapped afterwards.
    is_single = image.ndim == 3
    batch = image[np.newaxis, ...] if is_single else image
    labels = np.argmax(model_vgg().predict(batch), axis = 1)
    return labels[0] if is_single else labels

In [None]:
## train VGG16 model with predicted labels from Model I
# Model I's predictions over the whole image set serve as training labels for Model II.
predicted_labels = model_I(imgs)
pl = list(predicted_labels)
pl_train_x, pl_test_x, pl_train_y, pl_test_y = train_test_split(imgs, pl, test_size=0.2, random_state = 5)
# One-hot encode the targets (10 classes) for the categorical cross-entropy loss.
pl_train_y = tf.one_hot(pl_train_y, depth = 10)

## run model
# Build the network exactly ONCE and keep a reference to it. Previously
# `model_vgg` was bound to the `vgg16` factory itself, so `model_vgg().fit(...)`
# trained a temporary model that was thrown away immediately, and every later
# `model_vgg().predict(...)` ran on a brand-new model with an untrained head.
vgg_trained = vgg16()
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=3,  restore_best_weights=True) #early stopping to avoid overfitting
vgg_trained.fit(pl_train_x, pl_train_y, epochs=50, validation_split=0.2, batch_size=512, callbacks=[es])


def model_vgg():
    """Return the fitted VGG16 model (the same object on every call).

    `model_vgg` stays a zero-argument callable so that existing callers using
    `model_vgg().predict(...)` keep working, but now receive the trained model.
    """
    return vgg_trained

## 3. Evaluation

For assessment, we will evaluate your final model on a hidden test dataset with clean labels by the `evaluation` function defined as follows. Although you will not have the access to the test set, the function would be useful for the model developments. For example, you can split the small training set, using one portion for weakly supervised learning and the other for validation purpose. 

In [None]:
# [DO NOT MODIFY THIS CELL]
def evaluation(model, test_labels, test_imgs):
    # Print a scikit-learn classification report for `model` on a labelled image set.
    #   model       - callable that is given ONE image at a time and returns its predicted label
    #   test_labels - true labels, in the same order as test_imgs
    #   test_imgs   - iterable of images; each element is passed to `model` unchanged
    y_true = test_labels
    y_pred = []
    # Predictions are collected one image at a time, so `model` must accept a
    # single image rather than a batch.
    for image in test_imgs:
        y_pred.append(model(image))
    print(classification_report(y_true, y_pred))

In [None]:
# [DO NOT MODIFY THIS CELL]
# This is the code for evaluating the prediction performance on a testset
# You will get an error if running this cell, as you do not have the testset
# Nonetheless, you can create your own validation set to run the evaluation
n_test = 10000
test_labels = np.genfromtxt('../data/test_labels.csv', delimiter=',', dtype="int8")
# Pre-allocate one 32x32x3 slot per test image.
test_imgs = np.empty((n_test,32,32,3))
for i in range(n_test):
    # Test files are numbered from 1 with five zero-padded digits (test00001.png, ...).
    img_fn = f'../data/test_images/test{i+1:05d}.png'
    # OpenCV reads images in BGR channel order; convert so that test_imgs stores RGB.
    test_imgs[i,:,:,:]=cv2.cvtColor(cv2.imread(img_fn),cv2.COLOR_BGR2RGB)
    
# Report the baseline model's performance on the test set.
evaluation(baseline_model, test_labels, test_imgs)

The overall accuracy is $0.24$, which is better than random guessing (which should have an accuracy of around $0.10$). For the project, you should try to improve the performance by the following strategies:

- Consider a better choice of model architectures, hyperparameters, or training scheme for the predictive model;
- Use both `clean_noisy_trainset` and `noisy_trainset` for model training via **weakly supervised learning** methods. One possible solution is to train a "label-correction" model using the former, correct the labels in the latter, and train the final predictive model using the corrected dataset.
- Apply techniques such as $k$-fold cross validation to avoid overfitting;
- Any other reasonable strategies.