# Deep Learning Project

## Milestone report

## Study of the impact on the ratio of labeled to unlabeled data on top-1 accuracy on the CIFAR-10 dataset

Pierre Andurand (pa2570)
Tzu Yi Chuang (tc3075)
Kuan Yu Ko (kk3376)


In [9]:
#This part is not needed for now

# Import standard libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

# Import TF layers
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D

# Import TF utilities
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Import TF pretrained models
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.nasnet import NASNetMobile

Below we train a simple model in supervised learning without data augmentation. We check its performance on the cifar10 dataset. It will be our un-noised teacher model. And we will compare its performance with 100 epochs to the semi supervised self-learning model in the following block

In [17]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

batch_size = 200
num_classes = 10
epochs = 100
data_augmentation = False
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher_name = 'keras_cifar10_trained_teacher.h5'

# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

teacher = Sequential()
teacher.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
teacher.add(Activation('relu'))
teacher.add(Conv2D(32, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Conv2D(64, (3, 3), padding='same'))
teacher.add(Activation('relu'))
teacher.add(Conv2D(64, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Flatten())
teacher.add(Dense(512))
teacher.add(Activation('relu'))
#model.add(Dropout(0.5))
teacher.add(Dense(num_classes))
teacher.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Compiling the model using RMSprop
teacher.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

#Training the model

if not data_augmentation:
    print('Not using data augmentation.')
    teacher.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    teacher.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_split=0.2,
                        workers=4)
    
# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
teacher_path = os.path.join(save_dir, teacher_name)
teacher.save(teacher_path)
print('Saved trained model at %s ' % teacher_path)

# Score trained model.
scores = teacher.evaluate(x_test, y_test, verbose=1)
print('Supervised learning model with '+str(epochs)+'epochs \n')
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
Not using data augmentation.
Train on 40000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Saved trained model at C:\Users\pandurand\Documents\Columbia\DL\project\saved_models\keras_cifar10_trained_teacher.h5 
Supervised learning model with 100epochs 

Test loss: 1.0106337991714478
Test accuracy: 0.7472000122070312


Below we study the semi supervised learning method as described in the article, and study the impact of the ratio of true labels to pseudo labels on accuracy and loss. 
We do a loop over different ratios of label to unlabelled data (rate). And each loop does the following:
1) Train un-noised model (teacher) on labeled data only 
2) Ten cycles of: un-noised model (teacher)->predict hard pseudolabel->training 10 epochs for noised model (student=teacher+dropout noise) on labeled+pseudo labeled->new weights. 

We check rates from 0.1 to 50. The student model will be the teacher model noised by a Dropout(0.5) before the last layer

In [16]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

#batch_size = 32
num_classes = 10
#epochs = 50
#data_augmentation = False
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher_name = 'keras_cifar10_trained_teacher.h5'

teacher_path = os.path.join(save_dir, teacher_name)

# Load the cifar10 data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


#making un-noised teacher model
teacher = Sequential()
teacher.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
teacher.add(Activation('relu'))
teacher.add(Conv2D(32, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Conv2D(64, (3, 3), padding='same'))
teacher.add(Activation('relu'))
teacher.add(Conv2D(64, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Flatten())
teacher.add(Dense(512))
teacher.add(Activation('relu'))
#model.add(Dropout(0.5)) #this will be uncommented for the noised student model
teacher.add(Dense(num_classes))
teacher.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Compile the teacher model using RMSprop
teacher.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255



#noised student model

student = Sequential()
student.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
student.add(Activation('relu'))
student.add(Conv2D(32, (3, 3)))
student.add(Activation('relu'))
student.add(MaxPooling2D(pool_size=(2, 2)))
student.add(Dropout(0.25))

student.add(Conv2D(64, (3, 3), padding='same'))
student.add(Activation('relu'))
student.add(Conv2D(64, (3, 3)))
student.add(Activation('relu'))
student.add(MaxPooling2D(pool_size=(2, 2)))
student.add(Dropout(0.25))

student.add(Flatten())
student.add(Dense(512))
student.add(Activation('relu'))
student.add(Dropout(0.5))
student.add(Dense(num_classes))
student.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Compiling the model using RMSprop
student.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])





# rate= n_true/n_pseudo
rate=np.array([0.1,0.2,0.3,0.5,0.75,1,2,3,4,5,10,20,30,50])
n_total=x_train.shape[0]

# total number of train images (n_total) = number of true label images (n_true) + number of pseudo label images (n_pseudo)
#                                        = n_pseudo(rate+1)
# n_pseudo = n_total/(rate+1); n_true=n_total-n_pseudo

#loop over rate values in order to find the optimal rate value for the self-learning semi supervised learning, 
#ie one that will maximize accuracy
for r in rate:
    print("rate="+str(r)+":\n")
    n_pseudo=n_total/(1+r)
    n_true=n_total-n_pseudo
    mask_true=np.random.choice(int(n_total),int(n_true),replace=False) #generating n_true integers between 0 and n_total-1
    mask_pseudo=[item for item in range(n_total) if item not in mask_true] #all the other numbers between 0 and n_total-1 which are not in mask_true
    mask_pseudo=np.array(mask_pseudo)
    x_true=x_train[mask_true[:]] #x for the labeled data
    y_true=y_train[mask_true[:]] #y for the labeled data
    x_pseudo=x_train[mask_pseudo[:]] #x for the unlabeled data (pseudo)
    #training teacher model on labeled data with validation split of 0.2
    training=teacher.fit(x_true,y_true,validation_split=0.2,
                            epochs=10,batch_size=200,verbose=0)
    #evaluating teacher model on test data    
    scores=teacher.evaluate(x_test,y_test,verbose=0)
    print("Original model with labelled data only predicting on test data: ",scores[1])

    x_true_pseudo=np.concatenate([x_true,x_pseudo]) #concatenating x for labeled and unlabeled data
    print('x_true_pseudo.shape: ',x_true_pseudo.shape)
    prediction=teacher.predict_classes(x_pseudo) #predicting labels on unlabeled data
    y_pseudo=keras.utils.to_categorical(prediction, num_classes)
    y_true_pseudo=np.concatenate([y_true,y_pseudo]) #concatenating y for labeled and pseudo labeled
    print('y_true_pseudo.shape: ', y_true_pseudo.shape)
    for i in range(10): 
        # 10 loops of 10 epochs of noised student training for labeled and pseudo labeled data (step 3 in article)
        # followed by generating predictions on unlabeled data with the teacher model (=un-noised student)
        # which uses the weights of the trained noised student (noise does not change the weights structure of models) (step 2 in article)
        print(i)
        training=student.fit(x_true_pseudo,y_true_pseudo,validation_split=0.2,
                             epochs=10,batch_size=200,verbose=0)
        # Save weights
        student.save_weights(teacher_path)
        # Load weights for teacher model (un-noised)
        teacher.load_weights(teacher_path)
        prediction=teacher.predict_classes(x_pseudo)
        scores=teacher.evaluate(x_test,y_test,verbose=0) #evaluating model on test data
        print('iteration: ',i)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])
        y_pseudo=keras.utils.to_categorical(prediction, num_classes)
        y_true_pseudo=np.concatenate([y_true,y_pseudo]) #new y_true_pseudo to be used in next loop
        

#from keras.utils import plot_model
#plot_model(teacher,to_file='teacher.png')
#plot_model(student,to_file='student.png')


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
rate=0.1:

Original model with labelled data only predicting on test data:  0.4259999990463257
x_true_pseudo.shape:  (50000, 32, 32, 3)
y_true_pseudo.shape:  (50000, 10)
0
iteration:  0
Test loss: 2.3814638202667235
Test accuracy: 0.40630000829696655
1
iteration:  1
Test loss: 2.481840321731567
Test accuracy: 0.3961000144481659
2
iteration:  2
Test loss: 2.5549630054473877
Test accuracy: 0.39399999380111694
3
iteration:  3
Test loss: 2.659085976409912
Test accuracy: 0.41119998693466187
4
iteration:  4
Test loss: 2.6760872135162352
Test accuracy: 0.4178999960422516
5
iteration:  5
Test loss: 2.606792931365967
Test accuracy: 0.4214000105857849
6
iteration:  6
Test loss: 2.630066015625
Test accuracy: 0.4205000102519989
7
iteration:  7
Test loss: 2.694792952346802
Test accuracy: 0.414000004529953
8
iteration:  8
Test loss: 2.6629435035705566
Test accuracy: 0.4052000045776367
9
iteration:  9
Test loss: 2.55188693542480

iteration:  3
Test loss: 0.638658087682724
Test accuracy: 0.7925000190734863
4
iteration:  4
Test loss: 0.639220199394226
Test accuracy: 0.7942000031471252
5
iteration:  5
Test loss: 0.6503369867324829
Test accuracy: 0.7918999791145325
6
iteration:  6
Test loss: 0.6423421550750732
Test accuracy: 0.7924000024795532
7
iteration:  7
Test loss: 0.6258201817035675
Test accuracy: 0.7961999773979187
8
iteration:  8
Test loss: 0.6589927075386047
Test accuracy: 0.796999990940094
9
iteration:  9
Test loss: 0.6539146895885467
Test accuracy: 0.7936999797821045
rate=5.0:

Original model with labelled data only predicting on test data:  0.7867000102996826
x_true_pseudo.shape:  (50000, 32, 32, 3)
y_true_pseudo.shape:  (50000, 10)
0
iteration:  0
Test loss: 0.631142573595047
Test accuracy: 0.7993000149726868
1
iteration:  1
Test loss: 0.6383090171337128
Test accuracy: 0.7996000051498413
2
iteration:  2
Test loss: 0.6523478969573975
Test accuracy: 0.7944999933242798
3
iteration:  3
Test loss: 0.6502401

We can see that the range of ratios of labeled/unlabeled data of 3 to 30 look optimal. This means that when one wants to find the best predictive algorithm on a given dataset that we cannot increase, keeping between 3% and 30% of the data as unlabeled and training a STSN would bring better accuracy. The accuracy on test data with the un-noised model used in supervised learning for 100 epochs is 0.7472, while the self-learning semi supervised learning algorithm with the same model and number of epochs give an accuracy in excess of 0.795. Noise was added on the model with a DropOut(0.5) on the layer before the last layer.
Below we do the same as we did for MNIST: We use a small labeled dataset of 1000 images. We then add a unlabeled dataset which size is determined by the ratio of labeled/unlabeled data. We first run and evaluate the supervised learning algorithm on the small labeled dataset, and then we run STNS and note the impact of the ratio on accuracy 

In [8]:
#supervised learning on small training dataset, testing on full testing dataset

from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os
import numpy as np

#batch_size = 32
num_classes = 10
#epochs = 50
#data_augmentation = False
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher_name = 'keras_cifar10_trained_teacher.h5'

teacher_path = os.path.join(save_dir, teacher_name)

#making un-noised teacher model
teacher = Sequential()
teacher.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
teacher.add(Activation('relu'))
teacher.add(Conv2D(32, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Conv2D(64, (3, 3), padding='same'))
teacher.add(Activation('relu'))
teacher.add(Conv2D(64, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Flatten())
teacher.add(Dense(512))
teacher.add(Activation('relu'))
#model.add(Dropout(0.5)) #this will be uncommented for the noised student model
teacher.add(Dense(num_classes))
teacher.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Compile the teacher model using RMSprop
teacher.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

#noised student model

student = Sequential()
student.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
student.add(Activation('relu'))
student.add(Conv2D(32, (3, 3)))
student.add(Activation('relu'))
student.add(MaxPooling2D(pool_size=(2, 2)))
student.add(Dropout(0.25))

student.add(Conv2D(64, (3, 3), padding='same'))
student.add(Activation('relu'))
student.add(Conv2D(64, (3, 3)))
student.add(Activation('relu'))
student.add(MaxPooling2D(pool_size=(2, 2)))
student.add(Dropout(0.25))

student.add(Flatten())
student.add(Dense(512))
student.add(Activation('relu'))
student.add(Dropout(0.5))
student.add(Dense(num_classes))
student.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Compiling the model using RMSprop
student.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])


# Load the cifar10 data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train=x_train.astype('float32')/255
x_test=x_test.astype('float32')/255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(y_train.shape)

#checking that classes are balanced

#print(np.unique(y_train))
#print(y_train[0:40])

sample_size=1000
for i in range(10):
    print(str(i)+":",sum(y_train==i))

#Selecting 100 images of each class
k=0
x_small_train=np.zeros((sample_size,32,32,3))
y_small_train=np.full((sample_size,),-1)

for i in range(x_train.shape[0]):
    #print(i)
    for j in range(10):
        if sum(y_small_train==j)<sample_size/10:
            if y_train[i]==j:
                x_small_train[k,:]=x_train[i,:]
                y_small_train[k]=y_train[i]
                k+=1
                break
    #print('k=',k)
    if k==sample_size:
        break
        
#print(y_small_train[0:40])
print(x_small_train.shape)
print(y_small_train.shape)

#verifying that there are 100 images in each class
for i in range(10):
    print(str(i)+":",sum(y_small_train==i))
    
# Convert class vectors to binary class matrices.
y_small_train = keras.utils.to_categorical(y_small_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)    

#train teacher model
teacher.fit(x_small_train, y_small_train,
            batch_size=10,
            epochs=100,
            validation_split=0.,
            shuffle=True)


# Score trained model.
scores = teacher.evaluate(x_test, y_test, verbose=1)
print('Small sample of 1000 training images, Supervised learning model with '+str(100)+'epochs \n')
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
(50000, 1)
0: [5000]
1: [5000]
2: [5000]
3: [5000]
4: [5000]
5: [5000]
6: [5000]
7: [5000]
8: [5000]
9: [5000]
(1000, 32, 32, 3)
(1000,)
0: 100
1: 100
2: 100
3: 100
4: 100
5: 100
6: 100
7: 100
8: 100
9: 100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
E

Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Small sample of 1000 training images, Supervised learning model with 120epochs 

Test loss: 4.265565634918213
Test accuracy: 0.4131999909877777


In [11]:
# Varying the ratio for 1000 labeled images. Rest of training dataset unlabeled with ratio determining size of total dataset.
# Testing on full test dataset

# Load the cifar10 data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train=x_train.astype('float32')/255
x_test=x_test.astype('float32')/255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(y_train.shape)

#checking that classes are balanced

#print(np.unique(y_train))
#print(y_train[0:40])

sample_size=1000
for i in range(10):
    print(str(i)+":",sum(y_train==i))

#Selecting 100 images of each class
k=0
x_small_train=np.zeros((sample_size,32,32,3))
y_small_train=np.full((sample_size,),-1)

for i in range(x_train.shape[0]):
    #print(i)
    for j in range(10):
        if sum(y_small_train==j)<sample_size/10:
            if y_train[i]==j:
                x_small_train[k,:]=x_train[i,:]
                y_small_train[k]=y_train[i]
                k+=1
                break
    #print('k=',k)
    if k==sample_size:
        break
        
#print(y_small_train[0:40])
print(x_small_train.shape)
print(y_small_train.shape)

#verifying that there are 100 images in each class
for i in range(10):
    print(str(i)+":",sum(y_small_train==i))
    
# Convert class vectors to binary class matrices.
y_small_train = keras.utils.to_categorical(y_small_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)    




# rate= n_true/n_pseudo, n_true=sample_size
rate=np.array([0.1,0.2,0.3,0.5,0.75,1,2,3])
#n_total=sample_size*(1+1/rate)

# total number of train images (n_total) = number of true label images (sample_size) + number of pseudo label images (n_pseudo)
#                                        = sample_size(1/rate+1)
# n_pseudo = sample_size/rate

#loop over rate values in order to find the optimal rate value for the self-learning semi supervised learning, 
#ie one that will maximize accuracy
for r in rate:
    print("rate="+str(r)+":\n")
    n_pseudo=int(sample_size/r)
    n_true=sample_size
    n_total=n_true+n_pseudo
    print(n_pseudo, n_true, n_total)
    x_true=x_small_train
    y_true=y_small_train
    x_pseudo=x_train[500:500+n_pseudo,:] #x for the unlabeled data (pseudo)
    #training teacher model on labeled data 
    training=teacher.fit(x_true,y_true,validation_split=0.,
                            epochs=10,batch_size=int(n_total/10),verbose=0)
    #evaluating teacher model on test data    
    scores=teacher.evaluate(x_test,y_test,verbose=0)
    print("Original model with labelled data only predicting on test data: ",scores[1])

    x_true_pseudo=np.concatenate([x_true,x_pseudo]) #concatenating x for labeled and unlabeled data
    print('x_true_pseudo.shape: ',x_true_pseudo.shape)
    prediction=teacher.predict_classes(x_pseudo) #predicting labels on unlabeled data
    y_pseudo=keras.utils.to_categorical(prediction, num_classes)
    y_true_pseudo=np.concatenate([y_true,y_pseudo]) #concatenating y for labeled and pseudo labeled
    print('y_true_pseudo.shape: ', y_true_pseudo.shape)
    for i in range(10): 
        # 10 loops of 10 epochs of noised student training for labeled and pseudo labeled data (step 3 in article)
        # followed by generating predictions on unlabeled data with the teacher model (=un-noised student)
        # which uses the weights of the trained noised student (noise does not change the weights structure of models) (step 2 in article)
        print(i)
        training=student.fit(x_true_pseudo,y_true_pseudo,validation_split=0.,
                             epochs=10,batch_size=int(n_total/10),verbose=0)
        # Save weights
        student.save_weights(teacher_path)
        # Load weights for teacher model (un-noised)
        teacher.load_weights(teacher_path)
        prediction=teacher.predict_classes(x_pseudo)
        scores=teacher.evaluate(x_test,y_test,verbose=0) #evaluating model on test data
        print('iteration: ',i)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])
        y_pseudo=keras.utils.to_categorical(prediction, num_classes)
        y_true_pseudo=np.concatenate([y_true,y_pseudo]) #new y_true_pseudo to be used in next loop
        


#from keras.utils import plot_model
#plot_model(teacher,to_file='teacher.png')
#plot_model(student,to_file='student.png')

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
(50000, 1)
0: [5000]
1: [5000]
2: [5000]
3: [5000]
4: [5000]
5: [5000]
6: [5000]
7: [5000]
8: [5000]
9: [5000]
(1000, 32, 32, 3)
(1000,)
0: 100
1: 100
2: 100
3: 100
4: 100
5: 100
6: 100
7: 100
8: 100
9: 100
rate=0.1:

10000 1000 11000
Original model with labelled data only predicting on test data:  0.42340001463890076
x_true_pseudo.shape:  (11000, 32, 32, 3)
y_true_pseudo.shape:  (11000, 10)
0
iteration:  0
Test loss: 1.848732839012146
Test accuracy: 0.3353999853134155
1
iteration:  1
Test loss: 2.6426416049957275
Test accuracy: 0.30869999527931213
2
iteration:  2
Test loss: 2.9602301971435545
Test accuracy: 0.30399999022483826
3
iteration:  3
Test loss: 3.2030816150665284
Test accuracy: 0.28780001401901245
4
iteration:  4
Test loss: 3.339076047515869
Test accuracy: 0.27469998598098755
5
iteration:  5
Test loss: 3.3592512908935546
Test accuracy: 0.27149999141693115
6
iteration:  6
Test loss: 3.2671358795166014
Tes

In [20]:
pip install pydot

Collecting pydotNote: you may need to restart the kernel to use updated packages.
  Downloading pydot-1.4.1-py2.py3-none-any.whl (19 kB)
Installing collected packages: pydot
Successfully installed pydot-1.4.1



In [5]:
pip install graphviz

Collecting graphviz
  Downloading graphviz-0.13.2-py2.py3-none-any.whl (17 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.13.2
Note: you may need to restart the kernel to use updated packages.
