In [2]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


# Deep Learning Project

## Milestone report

## Study of the impact on the ratio of labeled to unlabeled data on top-1 accuracy on the CIFAR-10 dataset

Pierre Andurand (pa2570)
Tzu Yi Chuang (tc3075)
Kuan Yu Ko (kk3376)

Below we train a simple model in supervised learning without data augmentation. We check its performance on the cifar10 dataset. It will be our un-noised teacher model. And we will compare its performance with 36 epochs to the semi supervised self-learning model in the following block

In [4]:
from __future__ import print_function
import tensorflow.keras as keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
import os

batch_size = 200
num_classes = 10
epochs = 60
data_augmentation = False
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher6_name = 'keras_cifar10_trained_teacher6.h5'

# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

teacher = Sequential()
teacher.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
teacher.add(Activation('relu'))
teacher.add(Conv2D(32, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Conv2D(64, (3, 3), padding='same'))
teacher.add(Activation('relu'))
teacher.add(Conv2D(64, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Flatten())
teacher.add(Dense(512))
teacher.add(Activation('relu'))
#model.add(Dropout(0.5))
teacher.add(Dense(num_classes))
teacher.add(Activation('softmax'))


# Compiling the model using RMSprop
teacher.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

#Training the model

if not data_augmentation:
    print('Not using data augmentation.')
    teacher.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    teacher.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_split=0.2,
                        workers=4)
    
# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
teacher6_path = os.path.join(save_dir, teacher6_name)
teacher.save(teacher6_path)
print('Saved trained model at %s ' % teacher6_path)

# Score trained model.
scores = teacher.evaluate(x_test, y_test, verbose=1)
print('Supervised learning model with '+str(epochs)+'epochs \n')
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
Not using data augmentation.
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Saved trained model at /content/saved_models/keras_cifar10_trained_teacher6.h5 
Supervised learning model with 60epochs 

Test loss: 1.4294781684875488
Test accuracy: 0.7595000267028809


The accuracy of the fully supervised model is 0.7595. 
# Test 1
Below we would like to check if starting from the weights of the fully supervised model trained above, we would get an increase in accuracy if we run STNS on the full dataset with different ratios of labeled to unlabeled data. And if the accuracy goes up, we would like to see what is the optimal ratio. 
The STNS algorithm used is as follows:
We do a loop over different ratios of label to unlabelled data (rate). And each loop does the following:
1) take the weights from the fully supervised teacher model trained in box above 
2) Ten cycles of: un-noised model (teacher)->predict hard pseudolabel->training 10 epochs for noised model (student=teacher+dropout noise) on labeled+pseudo labeled->new weights. 

We check rates 0.1,0.25,0.5,1,2.5,5,10,20. The student model will be the teacher model noised by a Dropout(0.5) before the last layer

In [5]:
import numpy as np
#batch_size = 32
num_classes = 10
#epochs = 50
#data_augmentation = False
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher7_name = 'keras_cifar10_trained_teacher7.h5'

teacher7_path = os.path.join(save_dir, teacher7_name)

# Load the cifar10 data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255


# rate= n_true/n_pseudo
rate=np.array([0.1, 0.25, 0.5, 1,2.5,5,10,20])
n_total=x_train.shape[0]

# total number of train images (n_total) = number of true label images (n_true) + number of pseudo label images (n_pseudo)
#                                        = n_pseudo(rate+1)
# n_pseudo = n_total/(rate+1); n_true=n_total-n_pseudo

#loop over rate values in order to find the optimal rate value for the self-learning semi supervised learning, 
#ie one that will maximize accuracy
for r in rate:
    
    #making un-noised teacher model
    teacher = Sequential()
    teacher.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=x_train.shape[1:]))
    teacher.add(Activation('relu'))
    teacher.add(Conv2D(32, (3, 3)))
    teacher.add(Activation('relu'))
    teacher.add(MaxPooling2D(pool_size=(2, 2)))
    teacher.add(Dropout(0.25))

    teacher.add(Conv2D(64, (3, 3), padding='same'))
    teacher.add(Activation('relu'))
    teacher.add(Conv2D(64, (3, 3)))
    teacher.add(Activation('relu'))
    teacher.add(MaxPooling2D(pool_size=(2, 2)))
    teacher.add(Dropout(0.25))

    teacher.add(Flatten())
    teacher.add(Dense(512))
    teacher.add(Activation('relu'))
    #model.add(Dropout(0.5)) #this will be uncommented for the noised student model
    teacher.add(Dense(num_classes))
    teacher.add(Activation('softmax'))


    # Compile the teacher model using RMSprop
    teacher.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    #noised student model

    student = Sequential()
    student.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
    student.add(Activation('relu'))
    student.add(Conv2D(32, (3, 3)))
    student.add(Activation('relu'))
    student.add(MaxPooling2D(pool_size=(2, 2)))
    student.add(Dropout(0.25))
    student.add(Conv2D(64, (3, 3), padding='same'))
    student.add(Activation('relu'))
    student.add(Conv2D(64, (3, 3)))
    student.add(Activation('relu'))
    student.add(MaxPooling2D(pool_size=(2, 2)))
    student.add(Dropout(0.25))

    student.add(Flatten())
    student.add(Dense(512))
    student.add(Activation('relu'))
    student.add(Dropout(0.5))
    student.add(Dense(num_classes))
    student.add(Activation('softmax'))

    # Compiling the model using RMSprop
    student.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print("rate="+str(r)+":\n")
    n_pseudo=n_total/(1+r)
    n_true=n_total-n_pseudo
    mask_true=np.random.choice(int(n_total),int(n_true),replace=False) #generating n_true integers between 0 and n_total-1
    mask_pseudo=[item for item in range(n_total) if item not in mask_true] #all the other numbers between 0 and n_total-1 which are not in mask_true
    mask_pseudo=np.array(mask_pseudo)
    x_true=x_train[mask_true[:]] #x for the labeled data
    y_true=y_train[mask_true[:]] #y for the labeled data
    x_pseudo=x_train[mask_pseudo[:]] #x for the unlabeled data (pseudo)
    #Taking weights from supervised model on full data (box above) 
    teacher.load_weights(teacher6_path)
    #evaluating teacher model on test data    
    scores=teacher.evaluate(x_test,y_test,verbose=0)
    print("Original model with labelled data only predicting on test data: ",scores[1])

    x_true_pseudo=np.concatenate([x_true,x_pseudo]) #concatenating x for labeled and unlabeled data
    print('x_true_pseudo.shape: ',x_true_pseudo.shape)
    prediction=teacher.predict_classes(x_pseudo) #predicting labels on unlabeled data
    y_pseudo=keras.utils.to_categorical(prediction, num_classes)
    y_true_pseudo=np.concatenate([y_true,y_pseudo]) #concatenating y for labeled and pseudo labeled
    print('y_true_pseudo.shape: ', y_true_pseudo.shape)
    for i in range(10): 
        # 10 loops of 10 epochs of noised student training for labeled and pseudo labeled data (step 3 in article)
        # followed by generating predictions on unlabeled data with the teacher model (=un-noised student)
        # which uses the weights of the trained noised student (noise does not change the weights structure of models) (step 2 in article)
        print(i)
        training=student.fit(x_true_pseudo,y_true_pseudo,validation_split=0.,
                             epochs=10,batch_size=200,verbose=0)
        # Save weights
        student.save_weights(teacher7_path)
        # Load weights for teacher model (un-noised)
        teacher.load_weights(teacher7_path)
        prediction=teacher.predict_classes(x_pseudo)
        scores=teacher.evaluate(x_test,y_test,verbose=0) #evaluating model on test data
        print('iteration: ',i)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])
        y_pseudo=keras.utils.to_categorical(prediction, num_classes)
        y_true_pseudo=np.concatenate([y_true,y_pseudo]) #new y_true_pseudo to be used in next loop
        

#from keras.utils import plot_model
#plot_model(teacher,to_file='teacher.png')
#plot_model(student,to_file='student.png')


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
rate=0.1:

Original model with labelled data only predicting on test data:  0.7595000267028809
x_true_pseudo.shape:  (50000, 32, 32, 3)
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
y_true_pseudo.shape:  (50000, 10)
0
iteration:  0
Test loss: 0.6737338900566101
Test accuracy: 0.7709000110626221
1
iteration:  1
Test loss: 0.8928207159042358
Test accuracy: 0.766700029373169
2
iteration:  2
Test loss: 0.9718344211578369
Test accuracy: 0.7645999789237976
3
iteration:  3
Test loss: 1.0270256996154785
Test accuracy: 0.7735999822616577
4
iteration:  4
Test loss: 1.1127089262008667
Test accuracy: 0.7674000263214111
5
iteration:  5
Test los

We can see that the range of ratios of labeled/unlabeled data of 5 to 20 look optimal. This means that when one wants to find the best predictive algorithm on a given dataset that we cannot increase, keeping between 5% and 20% of the data as unlabeled and training a STSN would bring better accuracy (starting with the weights of supervised algo) . The accuracy on test data with the un-noised model used in supervised learning for 36 epochs is 0.7595, while the self-learning semi supervised learning algorithm with the same model and number of epochs give an accuracy in excess of 0.795. Noise was added on the model with a DropOut(0.5) on the layer before the last layer.
Below we do the same as we did for MNIST: We use a small labeled dataset of 5000 images. We then add a unlabeled dataset which size is determined by the ratio of labeled/unlabeled data. We first run and evaluate the supervised learning algorithm on the small labeled dataset, and then we run STNS and note the impact of the ratio on accuracy 

In [6]:
#supervised learning on small training dataset, testing on full testing dataset


#batch_size = 32
num_classes = 10
#epochs = 50
#data_augmentation = False
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher8_name = 'keras_cifar10_trained_teacher8.h5'

teacher8_path = os.path.join(save_dir, teacher8_name)

#making un-noised teacher model
teacher = Sequential()
teacher.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
teacher.add(Activation('relu'))
teacher.add(Conv2D(32, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Conv2D(64, (3, 3), padding='same'))
teacher.add(Activation('relu'))
teacher.add(Conv2D(64, (3, 3)))
teacher.add(Activation('relu'))
teacher.add(MaxPooling2D(pool_size=(2, 2)))
teacher.add(Dropout(0.25))

teacher.add(Flatten())
teacher.add(Dense(512))
teacher.add(Activation('relu'))
#model.add(Dropout(0.5)) #this will be uncommented for the noised student model
teacher.add(Dense(num_classes))
teacher.add(Activation('softmax'))


# Compile the teacher model using RMSprop
teacher.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

#noised student model

student = Sequential()
student.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
student.add(Activation('relu'))
student.add(Conv2D(32, (3, 3)))
student.add(Activation('relu'))
student.add(MaxPooling2D(pool_size=(2, 2)))
student.add(Dropout(0.25))

student.add(Conv2D(64, (3, 3), padding='same'))
student.add(Activation('relu'))
student.add(Conv2D(64, (3, 3)))
student.add(Activation('relu'))
student.add(MaxPooling2D(pool_size=(2, 2)))
student.add(Dropout(0.25))

student.add(Flatten())
student.add(Dense(512))
student.add(Activation('relu'))
student.add(Dropout(0.5))
student.add(Dense(num_classes))
student.add(Activation('softmax'))


# Compiling the model using RMSprop
student.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


# Load the cifar10 data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train=x_train.astype('float32')/255
x_test=x_test.astype('float32')/255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(y_train.shape)

#checking that classes are balanced

#print(np.unique(y_train))
#print(y_train[0:40])

sample_size=5000
for i in range(10):
    print(str(i)+":",sum(y_train==i))

#Selecting 500 images of each class
k=0
x_small_train=np.zeros((sample_size,32,32,3))
y_small_train=np.full((sample_size,),-1)

for i in range(x_train.shape[0]):
    #print(i)
    for j in range(10):
        if sum(y_small_train==j)<sample_size/10:
            if y_train[i]==j:
                x_small_train[k,:]=x_train[i,:]
                y_small_train[k]=y_train[i]
                k+=1
                break
    #print('k=',k)
    if k==sample_size:
        break
        
#print(y_small_train[0:40])
print(x_small_train.shape)
print(y_small_train.shape)

#verifying that there are 500 images in each class
for i in range(10):
    print(str(i)+":",sum(y_small_train==i))
    
# Convert class vectors to binary class matrices.
y_small_train = keras.utils.to_categorical(y_small_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)    

#train teacher model
teacher.fit(x_small_train, y_small_train,
            batch_size=10,
            epochs=40,
            validation_split=0.,
            shuffle=True)


# Score trained model.
scores = teacher.evaluate(x_test, y_test, verbose=1)
teacher.save_weights(teacher8_path)
print('Small sample of 5000 training images, Supervised learning model with '+str(40)+'epochs \n')
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
(50000, 1)
0: [5000]
1: [5000]
2: [5000]
3: [5000]
4: [5000]
5: [5000]
6: [5000]
7: [5000]
8: [5000]
9: [5000]
(5000, 32, 32, 3)
(5000,)
0: 500
1: 500
2: 500
3: 500
4: 500
5: 500
6: 500
7: 500
8: 500
9: 500
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Small sample of 5000 training images, Supervised learning model with 40epochs 

Test loss: 3.398430585861206
Test accuracy: 0.5134000182151794


In [7]:
# Varying the ratio for 5000 labeled images. Rest of training dataset unlabeled with ratio determining size of total dataset.
# Testing on full test dataset

save_dir = os.path.join(os.getcwd(), 'saved_models')
teacher9_name = 'keras_cifar10_trained_teacher9.h5'

teacher9_path = os.path.join(save_dir, teacher9_name)


# rate= n_true/n_pseudo, n_true=sample_size
rate=np.array([0.05, 0.1,0.25,0.5,0.75,1,2.5,5,10])
#n_total=sample_size*(1+1/rate)

# total number of train images (n_total) = number of true label images (sample_size) + number of pseudo label images (n_pseudo)
#                                        = sample_size(1/rate+1)
# n_pseudo = sample_size/rate

#loop over rate values in order to find the optimal rate value for the self-learning semi supervised learning, 
#ie one that will maximize accuracy
for r in rate:
    
    #making un-noised teacher model
    teacher = Sequential()
    teacher.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
    teacher.add(Activation('relu'))
    teacher.add(Conv2D(32, (3, 3)))
    teacher.add(Activation('relu'))
    teacher.add(MaxPooling2D(pool_size=(2, 2)))
    teacher.add(Dropout(0.25))

    teacher.add(Conv2D(64, (3, 3), padding='same'))
    teacher.add(Activation('relu'))
    teacher.add(Conv2D(64, (3, 3)))
    teacher.add(Activation('relu'))
    teacher.add(MaxPooling2D(pool_size=(2, 2)))
    teacher.add(Dropout(0.25))

    teacher.add(Flatten())
    teacher.add(Dense(512))
    teacher.add(Activation('relu'))
    #model.add(Dropout(0.5)) #this will be uncommented for the noised student model
    teacher.add(Dense(num_classes))
    teacher.add(Activation('softmax'))


    # Compile the teacher model using RMSprop
    teacher.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    #noised student model

    student = Sequential()
    student.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
    student.add(Activation('relu'))
    student.add(Conv2D(32, (3, 3)))
    student.add(Activation('relu'))
    student.add(MaxPooling2D(pool_size=(2, 2)))
    student.add(Dropout(0.25))

    student.add(Conv2D(64, (3, 3), padding='same'))
    student.add(Activation('relu'))
    student.add(Conv2D(64, (3, 3)))
    student.add(Activation('relu'))
    student.add(MaxPooling2D(pool_size=(2, 2)))
    student.add(Dropout(0.25))

    student.add(Flatten())
    student.add(Dense(512))
    student.add(Activation('relu'))
    student.add(Dropout(0.5))
    student.add(Dense(num_classes))
    student.add(Activation('softmax'))


    # Compiling the model using RMSprop
    student.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    
    print("rate="+str(r)+":\n")
    n_pseudo=int(sample_size/r)
    n_true=sample_size
    n_total=n_true+n_pseudo
    print(n_pseudo, n_true, n_total)
    x_true=x_small_train
    y_true=y_small_train
    x_pseudo=x_train[500:500+n_pseudo,:] #x for the unlabeled data (pseudo)
    #take weights from trained model on labeled data (calculated in box above)  
    teacher.load_weights(teacher8_path)
    #evaluating teacher model on test data    
    scores=teacher.evaluate(x_test,y_test,verbose=0)
    print("Original model with labelled data only predicting on test data: ",scores[1])

    x_true_pseudo=np.concatenate([x_true,x_pseudo]) #concatenating x for labeled and unlabeled data
    print('x_true_pseudo.shape: ',x_true_pseudo.shape)
    prediction=teacher.predict_classes(x_pseudo) #predicting labels on unlabeled data
    y_pseudo=keras.utils.to_categorical(prediction, num_classes)
    y_true_pseudo=np.concatenate([y_true,y_pseudo]) #concatenating y for labeled and pseudo labeled
    print('y_true_pseudo.shape: ', y_true_pseudo.shape)
    for i in range(10): 
        # 10 loops of 10 epochs of noised student training for labeled and pseudo labeled data (step 3 in article)
        # followed by generating predictions on unlabeled data with the teacher model (=un-noised student)
        # which uses the weights of the trained noised student (noise does not change the weights structure of models) (step 2 in article)
        print(i)
        training=student.fit(x_true_pseudo,y_true_pseudo,validation_split=0.,
                             epochs=10,batch_size=int(n_total/10),verbose=0)
        # Save weights
        student.save_weights(teacher9_path)
        # Load weights for teacher model (un-noised)
        teacher.load_weights(teacher9_path)
        prediction=teacher.predict_classes(x_pseudo)
        scores=teacher.evaluate(x_test,y_test,verbose=0) #evaluating model on test data
        print('iteration: ',i)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])
        y_pseudo=keras.utils.to_categorical(prediction, num_classes)
        y_true_pseudo=np.concatenate([y_true,y_pseudo]) #new y_true_pseudo to be used in next loop
        


#from keras.utils import plot_model
#plot_model(teacher,to_file='teacher.png')
#plot_model(student,to_file='student.png')

rate=0.05:

100000 5000 105000
Original model with labelled data only predicting on test data:  0.5134000182151794
x_true_pseudo.shape:  (54500, 32, 32, 3)
y_true_pseudo.shape:  (54500, 10)
0
iteration:  0
Test loss: 1.5580335855484009
Test accuracy: 0.4359000027179718
1
iteration:  1
Test loss: 2.3915750980377197
Test accuracy: 0.43369999527931213
2
iteration:  2
Test loss: 2.5407915115356445
Test accuracy: 0.4311000108718872
3
iteration:  3
Test loss: 2.529719829559326
Test accuracy: 0.4262999892234802
4
iteration:  4
Test loss: 2.583324432373047
Test accuracy: 0.4259999990463257
5
iteration:  5
Test loss: 2.761148452758789
Test accuracy: 0.4133000075817108
6
iteration:  6
Test loss: 2.661194324493408
Test accuracy: 0.4205999970436096
7
iteration:  7
Test loss: 2.72483491897583
Test accuracy: 0.4171000123023987
8
iteration:  8
Test loss: 2.5914015769958496
Test accuracy: 0.4251999855041504
9
iteration:  9
Test loss: 2.620016098022461
Test accuracy: 0.43149998784065247
rate=0.1:

5000