In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
# Machine-specific location of the training data; adjust for your environment.
_train_dir = '/Users/peter/Downloads/train_data'
noisy = f'{_train_dir}/noisy_labels.csv'  # CSV of noisy labels
clean = f'{_train_dir}/clean_labels.csv'  # CSV of clean labels

In [3]:
# Human-readable class names.
# Presumably indexed by the integer labels stored in the label CSVs -- TODO confirm.
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

In [4]:
def _load_labels(csv_path):
    """Read a header-less label CSV and rename its first column (0) to 'label'.

    header=None makes pandas treat the first row as data instead of consuming
    it as a header, so no label is dropped.
    """
    frame = pd.read_csv(csv_path, header=None)
    return frame.rename(columns={0: 'label'})


clean_labels = _load_labels(clean)
noisy_labels = _load_labels(noisy)

In [44]:
# Root folder of the training data (machine-specific absolute path).
direx = '/Users/peter/Downloads/train_data/'

# Noisy labels as a plain Python list, in file order.
list_of_labels = noisy_labels['label'].tolist()

In [45]:
# Dataset sizes. n_clean_noisy is derived as n_img - n_noisy.
# Presumably the number of images that have both a clean and a noisy label -- TODO confirm.
n_img = 50000
n_noisy = 40000
n_clean_noisy = n_img - n_noisy

# Pre-allocated buffer for every image as 32x32 RGB (np.empty defaults to float64).
imgs = np.empty((n_img,32,32,3))
for i in range(n_img):
    # Image files are numbered from 1 and zero-padded to five digits (00001.png, ...).
    img_fn = f'/Users/peter/Downloads/train_data/images/{i+1:05d}.png'
    bgr = cv2.imread(img_fn)
    if bgr is None:
        # cv2.imread returns None for a missing/unreadable file instead of raising;
        # fail here with the offending path rather than with an opaque cvtColor error.
        raise FileNotFoundError(f'Could not read image file: {img_fn}')
    # OpenCV loads channels as BGR; store them as RGB.
    imgs[i,:,:,:] = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

In [46]:
# [DO NOT MODIFY THIS CELL]
# RGB histogram dataset construction
# Builds, for every image, a feature row of per-channel pixel-count histograms
# (feature_mtx) and the matching noisy label (target_vec).
no_bins = 6
bins = np.linspace(0,255,no_bins) # the range of the rgb histogram: 6 evenly spaced edges -> 5 bins per channel
target_vec = np.empty(n_img) # float64 array (np.empty default); labels are stored as floats
feature_mtx = np.empty((n_img,3*(len(bins)-1))) # one row per image, 3 channels x 5 bins = 15 columns
i = 0 # has no effect: the for loop below rebinds i on every iteration
for i in range(n_img):
    # The target vector consists of noisy labels
    target_vec[i] = list_of_labels[i]
    
    # Use the numbers of pixels in each bin for all three channels as the features
    feature1 = np.histogram(imgs[i][:,:,0],bins=bins)[0] 
    feature2 = np.histogram(imgs[i][:,:,1],bins=bins)[0]
    feature3 = np.histogram(imgs[i][:,:,2],bins=bins)[0]
    
    # Concatenate three features
    feature_mtx[i,] = np.concatenate((feature1, feature2, feature3), axis=None)
    i += 1 # has no effect on iteration: range() supplies the next value of i

In [47]:
# Noisy labels and image pixels as int32 tensors for Keras.
target = tf.convert_to_tensor(target_vec, dtype=tf.int32)
imgs1 = tf.convert_to_tensor(imgs, dtype=tf.int32)

# Augmentation settings: random rotation, zoom, shifts and horizontal flips.
# NOTE: nothing below consumes this generator yet -- the fit calls train on
# imgs1/target or tfg directly, so no augmentation is applied during training.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range = 30,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.2, # randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip = True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically

# datagen.fit(...) is intentionally not called: it only computes dataset statistics
# for featurewise_center / featurewise_std_normalization / zca_whitening, which are
# all disabled above, so it would just copy the whole dataset for nothing.

# Batched (image, noisy label) dataset. model_transfer.fit(tfg, ...) needs this name,
# so it must be defined for the notebook to survive Restart & Run All.
tfg = tf.data.Dataset.from_tensor_slices((imgs1,target)).batch(64)


In [48]:
#Model I FIT
# Small CNN: two conv/pool stages followed by a dense head that emits 10 raw logits.
model = tf.keras.Sequential()
# input_shape belongs on the FIRST layer: Sequential ignores it on later layers,
# which left the model unbuilt (no summary/weights) until the first fit call.
model.add(tf.keras.layers.Rescaling(1./255, input_shape=(32,32,3)))  # pixels 0..255 -> 0..1
model.add(tf.keras.layers.Conv2D(128, (3, 3)))
model.add(tf.keras.layers.Activation('relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

model.add(tf.keras.layers.Conv2D(64, (3, 3)))
model.add(tf.keras.layers.Activation('sigmoid'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128,activation = 'relu'))
# No activation on the output layer: the loss below is configured with from_logits=True.
model.add(tf.keras.layers.Dense(10))

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])



In [49]:
# Fit on the noisy ("dirty") labels. With Keras' default batch size of 32,
# ceil(50,000 / 32) = 1563 batches (steps) per epoch.
model.fit(x=imgs1, y=target, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fba55b32128>

In [None]:

#model.fit(tfg, epochs = 10) #50,000/32 = 1563 examples per training epoch

In [19]:
# ImageNet-pretrained MobileNetV2 without its classification head, used as a feature extractor.
MobileNet = tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape=(32,32,3), include_top=False, weights='imagenet')

# Freeze the pretrained base so only the new dense head is trained.
MobileNet.trainable=False
model_transfer = tf.keras.Sequential([
                            # MobileNetV2's ImageNet weights expect inputs in [-1, 1]
                            # (what mobilenet_v2.preprocess_input produces), not [0, 1].
                            tf.keras.layers.Rescaling(1./127.5, offset=-1),
                            MobileNet,

                            tf.keras.layers.Flatten(),
                            # Non-linear hidden layer; without an activation the two Dense
                            # layers collapse into a single linear map.
                            tf.keras.layers.Dense(128, activation = 'relu'),
                            # Raw logits: the loss uses from_logits=True, so the output must
                            # stay linear (a ReLU here would clamp every negative logit to 0).
                            tf.keras.layers.Dense(10)
                            ])

model_transfer.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])



In [20]:
# Train the transfer model for five epochs on tfg; the MobileNetV2 base was frozen
# (trainable=False) before compile, so only the dense head is updated.
model_transfer.fit(x=tfg, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fba54b88ac8>

In [None]:
# Display the target vector filled from list_of_labels (the noisy labels);
# NumPy abbreviates long arrays when showing them.
target_vec