In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.image as mping
from sklearn.model_selection import train_test_split
import os

## Understand the dataset

In [None]:
# List every entry in the training-image folder (names only, not full paths).
# NOTE(review): absolute, machine-specific path; adjust it before running elsewhere.

filenames = os.listdir("C:\\Users\\Anuruddha\\Desktop\\ML_projects\\30.CIFAR_1_Object_Recognition\\dataset\\train")

In [None]:
# Number of entries found in the training folder
len(filenames)

In [None]:
print(filenames[0:5])  # first five names returned by os.listdir

In [None]:
print(filenames[-5:])  # last five names returned by os.listdir

In [None]:
# Load the label table (CSV) into a pandas DataFrame.
label_df = pd.read_csv("C:\\Users\\Anuruddha\\Desktop\\ML_projects\\30.CIFAR_1_Object_Recognition\\dataset\\trainLabels.csv")

In [None]:
# (rows, columns) of the label table.
# Note: the rows of the label table are not in the same order as the image
# files listed from the training folder.
label_df.shape

In [None]:
# First rows of the label table
label_df.head()

In [None]:
# Last rows of the label table
label_df.tail()

In [None]:
# Number of rows per distinct value of the 'label' column
label_df['label'].value_counts()

In [None]:
# Display one sample image (10.png) from the training folder.
img = plt.imread("C:\\Users\\Anuruddha\\Desktop\\ML_projects\\30.CIFAR_1_Object_Recognition\\dataset\\train\\10.png")  # read the PNG into an array
plt.imshow(img)

## Data Preprocessing

### Label encoding

In [None]:
# Map each class name to an integer code 0-9, in the order listed.
labels_dictionary = dict(zip(
    ['airplane', 'automobile', 'bird', 'cat', 'deer',
     'dog', 'frog', 'horse', 'ship', 'truck'],
    range(10),
))

# Integer label vector, in the row order of label_df.
# An unknown class name raises KeyError.
labels = list(map(labels_dictionary.__getitem__, label_df['label']))

In [None]:
#labels

In [None]:
# Image ids in the row order of the label table
list_id = label_df['id'].tolist()

In [None]:
# Preview only the first few ids; displaying the whole list would flood the
# cell output with one line per training image.
list_id[:10]

In [None]:
# THIS CELL TAKES A LONG TIME: it reads every training image from disk.
#
# The images are loaded in the row order of the label table (list_id), not in
# os.listdir() order, so that data[i] corresponds to labels[i].

train_data_folder = "C:\\Users\\Anuruddha\\Desktop\\ML_projects\\30.CIFAR_1_Object_Recognition\\dataset\\train\\"

data = []

# `image_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the notebook session.
for image_id in list_id:
    image_path = os.path.join(train_data_folder, str(image_id) + '.png')  # path of this image
    # The context manager releases the underlying file handle as soon as the
    # pixels have been copied into the numpy array.
    with Image.open(image_path) as image:
        data.append(np.array(image))

In [None]:
# Number of image arrays loaded
len(data)

In [None]:
# Type of a single loaded image (each entry was created with np.array)
type(data[0])

In [None]:
# Shape of the first image array
data[0].shape

## Split data 

In [None]:
# Stack the per-image arrays into one feature array and turn the list of
# integer labels into a label vector.
X, Y = np.array(data), np.array(labels)

In [None]:
# Shapes of the feature array and the label vector
print(X.shape)
print(Y.shape)

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Shapes of the full feature array and of its train / test parts
print(X.shape, X_train.shape, X_test.shape)

In [None]:
# Normalize the pixel values by dividing by 255 (maps 0..255 to 0..1).
# Cast to float32 first: dividing an integer image array directly yields a
# float64 result, which takes twice the memory, while Keras uses float32 by
# default anyway.

X_train_scaled = X_train.astype('float32') / 255.0
X_test_scaled = X_test.astype('float32') / 255.0

## Model 1: baseline dense network

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
num_of_class = 10

# Baseline network: flatten each 32x32x3 image into a vector, pass it through
# one hidden ReLU layer, then a softmax over the classes.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(32,32,3)))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(num_of_class, activation='softmax'))

In [None]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy (the
# labels are integer class ids rather than one-hot vectors), and accuracy
# reported under the name 'acc'.

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)


In [None]:
# Train for 5 epochs, holding out 10% of the training data for validation.
model.fit(X_train_scaled, Y_train, validation_split=0.1, epochs=5)

## Model 2: transfer learning with ResNet50

In [None]:
from tensorflow.keras import Sequential, models, layers
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras import optimizers

In [None]:
# Import the pre-trained model

cnn_base = ResNet50(weights='imagenet', include_top=False, input_shape=(256,256,3))
# weights='imagenet' loads the ResNet50 weights that were pre-trained on the ImageNet dataset
# include_top=False leaves out the fully-connected classification layers at the top of the network

In [None]:
# Print the layer-by-layer summary of the ResNet50 base
cnn_base.summary()

In [None]:
# Transfer-learning model: upsample the 32x32 inputs to the 256x256 size the
# ResNet50 base was created with, then classify its features with a small
# dense head.
model = models.Sequential([
    # Three successive 2x upsamplings: 32 -> 64 -> 128 -> 256
    layers.UpSampling2D((2,2)),
    layers.UpSampling2D((2,2)),
    layers.UpSampling2D((2,2)),
    cnn_base,
    layers.Flatten(),
    # Batch normalization ahead of each dense layer keeps its inputs in a
    # comparable range.
    layers.BatchNormalization(),
    layers.Dense(128,activation='relu'),
    layers.Dropout(0.5),  # dropout to reduce overfitting
    layers.BatchNormalization(),
    layers.Dense(64,activation='relu'),
    layers.Dropout(0.5),
    layers.BatchNormalization(),
    layers.Dense(num_of_class,activation='softmax'),
])


In [None]:
# Compile with RMSprop at a small learning rate (2e-5), sparse categorical
# cross-entropy for the integer class labels, and accuracy reported as 'acc'.
# NOTE(review): the low learning rate is presumably chosen so the pre-trained
# ResNet50 weights are only adjusted gently; confirm.
model.compile(optimizer=optimizers.RMSprop(learning_rate=2e-5),
             loss='sparse_categorical_crossentropy',
             metrics=['acc'])

In [None]:
# This cell takes a long time to run.
# Train for a single epoch, holding out 10% of the training data for
# validation; the returned History object is kept for the plots below.

history = model.fit(X_train_scaled, Y_train, validation_split=0.1, epochs=1)

### Evaluate the model

In [None]:
# Evaluate the current `model` on the held-out test split; evaluate() returns
# the loss followed by the compiled metric ('acc').
loss,accuracy = model.evaluate(X_test_scaled, Y_test)
print("Testing accuracy: ", accuracy)

In [None]:
h = history

# The per-epoch metrics live in the `history` attribute (a dict of lists) of
# the History object returned by model.fit().

# Plot the loss values.
# marker='o' keeps the curves visible even after a single epoch, where a
# one-point line would otherwise draw nothing.

plt.plot(h.history['loss'], marker='o', label='train loss')
plt.plot(h.history['val_loss'], marker='o', label='validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

# Plot the accuracy values.

plt.plot(h.history['acc'], marker='o', label='train acc')
plt.plot(h.history['val_acc'], marker='o', label='validation acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()