#Loading Libraries

**Not all Python capabilities are loaded into our working environment by default (even if they are already installed on your system). So, we import each and every library that we want to use.**

**We chose alias names for our libraries for the sake of convenience (numpy --> np, pandas --> pd, and tensorflow --> tf).**

**Note: You can import all the libraries that you think will be required up front, or import them as you go along.**

In [None]:
import pandas as pd                                     # Data analysis and manipultion tool
import numpy as np                                      # Fundamental package for linear algebra and multidimensional arrays
import tensorflow as tf                                 # Deep Learning Tool
import os                                               # OS module in Python provides a way of using operating system dependent functionality
import cv2                                              # Library for image processing
from sklearn.model_selection import train_test_split    # For splitting the data into train and validation set

#Loading and preparing training data & Data Pre-processing

In [None]:
#Getting the labels of the images
# The CSV maps every training image file name to its class label.
labels = pd.read_csv("/content/content/fruits_data/Training_set.csv")

#Getting images file path
# One [file name, full path] pair per row of the labels table.
file_paths = [
    [image_name, '/content/content/fruits_data/train/' + image_name]
    for image_name in labels['filename']
]

#Converting the file_paths to dataframe
images = pd.DataFrame(data=file_paths, columns=['filename', 'filepaths'])

#Combining the labels with the images
# Joined on the shared 'filename' column.
train_data = images.merge(labels, how='inner', on='filename')

# Replace the categorical labels by integer codes; `le` keeps the mapping so
# that predictions can be translated back to label names later on.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
encoded_labels = le.fit_transform(train_data['label'])
train_data['label'] = encoded_labels


"""
It is necessary to bring all the images in the same shape and size, also convert them to their pixel values because all machine learning 
or deep learning models accepts only the numerical data. Also we need to convert all the labels from categorical to numerical values.
"""
data = []     # initialize an empty numpy array
image_size = 100      # image size taken is 100 here. one can take other size too
for i in range(len(train_data)):
  
  img_array = cv2.imread(train_data['filepaths'][i])   # converting the image to gray scale
  new_img_array = cv2.resize(img_array, (image_size, image_size))      # resizing the image array
  data.append([new_img_array, train_data['label'][i]])

#Shuffle the data
# A seeded generator makes the shuffled order identical on every fresh run, so
# the subsequent train/validation split is reproducible as well.
SHUFFLE_SEED = 42
rng = np.random.default_rng(SHUFFLE_SEED)
rng.shuffle(data)     # in-place shuffle of the list of [image, label] pairs

#Separating the images and labels
x = [pair[0] for pair in data]
y = [pair[1] for pair in data]

# converting x & y to numpy array as they are list
x = np.array(x)
y = np.array(y)

# Enforce the (samples, height, width, channels) layout; uses image_size
# instead of a hard-coded 100 so it stays in sync with the resize step.
x = x.reshape(-1, image_size, image_size, 3)

#Splitting the data into Train and Validation Set
# To check the performance of the model we build, the data (both the
# independent and the dependent part) is divided into a training set, which is
# used to fit the model, and a held-out set, which is used to check how
# accurately the model predicts outcomes. 40% of the samples are held out.
X_train, X_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=42,
)

# Cast the pixel values to float32 and divide them by 255.
X_train, X_val = (part.astype('float32') / 255 for part in (X_train, X_val))

#Building Model


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, models, losses, Model


# Pre-trained VGG16 convolutional base (ImageNet weights) without its original
# classifier. The input shape follows image_size so it always matches the
# resized images.
base_model = tf.keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size, image_size, 3),
)

# Freeze the convolutional base so that only the new head is trained.
for layer in base_model.layers:
  layer.trainable = False

# One output unit per class known to the label encoder. A hard-coded count
# could disagree with the encoder, in which case le.inverse_transform would
# reject predicted indices it has never seen.
num_classes = len(le.classes_)

# New classification head. The intermediate tensor gets its own name so the
# dataset array `x` is not overwritten.
head = layers.Flatten()(base_model.output)
head = layers.Dense(150, activation='relu')(head)
head = layers.Dropout(0.3)(head)
predictions = layers.Dense(num_classes, activation='softmax')(head)
head_model = Model(inputs=base_model.input, outputs=predictions)
head_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 100, 100, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)      

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy on the
# integer-encoded labels, accuracy as the reported metric.
head_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 15 epochs in batches of 128, passing the validation set so it is
# scored alongside training.
head_model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_data=(X_val, y_val),
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fb0dbc60710>

In [None]:
#Validate the model
# Displays [loss, accuracy] computed on the validation set.
head_model.evaluate(x=X_val, y=y_val)

# Optional: uncomment to persist the trained model to disk.
#head_model.save('Fruits_360.h5')



[0.0013216972583904862, 1.0]

#Predict The Output For Testing Dataset

In [None]:
#Load Test Set
# The CSV lists the test image file names in the order predictions are expected.
test_image_order = pd.read_csv("/content/content/fruits_data/Testing_set.csv")

#Getting images file path
test_names = test_image_order['filename']
file_paths = [[name, '/content/content/fruits_data/test/' + name] for name in test_names]

#Converting the file_paths to dataframe
test_images = pd.DataFrame(data=file_paths, columns=['filename', 'filepaths'])

#Data Pre-processing on test_data
test_pixel_data = []     # Python list of resized image arrays, in test_images row order
image_size = 100         # target width and height in pixels; set to the same value as for the training images
for img_path in test_images['filepaths']:
  # With default flags cv2.imread loads a 3-channel colour image (BGR order),
  # it does NOT convert to grayscale.
  img_array = cv2.imread(img_path)
  if img_array is None:
    # cv2.imread reports a missing or unreadable file by returning None rather
    # than raising; fail here with the offending path instead of with an opaque
    # error inside cv2.resize.
    raise FileNotFoundError(f"Could not read image: {img_path}")

  new_img_array = cv2.resize(img_array, (image_size, image_size))      # resizing the image array

  test_pixel_data.append(new_img_array)


test_pixel_data = np.array(test_pixel_data)

# Enforce the (samples, height, width, channels) layout; uses image_size
# instead of a hard-coded 100 so it stays in sync with the resize step.
test_pixel_data = test_pixel_data.reshape(-1, image_size, image_size, 3)

# Same scaling as applied to the training and validation images.
test_pixel_data = test_pixel_data.astype('float32')/255

#Make Prediction on Test Dataset
# One row of class probabilities (softmax output) per test image.
pred = head_model.predict(test_pixel_data)

# Index of the highest-probability class for every image, computed in a single
# vectorized call instead of a Python loop over the rows.
prediction = np.argmax(pred, axis=1)

# Translate the integer class indices back to the original label names.
predictions = le.inverse_transform(prediction)

#Save prediction results locally via colab notebook

In [None]:
# `predictions` holds the model's final predicted label for every image of the
# unseen test data; pair each one with its file name.
res = pd.DataFrame(
    {
        'filename': test_images['filename'],
        'label': predictions,
    }
)
res.to_csv("submission.csv", index=False)

# To download the csv file locally
from google.colab import files
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#For people who work locally : Save prediction results locally via jupyter notebook

In [None]:
# `predictions` holds the model's final predicted label for every image of the
# unseen test data; pair each one with its file name.
submission_columns = {
    'filename': test_images['filename'],
    'label': predictions,
}
res = pd.DataFrame(submission_columns)

# The relative path means the csv file is written to the current working
# directory (normally the folder where this notebook is located).
res.to_csv("submission.csv", index=False)