<a href="https://colab.research.google.com/github/Stereo-Alex/Frutavision/blob/main/Workbook_without_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Connecting to the Kaggle API and downloading the data

In [None]:
# Install the Kaggle command-line client used by the `!kaggle` download cell.
!pip install kaggle
from google.colab import files
# Upload the `kaggle.json` API token; the `cp` below expects that exact file name
# in the current working directory.
files.upload()
# Copy the token into ~/.kaggle/ and make it readable/writable by the owner only.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the `chrisfilo/fruit-recognition` dataset with the Kaggle CLI; the
# unzip step further down reads it as `fruit-recognition.zip`.
!kaggle datasets download -d chrisfilo/fruit-recognition

## Creating directories and unzipping the data into them

In [None]:
!mkdir fruit
!unzip fruit-recognition.zip -d fruit


# Creating a df with the paths

In [None]:
import pandas as pd
import numpy as np
import os 
import cv2
import matplotlib.pyplot as plt

In [None]:
# Root folder the archive was extracted into; each entry directly below it is
# treated as one fruit class.
data_folder = '/content/fruit/'
paths = os.listdir(data_folder)
# Full path of every top-level entry. An empty folder simply yields an empty list.
list_of_paths = [os.path.join(data_folder, x) for x in paths]
list_of_paths

In [None]:
list_of_paths[0]

In [None]:
def data_frame_gen(list_of_paths):
    """Build a DataFrame listing every file found under the given class folders.

    Parameters
    ----------
    list_of_paths : iterable of str
        One directory per fruit class. Each directory is walked recursively;
        entries that are not directories contribute no rows.

    Returns
    -------
    pandas.DataFrame
        One row per file, with columns ``Fruit`` (name of the class folder the
        file was found under) and ``Path`` (path to the file), in walk order.
    """
    fruits = []
    all_files = []
    for path in list_of_paths:
        # Label files with the class folder's own name rather than a fixed
        # position in the split path, so the result does not depend on how
        # deep the dataset root sits in the filesystem.
        fruit = os.path.basename(os.path.normpath(path))

        for dirpath, _, filenames in os.walk(path):
            for fn in filenames:
                fruits.append(fruit)
                all_files.append(os.path.join(dirpath, fn))

    # Passing `columns` keeps the schema stable even when no files were found.
    img_df = pd.DataFrame({'Fruit': fruits, 'Path': all_files},
                          columns=['Fruit', 'Path'])

    return img_df


In [None]:
df = data_frame_gen(list_of_paths)

In [None]:
df.shape

In [None]:
df

In [None]:
## Removing DS_store

# Select the rows to discard by their own path and filter them out in one step,
# so exactly the `.DS_Store` entries are removed (and nothing else).
is_ds_store = df['Path'].astype(str).str.contains('DS_Store', regex=False)
# Re-number the rows so later positional/label lookups see a gap-free 0..n-1 index.
df = df[~is_ds_store].reset_index(drop=True)
        

In [None]:
df.groupby('Fruit').count()


In [None]:
# Preview one sample image (the second row of the frame, selected by position).
sample_path = df['Path'].iloc[1]
# IMREAD_COLOR always yields a 3-channel BGR array, which is what the
# BGR -> RGB conversion below requires.
img = cv2.imread(sample_path, cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError(f'Could not read image: {sample_path}')
# OpenCV loads channels as BGR; matplotlib expects RGB.
img_GRB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_GRB)
plt.show()

## Preparing the data

In [None]:
# Attach an integer label to every row: fruit names are sorted and numbered from 0.

fruit_names = sorted(df.Fruit.unique())
mapper_fruit_names = {name: position for position, name in enumerate(fruit_names)}
df["label"] = df["Fruit"].map(mapper_fruit_names)
print(mapper_fruit_names)

# Show the first rows with the new column
df.head()

# Preparing for modeling

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# One generator configuration serves both subsets: pixel values are multiplied
# by 1/255 and a 0.1 fraction of the images is reserved for validation.
data_gen = ImageDataGenerator(rescale=1/255, validation_split=0.1)

# Options common to the training and validation iterators.
flow_kwargs = dict(
    directory='/content/fruit',
    target_size=(150, 150),
    batch_size=128,
    class_mode='categorical',
)

train_generator = data_gen.flow_from_directory(subset='training', **flow_kwargs)

validation_generator = data_gen.flow_from_directory(subset='validation', **flow_kwargs)




In [None]:
shape_img = (150,150,3)

In [None]:
# Basic training hyperparameters, passed to `model.compile` / `model.fit` further down.

epochs = 15  # number of training epochs handed to `model.fit`
loss_function = 'categorical_crossentropy'  # matches the generators' class_mode='categorical'
optimizer = 'adam'
metrics = ['accuracy']



# Model


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
  
# Size the output layer from the classes the training generator actually found,
# so the network always matches the one-hot labels it will be fed.
num_classes = len(train_generator.class_indices)

# Filter count of each Conv2D -> MaxPooling2D stage, in order.
conv_filters = [32, 64, 64, 64, 64, 32]

feature_layers = []
for stage, filters in enumerate(conv_filters):
    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': shape_img} if stage == 0 else {}
    feature_layers.append(
        tf.keras.layers.Conv2D(filters, (3, 3), activation='relu',
                               padding='same', **first_layer_kwargs)
    )
    feature_layers.append(tf.keras.layers.MaxPooling2D(2, 2))

# Classifier head: flatten -> dense -> dropout -> softmax over the classes.
model = tf.keras.models.Sequential(feature_layers + [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

model.summary()

model.compile(loss=loss_function,
              optimizer=optimizer,
              metrics=metrics)

In [None]:
# Train on the training generator and evaluate on the validation generator after
# every epoch; the returned History object feeds the benchmarking plots.
history = model.fit(train_generator,
                    epochs=epochs ,
                    validation_data = validation_generator)

# Benchmarking

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by `model.fit`.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Loss curves are extracted alongside accuracy; they are not plotted in this cell.
loss = history.history['loss']
val_loss = history.history['val_loss']

# Dedicated name for the x-axis so the integer `epochs` hyperparameter is not
# overwritten (re-running `model.fit` needs it to stay an int).
epoch_index = range(len(acc))

plt.plot(epoch_index, acc, 'r', label='Training accuracy')
plt.plot(epoch_index, val_acc, 'b', label='Validation accuracy')
# Take the epoch count from the recorded history so the title cannot go stale.
plt.title(f'Training and validation accuracy for {len(acc)} epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc=0)

plt.show()

In [None]:
# Per-epoch metrics recorded by `model.fit`.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Loss curves are extracted alongside accuracy; they are not plotted in this cell.
loss = history.history['loss']
val_loss = history.history['val_loss']

# Dedicated name for the x-axis so the integer `epochs` hyperparameter is not
# overwritten (re-running `model.fit` needs it to stay an int).
epoch_index = range(len(acc))

plt.plot(epoch_index, acc, 'r', label='Training accuracy')
plt.plot(epoch_index, val_acc, 'b', label='Validation accuracy')
# Take the epoch count from the recorded history so the title cannot go stale.
plt.title(f'Training and validation accuracy for {len(acc)} epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc=0)

plt.show()