# Kaggle API Processing to download dataset

In [1]:
!pip install kaggle
# Create .kaggle folder in ../root directory
!mkdir ~/.kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# Upload kaggle.json => Personal API Token downloaded from kaggle
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"akritighosh","key":"6338b709da906e0e129e6ff19fe17260"}'}

In [3]:
# Copy kaggle.json file to created .kaggle folder
!cp kaggle.json ~/.kaggle/
# Change permissions for the file
!chmod 600 ~/.kaggle/kaggle.json

## Download dataset zip 

In [4]:
!kaggle datasets download asdasdasasdas/garbage-classification

Downloading garbage-classification.zip to /content
 70% 57.0M/82.0M [00:00<00:00, 101MB/s] 
100% 82.0M/82.0M [00:00<00:00, 117MB/s]


## Unzip the compressed dataset

In [None]:
!unzip garbage-classification.zip -d data

## Delete duplicate image folder

In [7]:
!rm -rf '/content/data/Garbage classification'

# Import necessary libraries

In [8]:
# Interacting with the operating system and perform file management tasks
import shutil
# To create, edit and process dataset from csv files
import pandas as pd
# For mathematical operations and multidimensional arrays
import numpy as np
import math
# For image augmentation
from tensorflow.keras.preprocessing.image import  load_img, img_to_array, ImageDataGenerator
# For creating model
import tensorflow as tf

# Import labelled data for train, validation and test dataset

In [9]:
classes = {1:'glass', 2:'paper',3:'cardboard',4:'plastic',5:'metal',6:'trash'}

In [10]:
df = pd.read_csv('/content/data/one-indexed-files.txt',   header=None, names=['File','Label'], sep=' ')
df_train = pd.read_csv('/content/data/one-indexed-files-notrash_train.txt',   header=None, names=['File','Label'], sep=' ')
df_val = pd.read_csv('/content/data/one-indexed-files-notrash_val.txt',   header=None, names=['File','Label'], sep=' ')
df_test = pd.read_csv('/content/data/one-indexed-files-notrash_test.txt',   header=None, names=['File','Label'], sep=' ')

## Train dataset

In [11]:
# Dataframe info
print(df_train.info())
# Get shape
print('No of rows', df_train.shape[0])
df_train

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1768 entries, 0 to 1767
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   File    1768 non-null   object
 1   Label   1768 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 27.8+ KB
None
No of rows 1768


Unnamed: 0,File,Label
0,cardboard202.jpg,3
1,paper472.jpg,2
2,paper522.jpg,2
3,glass189.jpg,1
4,glass325.jpg,1
...,...,...
1763,cardboard6.jpg,3
1764,glass283.jpg,1
1765,metal335.jpg,5
1766,plastic133.jpg,4


## Validation dataset

In [12]:
# Dataframe info
print(df_val.info())
# Get shape
print('No of rows', df_val.shape[0])
df_val

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 328 entries, 0 to 327
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   File    328 non-null    object
 1   Label   328 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 5.2+ KB
None
No of rows 328


Unnamed: 0,File,Label
0,cardboard114.jpg,3
1,plastic204.jpg,4
2,glass123.jpg,1
3,glass152.jpg,1
4,glass398.jpg,1
...,...,...
323,metal58.jpg,5
324,cardboard149.jpg,3
325,plastic5.jpg,4
326,glass23.jpg,1


## Test dataset

In [13]:
# Dataframe info
print(df_test.info())
# Get shape
print('No of rows', df_test.shape[0])
df_test

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 431 entries, 0 to 430
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   File    431 non-null    object
 1   Label   431 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.9+ KB
None
No of rows 431


Unnamed: 0,File,Label
0,paper70.jpg,2
1,paper380.jpg,2
2,cardboard31.jpg,3
3,glass12.jpg,1
4,paper169.jpg,2
...,...,...
426,metal389.jpg,5
427,paper303.jpg,2
428,paper405.jpg,2
429,paper465.jpg,2


# Create the dataset as per requirement


### Current format of image dataset : -
data / <br/>
. class 1/ <br/>
.... img1 <br/>
.... img2 <br/>
. class 2/ <br/>
. class 3/ <br/>
. class 4/ <br/>


### Format required - 
data / <br/>
.. train / <br/>
.... class 1/ <br/>
....... img1 <br/>
....... img2 <br/>
.... class 2/ <br/>
.... class 3/ <br/>
.... class 4/ <br/>
.. test / <br/>
.... class 1/ <br/>
....... img1 <br/>
....... img2 <br/>
.... class 2/ <br/>
.... class 3/ <br/>
.... class 4/ <br/>


## Train directory

In [14]:
!mkdir '/content/data/train'
!mkdir '/content/data/train/glass'
!mkdir '/content/data/train/paper'
!mkdir '/content/data/train/cardboard'
!mkdir '/content/data/train/plastic'
!mkdir '/content/data/train/metal'
!mkdir '/content/data/train/trash'

In [15]:
for i in range(df_train.shape[0]):
  try:
    folder = classes[int(df_train.iloc[i,1])]
    new_path = '/content/data/train/' + folder
    # old_path = '/content/data/garbage classification/Garbage classification/', folder , str(df_train.iloc[i,0]))
    old_path = '/content/data/garbage classification/Garbage classification/' + folder + '/' + str(df_train.iloc[i,0])
    shutil.move(old_path, new_path)
  except:
    print(i, str(df_train.iloc[i,0]))

## Validation directory

In [16]:
!mkdir '/content/data/val'
!mkdir '/content/data/val/glass'
!mkdir '/content/data/val/paper'
!mkdir '/content/data/val/cardboard'
!mkdir '/content/data/val/plastic'
!mkdir '/content/data/val/metal'
!mkdir '/content/data/val/trash'

In [17]:
for i in range(df_val.shape[0]):
  try:
    folder = classes[int(df_val.iloc[i,1])]
    new_path = '/content/data/val/' + folder
    # old_path = '/content/data/garbage classification/Garbage classification/', folder , str(df_val.iloc[i,0]))
    old_path = '/content/data/garbage classification/Garbage classification/' + folder + '/' + str(df_val.iloc[i,0])
    shutil.move(old_path, new_path)
  except:
    print(i, str(df_val.iloc[i,0]))

## Test directory

In [18]:
!mkdir '/content/data/test'
!mkdir '/content/data/test/glass'
!mkdir '/content/data/test/paper'
!mkdir '/content/data/test/cardboard'
!mkdir '/content/data/test/plastic'
!mkdir '/content/data/test/metal'
!mkdir '/content/data/test/trash'

In [19]:
for i in range(df_test.shape[0]):
  try:
    folder = classes[int(df_test.iloc[i,1])]
    new_path = '/content/data/test/' + folder
    # old_path = '/content/data/garbage classification/Garbage classification/', folder , str(df_test.iloc[i,0]))
    old_path = '/content/data/garbage classification/Garbage classification/' + folder + '/' + str(df_test.iloc[i,0])
    shutil.move(old_path, new_path)
  except:
    print(i, str(df_test.iloc[i,0]))

## Delete old directory

In [20]:
!rm -rf '/content/data/garbage classification'

# Data Augmentation

In [21]:
data_gen = ImageDataGenerator(
        rotation_range=45,
        # width_shift_range=0.2,
        # height_shift_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        # brightness_range=[0.75,1.25],
        rescale=1./255,
        # zoom_range=0.25,
        # fill_mode='nearest'
        )

test_gen = ImageDataGenerator(rescale=1./255)

In [22]:
train_data_gen = data_gen.flow_from_directory(
                                        batch_size=128,                           # number of images in each batch
                                        directory='/content/data/train',          # the source of your images
                                        shuffle=True,                             # the images will be shuffle
                                        target_size=(128, 128),                   # size (height, width) of your images
                                        class_mode='categorical'                  # categorical because the dataset contains more than 2 labels
                                        )
val_data_gen = data_gen.flow_from_directory(
                                        batch_size=128,                           # number of images in each batch
                                        directory='/content/data/val',            # the source of your images
                                        shuffle=True,                             # the images will be shuffle
                                        target_size=(128, 128),                   # size (height, width) of your images
                                        class_mode='categorical'                  # categorical because the dataset contains more than 2 labels
                                        )
val_test_gen = test_gen.flow_from_directory(
                                        batch_size=128,                           # number of images in each batch
                                        directory='/content/data/val',            # the source of your images
                                        shuffle=True,                             # the images will be shuffle
                                        target_size=(128, 128),                   # size (height, width) of your images
                                        class_mode='categorical'                  # categorical because the dataset contains more than 2 labels
                                        )
test_data_gen = test_gen.flow_from_directory(
                                        batch_size=128,                           # number of images in each batch
                                        directory='/content/data/test',           # the source of your images
                                        shuffle=True,                             # the images will be shuffle
                                        target_size=(128, 128),                   # size (height, width) of your images
                                        class_mode='categorical'                  # categorical because the dataset contains more than 2 labels
                                        )

Found 1768 images belonging to 6 classes.
Found 328 images belonging to 6 classes.
Found 328 images belonging to 6 classes.
Found 431 images belonging to 6 classes.


# Model Training

* Validation - val_data_gen
* 1 conv layer - 16 with batch normalization and dropout 
* 2 32 dense layers with batch normalization

In [39]:
m_best = tf.keras.models.Sequential([
                                    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape =(128,128,3)),
                                    tf.keras.layers.BatchNormalization(),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Dropout(0.5),

                                    tf.keras.layers.Flatten(),
                                    # tf.keras.layers.Dense(64, activation='relu'),
                                    tf.keras.layers.Dense(1024, activation='relu'),
                                    tf.keras.layers.Dense(1024, activation='relu'),
                                    tf.keras.layers.Dense(6, activation='softmax')
])


In [40]:
# decrt = 0.1/120
# m_best.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate = 0.1, decay = decrt), metrics=['accuracy'])
m_best.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate = 0.00001), metrics=['accuracy'])
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, restore_best_weights=True)

m_best.fit(
  train_data_gen,
  batch_size = 32, 
  callbacks=[es], 
  validation_data=val_test_gen, 
  epochs=120
)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 64: early stopping


<keras.callbacks.History at 0x7f13d6b02490>

In [41]:
m_best.fit(
  train_data_gen,
  batch_size = 160,
  callbacks=[es], 
  validation_data=val_test_gen, 
  epochs=120
)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 21: early stopping


<keras.callbacks.History at 0x7f13d5d40550>

In [42]:
m_best.fit(
  train_data_gen,
  batch_size = 64,
  callbacks=[es], 
  validation_data=val_test_gen, 
  epochs=120
)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 29: early stopping


<keras.callbacks.History at 0x7f135a670710>

In [27]:
m_best.save('/content/78val74train.h5')
files.download('/content/78val74train.h5')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [35]:
from keras.applications.vgg16 import VGG16
from keras.models import Model

In [37]:
conv_base = VGG16(include_top=False,
                     weights='imagenet', 
                     input_shape=(128,128,3))
for layer in conv_base.layers:
  layer.trainable = False
top_model = conv_base.output
top_model = tf.keras.layers.Flatten(name="flatten")(top_model)
top_model = tf.keras.layers.Dense(4096, activation='relu')(top_model)
top_model = tf.keras.layers.Dense(1072, activation='relu')(top_model)
top_model = tf.keras.layers.Dropout(0.2)(top_model)
output_layer = tf.keras.layers.Dense(6, activation='softmax')(top_model)

# Group the convolutional base and new fully-connected layers into a Model object.
model = Model(inputs=conv_base.input, outputs=output_layer)

# Compiles the model for training.
model.compile(optimizer='adam', 
              loss='categorical_crossentropy',
              metrics=['accuracy'])
vgg_history = model.fit(train_data_gen, batch_size = 64, callbacks=[es], validation_data=val_test_gen, epochs=120)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 29: early stopping


In [38]:
model.save('/content/model.h5')
files.download('/content/model.h5')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>