<a href="https://colab.research.google.com/github/AyushKumarSD/Recyclable-and-Household-Waste-Classification/blob/main/VGG_19_100Epoch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import sys
import gdown
from tempfile import NamedTemporaryFile
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
FILE_ID = '13hIBU7BbRlNNlP8_yRaPuOvsiMXIlNFc'
DOWNLOAD_URL = f'https://drive.google.com/uc?export=download&id={FILE_ID}'
KAGGLE_INPUT_PATH = '/kaggle/input'
KAGGLE_WORKING_PATH = '/kaggle/working'
KAGGLE_SYMLINK = 'kaggle'

# Unmount and setup directories
!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

# Setup symlinks
try:
    os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
    pass
try:
    os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
    pass

directory = 'recyclable-and-household-waste-classification'
destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
os.makedirs(destination_path, exist_ok=True)

try:
    print(f'Downloading {directory} from Google Drive')
    output_file = os.path.join(destination_path, 'archive.zip')
    gdown.download(DOWNLOAD_URL, output_file, quiet=False)

    # Extract the downloaded file
    if output_file.endswith('.zip'):
        with ZipFile(output_file, 'r') as zip_ref:
            zip_ref.extractall(destination_path)
    elif output_file.endswith('.tar.gz') or output_file.endswith('.tar'):
        with tarfile.open(output_file, 'r') as tar_ref:
            tar_ref.extractall(destination_path)
    else:
        raise ValueError(f"Unsupported file type: {output_file}")

    print(f'\nDownloaded and uncompressed: {directory}')
except Exception as e:
    print(f'Failed to load {DOWNLOAD_URL} to path {destination_path}: {e}')

print('Data source import complete.')


Downloading recyclable-and-household-waste-classification from Google Drive


Downloading...
From (original): https://drive.google.com/uc?export=download&id=13hIBU7BbRlNNlP8_yRaPuOvsiMXIlNFc
From (redirected): https://drive.google.com/uc?export=download&id=13hIBU7BbRlNNlP8_yRaPuOvsiMXIlNFc&confirm=t&uuid=46b58f62-ad5d-493f-9f72-7d3e407756fd
To: /kaggle/input/recyclable-and-household-waste-classification/archive.zip
100%|██████████| 965M/965M [00:10<00:00, 94.8MB/s]



Downloaded and uncompressed: recyclable-and-household-waste-classification
Data source import complete.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_174.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_7.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_135.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_40.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_68.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_147.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_beverage_bottles/real_world/Image_11.png
/kaggle/input/recyclable-and-household-waste-classification/images/images/glass_bevera

In [3]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, auc

In [4]:
# image directory
image_dir = "/kaggle/input/recyclable-and-household-waste-classification/images/images"

In [5]:
# data generator with data augmentation
datagen = ImageDataGenerator(rescale=1./255,
                             validation_split=0.2,
                             rotation_range=20,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest'
                            )

In [6]:
# parameters
input_size = (256, 256)
batch_size = 32
epochs = 100

In [7]:
# training data generator
train_gen = datagen.flow_from_directory(
    image_dir,
    target_size = input_size,
    batch_size = batch_size,
    class_mode = 'categorical',
    subset = 'training'
)

Found 12000 images belonging to 30 classes.


In [8]:
# validation data generator
valid_gen = datagen.flow_from_directory(
    image_dir,
    target_size = input_size,
    batch_size = batch_size,
    class_mode = 'categorical',
    subset = 'validation'
)

Found 3000 images belonging to 30 classes.


In [9]:
# Load the pre-trained VGG16 model without the fully connected layers
base_model = tf.keras.applications.VGG19(
    weights = 'imagenet',
    include_top = False,
    input_shape = (256, 256, 3)
)

In [10]:
# Freeze the pre-trained layers so they are not updated during training
for layer in base_model.layers:
    layer.trainable = False

# Add classification layers on top of the pre-trained model
x = base_model.output
x = tf.keras.layers.Conv2D(filters=100, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
flatten = tf.keras.layers.Flatten()(x)
predictions = tf.keras.layers.Dense(30, activation='softmax')(flatten)

model = tf.keras.Model(inputs=base_model.input, outputs=predictions)

In [11]:
# Model Summary
model.summary()

In [12]:
model.compile(tf.keras.optimizers.Adam(learning_rate = 0.001),
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])

In [None]:
# History object contains the learning history e.g:- training and validation loss and accuracy
history = model.fit(train_gen,
                    steps_per_epoch = train_gen.samples // train_gen.batch_size,
                    epochs = epochs,
                    validation_data = valid_gen,
                    validation_steps = valid_gen.samples // valid_gen.batch_size
                   )

Epoch 1/100


  self._warn_if_super_not_called()


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 701ms/step - accuracy: 0.2748 - loss: 2.6056 - val_accuracy: 0.5195 - val_loss: 1.5648
Epoch 2/100


  self.gen.throw(typ, value, traceback)


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 47ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.4167 - val_loss: 1.9932
Epoch 3/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 694ms/step - accuracy: 0.5476 - loss: 1.5349 - val_accuracy: 0.6055 - val_loss: 1.2965
Epoch 4/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 989us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.6667 - val_loss: 0.8966
Epoch 5/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 691ms/step - accuracy: 0.5921 - loss: 1.3505 - val_accuracy: 0.5981 - val_loss: 1.3011
Epoch 6/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.4583 - val_loss: 1.1655
Epoch 7/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 697ms/step - accuracy: 0.6225 - loss: 1.2439 - val_accuracy: 0.6317 - val_loss: 

In [1]:
# Plot learning curves
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

NameError: name 'history' is not defined

In [None]:
plt.figure(figsize=(18, 6))

plt.subplot(1, 3, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

In [None]:
plt.subplot(1, 3, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()

In [None]:
# Evaluate model
valid_gen.reset()
Y_pred = model.predict(valid_gen, steps=valid_gen.samples // batch_size + 1)
y_pred = np.argmax(Y_pred, axis=1)
y_true = valid_gen.classes

In [None]:
import seaborn as sns
# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=valid_gen.class_indices.keys(), yticklabels=valid_gen.class_indices.keys())
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Classification report
print('Classification Report')
print(classification_report(y_true, y_pred, target_names=valid_gen.class_indices.keys()))

In [None]:
# ROC Curve and AUC
# Binarize the output for ROC and AUC calculations
y_true_bin = tf.keras.utils.to_categorical(y_true, num_classes=30)
y_pred_prob = model.predict(valid_gen, steps=valid_gen.samples // batch_size + 1)

In [None]:
fpr = {}
tpr = {}
roc_auc = {}
for i in range(10):
    fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], y_pred_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve for each class
plt.figure(figsize=(10, 8))
for i in range(10):
    plt.plot(fpr[i], tpr[i], label=f'Class {i} (area = {roc_auc[i]:.2f})')

In [None]:
plt.plot([0, 1], [0, 1], 'k--', label='Random Guess')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Calculate and print the average AUC
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(10)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(10):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= 10
mean_auc = auc(all_fpr, mean_tpr)
print(f'Mean AUC: {mean_auc:.2f}')