WASTE MANAGEMENT
PROBLEM

Waste management is a major problem in our country. Most waste ends up in landfills, which leads to many issues, such as:

Increase in landfills
Eutrophication
Consumption of toxic waste by animals
Increase in toxins
Land, water and air pollution
APPROACH

Analysed the components of household waste
Segregated into two classes (Organic and recyclable)
Automated the process by using IOT and machine learning
Reduced the amount of toxic waste ending up in landfills

In [1]:
# !nvidia-smi

In [2]:
import keras
import tensorflow as tf

2021-10-16 19:54:53.044240: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-16 19:54:53.044297: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [4]:
# from tensorflow.python.client import device_lib 
# print(device_lib.list_local_devices())

In [5]:
### IMPORT LIBRARIES

In [6]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import skimage.io
import tensorflow 
import tqdm
import glob

from tqdm import tqdm 

from skimage.io import imread, imshow
from skimage.transform import resize

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense, Flatten, BatchNormalization, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
%matplotlib inline

In [7]:
### IMPORT DATASET

In [8]:
# Paths of every .jpg file in the TRAIN/O class folder, and how many there are.
train_o = glob.glob('Waste_DATASET/TRAIN/O/*.jpg')
a = len(train_o)

In [9]:
# Paths of every .jpg file in the TRAIN/R class folder, and how many there are.
train_r = glob.glob('Waste_DATASET/TRAIN/R/*.jpg')
b = len(train_r)

In [10]:
# Total number of training images across both class folders (O + R).
print(f"Nos of training samples: {a + b}")

Nos of training samples: 22564


In [11]:
#### DATA AUGMENTATION

In [12]:
# Training generator: scale pixels to [0, 1] and apply random zoom, rotation
# and flips. 20% of the TRAIN folder is reserved as a validation split.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    zoom_range=0.4,
    rotation_range=10,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)

# Validation generator: same rescaling and the same 20% split, but no
# augmentation, so validation images are only rescaled.
valid_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    validation_split=0.2,
)

# Test generator: rescaling only.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [13]:
# Stream the 'training' subset of Waste_DATASET/TRAIN as 224x224 images with
# binary labels, 64 images per batch.
# (An earlier version of this cell used batch_size = 128.)
train_dataset = train_datagen.flow_from_directory(
    directory='Waste_DATASET/TRAIN',
    target_size=(224, 224),
    class_mode='binary',
    batch_size=64,
    subset='training',
)

Found 18052 images belonging to 2 classes.


In [14]:
# Stream the 'validation' subset of Waste_DATASET/TRAIN as 224x224 images with
# binary labels, 64 images per batch.
# (An earlier version of this cell used batch_size = 128.)
valid_dataset = valid_datagen.flow_from_directory(
    directory='Waste_DATASET/TRAIN',
    target_size=(224, 224),
    class_mode='binary',
    batch_size=64,
    subset='validation',
)

Found 4512 images belonging to 2 classes.


In [15]:
# Class Indices
# Mapping from class folder name to the integer label assigned by the generator.

train_dataset.class_indices

{'O': 0, 'R': 1}

In [19]:
# # Viewing Images

# # fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (20,20))

# for i in tqdm(range(12)):
#     rand1 = np.random.randint(len(train_dataset))
#     rand2 = np.random.randint(128)
#     ax[i].imshow(train_dataset[rand1][0][rand2])
#     ax[i].axis('off')
#     label = train_dataset[rand1][1][rand2]
#     if label == 1:
#         ax[i].set_title('Recycle Waste')
#     else:
#         ax[i].set_title('Organic Waste')

In [0]:
### MODEL BUILDING

In [0]:
# Defining Model
# VGG16 convolutional base with ImageNet weights and no classifier head
# (include_top=False), taking 224x224 RGB input.

base_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

In [0]:
# Freezing Layers
# Mark every layer of the VGG16 base as non-trainable so its weights are
# left untouched during fitting.

for vgg_layer in base_model.layers:
    vgg_layer.trainable = False

In [0]:
# Summary
# Print the layer-by-layer summary of the VGG16 base.

base_model.summary()

In [0]:
# Defining Layers
# Classifier head stacked on the VGG16 base: two Dense(1024) blocks, each
# followed by BatchNorm -> ReLU -> Dropout, ending in one sigmoid unit for
# the binary decision.

model = Sequential([
    base_model,
    Dropout(0.2),
    Flatten(),
    BatchNormalization(),
    # first fully-connected block
    Dense(1024, kernel_initializer='he_uniform'),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    # second fully-connected block
    Dense(1024, kernel_initializer='he_uniform'),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    # single-probability output
    Dense(1, activation='sigmoid'),
])

In [0]:
# Summary
# Print the layer-by-layer summary of the full model (VGG16 base + head).

model.summary()

In [0]:
# Model Compile
# Adam optimiser, binary cross-entropy loss, and AUC tracked under the name
# 'auc' (the callbacks and plots refer to it as 'auc' / 'val_auc').

OPT = tensorflow.keras.optimizers.Adam(learning_rate=0.001)
auc_metric = tensorflow.keras.metrics.AUC(name='auc')

model.compile(
    optimizer=OPT,
    loss='binary_crossentropy',
    metrics=[auc_metric],
)

In [0]:
# Defining Callbacks
# Both callbacks watch validation AUC, where higher is better.

filepath = './best_weights.hdf5'

# Stop training once val_auc has not improved for 5 consecutive epochs.
earlystopping = EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=5,
    verbose=1,
)

# Write the model to `filepath` only when val_auc reaches a new best.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_auc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

callback_list = [earlystopping, checkpoint]

In [0]:
# Model Fitting
# Train for up to 10 epochs, validating after each one. The callbacks may
# stop training early and save the best checkpoint.

fit_options = dict(
    validation_data=valid_dataset,
    epochs=10,
    callbacks=callback_list,
    verbose=1,
)
model_history = model.fit(train_dataset, **fit_options)

In [0]:
### MODEL EVALUATION

In [0]:
# Summarize the model loss
# Training vs. validation loss per epoch.

plt.plot(model_history.history['loss'])
plt.plot(model_history.history['val_loss'])
# The keyword is lowercase `fontsize`; Matplotlib no longer normalises
# mixed-case property names such as `FontSize`.
plt.title('VGG16 Model Loss', fontsize = 16)
plt.ylabel('Loss', fontsize = 16)
plt.xlabel('Epoch', fontsize = 16)
plt.legend(['Train', 'Validation'], loc='upper left', bbox_to_anchor=(1,1))
plt.show()

In [0]:
# Summarize models auc
# Training vs. validation AUC per epoch. The model is compiled with AUC as
# its only metric, so the plot is labelled AUC rather than accuracy.

plt.plot(model_history.history['auc'])
plt.plot(model_history.history['val_auc'])
# The keyword is lowercase `fontsize`; Matplotlib no longer normalises
# mixed-case property names such as `FontSize`.
plt.title('VGG16 Model AUC', fontsize = 16)
plt.ylabel('AUC', fontsize = 16)
plt.xlabel('Epoch', fontsize = 16)
plt.legend(['Train', 'Validation'], loc='upper left', bbox_to_anchor=(1,1))
plt.show()

In [0]:
# Best training AUC and best validation AUC reached over all epochs.
# (The original tuple repeated 'val_auc' twice.)
max(model_history.history['auc']), max(model_history.history['val_auc'])

In [0]:
# Test Data

# shuffle=False makes the generator yield images in directory order, so row i
# of the model output corresponds to test_data.classes[i]. With the default
# (shuffle=True) predictions cannot be compared against the true labels.
test_data = test_datagen.flow_from_directory(directory = 'Waste_DATASET/TEST',
                                             target_size = (224,224),
                                             class_mode = 'binary',
                                             batch_size = 128,
                                             shuffle = False)

# The model ends in a single sigmoid unit, so predict() returns an (N, 1)
# array of probabilities for class id 1. argmax over axis 1 of a one-column
# array is always 0, so threshold at 0.5 to get hard 0/1 labels instead.
predictions = model.predict(test_data)
predictions = (predictions.ravel() > 0.5).astype(int)

In [None]:
## Confusion Matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

## Create the Confusion Matrix out of the Actual and Predicted Data.
# The true labels are the per-image integer ids in `classes`, not the
# name -> id dict in `class_indices`. They line up with `predictions` only if
# the generator yields images in directory order (shuffle=False).
cm = confusion_matrix(test_data.classes, predictions)
## Print the Confusion Matrix
print(cm)

# Tick labels ordered by class id so they match the matrix rows and columns.
class_names = sorted(test_data.class_indices, key=test_data.class_indices.get)

# Draw the matrix as an annotated heat map on an explicit Axes.
fig, ax = plt.subplots()
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names).plot(
    ax=ax, cmap="Blues", values_format="g")
ax.set_title('Confusion Matrix', size=20);
ax.set_xlabel('Predicted Labels',size=20)
ax.set_ylabel('Actual Labels', size=20);

In [None]:
# Accuracy, precision, recall and F1 score on the test set
from sklearn.metrics import accuracy_score, f1_score

# The true labels are the per-image integer ids in `classes`, not the
# name -> id dict in `class_indices`. They line up with `predictions` only if
# the generator yields images in directory order (shuffle=False).
y_true = test_data.classes

# Compute precision and recall once and reuse them for the manual F1 below.
precision_test = precision_score(y_true, predictions, average = 'weighted')
recall_test = recall_score(y_true, predictions, average = 'weighted')

print('Accuracy on testing set:', accuracy_score(y_true, predictions)*100, "%")
print('Precision on testing set:', precision_test*100, "%")
print('Recall on testing set:', recall_test*100, "%")
# F1 Score = 2*((precision*recall)/(precision+recall))
print('F1 on testing set:' , 2*((precision_test*recall_test)/(precision_test+recall_test)) )
print('F1 score is',f1_score(y_true, predictions, average = 'weighted') *100, "%")

In [None]:
# ROC - AUC Score
from sklearn import metrics

# The true labels are the per-image integer ids in `classes`. They line up
# with the model output only if the generator yields images in directory
# order (shuffle=False).
y_true = test_data.classes

# The model has one sigmoid output, so predict() returns an (N, 1) array of
# probabilities for class id 1. Build an explicit two-column matrix so that
# column i holds the score for class i; indexing column 1 of the raw output
# would raise IndexError.
positive_probab = model.predict(test_data).ravel()
predicted_probab = np.column_stack([1.0 - positive_probab, positive_probab])
n_class = 2

# One-vs-rest AUC per class: class i is treated as the positive label.
for i in range(n_class):
    print("ROC- AUC score is", roc_auc_score(y_true == i, predicted_probab[:,i])*100)

# Overall binary AUC, using the score of class id 1.
print("ROC- AUC score is", roc_auc_score(y_true, positive_probab)*100)

# roc curve for classes
fpr = {}
tpr = {}
thresh ={}

for i in range(n_class):
    fpr[i], tpr[i], thresh[i] = metrics.roc_curve(y_true, predicted_probab[:,i], pos_label=i)

# plotting
plt.plot(fpr[0], tpr[0], linestyle='--',color='orange', label='Class 0 vs Rest')
plt.plot(fpr[1], tpr[1], linestyle='--',color='green', label='Class 1 vs Rest')

plt.title('Multiclass ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.legend(loc='best')
plt.savefig('Multiclass ROC',dpi=300);

In [0]:
# Evaluating Loss and AUC - Test Data
# Returns the loss followed by the compiled metric (AUC) on the test generator.

model.evaluate(test_data)

In [0]:
# Test Case:1 - ORGANIC

dic = test_data.class_indices
# Invert the name -> id mapping so a predicted id can be turned back into its
# class folder name.
idc = {k:v for v,k in dic.items()}

# Load one image, scale it to [0, 1] as the generators do, and display it.
img = load_img('Waste_DATASET/TEST/O/O_12650.jpg', target_size=(224,224))
img = img_to_array(img)
img = img / 255
imshow(img)
plt.axis('off')
# The model expects a batch dimension: (224, 224, 3) -> (1, 224, 224, 3).
img = np.expand_dims(img,axis=0)
# `Sequential.predict_proba` has been removed from Keras; `predict` returns
# the same sigmoid probability.
answer = model.predict(img)

# answer[0][0] is the sigmoid output for the single image: above 0.5 means
# class id 1, otherwise class id 0. Looking the id up in `idc` keeps the
# message tied to the folder name rather than to a hard-coded id.
predicted_class = idc[int(answer[0][0] > 0.5)]

if predicted_class == 'R':
    print("The image belongs to Recycle waste category")
else:
    print("The image belongs to Organic waste category ")

In [0]:
# Test Case:2 - RECYCLE

dic = test_data.class_indices
# Invert the name -> id mapping so a predicted id can be turned back into its
# class folder name.
idc = {k:v for v,k in dic.items()}

# Load one image, scale it to [0, 1] as the generators do, and display it.
img = load_img('Waste_DATASET/TEST/R/R_10011.jpg', target_size=(224,224))
img = img_to_array(img)
img = img / 255
imshow(img)
plt.axis('off')
# The model expects a batch dimension: (224, 224, 3) -> (1, 224, 224, 3).
img = np.expand_dims(img,axis=0)
# `Sequential.predict_proba` has been removed from Keras; `predict` returns
# the same sigmoid probability.
answer = model.predict(img)

# answer[0][0] is the sigmoid output for the single image: above 0.5 means
# class id 1, otherwise class id 0. Looking the id up in `idc` keeps the
# message tied to the folder name rather than to a hard-coded id.
predicted_class = idc[int(answer[0][0] > 0.5)]

if predicted_class == 'R':
    print("The image belongs to Recycle waste category")
else:
    print("The image belongs to Organic waste category ")

In [0]:
# CONCLUSION

# 1. We were able to classify images correctly, with an accuracy of 97.00% on the training dataset.

# 2. We achieved an accuracy of 95.60% on the validation data and 94.98% on the test data.