In [1]:
import os
import tensorflow as tf
import cv2
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import layers,Sequential
import xgboost
import glob
import numpy as np 
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,classification_report


In [2]:
train_path = "..\\Dataset\\datasets\\train\\*\\*"
validation_path = "..\\Dataset\\datasets\\validation\\*\\*"
test_path = "..\\Dataset\\datasets\\test\\*\\*"

In [3]:
def weigh_calculator():
    cardboard_number = len(os.listdir('..\\Dataset\\datasets\\train'+ os.sep + f'cardboard'))
    glass_number = len(os.listdir('..\\Dataset\\datasets\\train'+ os.sep + f'glass'))
    metal_number = len(os.listdir('..\\Dataset\\datasets\\train'+ os.sep + f'metal'))
    paper_number = len(os.listdir('..\\Dataset\\datasets\\train'+ os.sep + f'paper'))
    plastic_number = len(os.listdir('..\\Dataset\\datasets\\train'+ os.sep + f'plastic'))
    trash_number = len(os.listdir('..\\Dataset\\datasets\\train'+ os.sep + f'trash'))
    cardboard_wight =1/((cardboard_number) * (1/glass_number + 1/metal_number + 1/paper_number + 1/plastic_number + 1/trash_number))
    glass_wight =1/((glass_number) * (1/cardboard_number + 1/metal_number + 1/paper_number + 1/plastic_number + 1/trash_number))
    metal_wight =1/((metal_number) * (1/glass_number + 1/cardboard_number + 1/paper_number + 1/plastic_number + 1/trash_number))
    paper_wight =1/((paper_number) * (1/glass_number + 1/metal_number + 1/cardboard_number + 1/plastic_number + 1/trash_number))
    plastic_wight =1/((plastic_number) * (1/glass_number + 1/metal_number + 1/paper_number + 1/cardboard_number + 1/trash_number))
    trash_wight =1/((trash_number) * (1/glass_number + 1/metal_number + 1/paper_number + 1/plastic_number + 1/cardboard_number))
    return {0:cardboard_wight, 1:glass_wight, 2:metal_wight, 3:paper_wight, 4:plastic_wight, 5:trash_wight}

In [4]:
def create_dataset(path):
    my_list = list(glob.glob(path))
    data= []
    label = []
    for item in my_list:
        label.append(item.split(os.sep)[-2])
        image = cv2.imread(item,cv2.IMREAD_COLOR)
        image = cv2.resize(image,(224,224))
        image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        data.append(image)
    return np.array(data),np.array(label)
        

In [5]:
data_augmentation = Sequential([
    layers.RandomContrast(0.2),
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(factor=(-0.3, 0.3),fill_mode='wrap',interpolation='bilinear'),
    layers.RandomZoom(height_factor=(0.2,0.4),width_factor=(0.2,0.4),fill_mode='wrap',interpolation='bilinear'),
])

In [6]:
x_train,y_train = create_dataset(train_path)
x_val, y_val = create_dataset(validation_path)
x_test, y_test = create_dataset(test_path)

In [7]:
x_train = x_train/255
x_val = x_val /255
x_test = x_test/255

In [8]:
label_encoder = LabelEncoder()

In [9]:
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.fit_transform(y_val)
y_test = label_encoder.fit_transform(y_test)

In [10]:
vgg16 = VGG16(weights='imagenet',include_top= False, input_shape=(224,224,3))

In [11]:
vgg16.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [12]:
for layer in vgg16.layers:
    layer.trainable = False
vgg16.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [13]:
feature_extractor = Sequential([data_augmentation,vgg16])

In [14]:
train_features = feature_extractor.predict(x_train)
train_features.shape

(1768, 7, 7, 512)

In [15]:
del x_train

In [16]:
train_features = train_features.reshape(train_features.shape[0],-1)
train_features.shape

(1768, 25088)

In [17]:
train_weight = weigh_calculator()
weights = np.zeros(len(y_train))
for i in range(0,6):
    weights[y_train==i] = train_weight[i]


In [18]:
train_dataset = xgboost.DMatrix(data=train_features,label= y_train,weight=weights)

In [19]:
del train_features

In [20]:
validation_features = feature_extractor.predict(x_val)
validation_features = validation_features.reshape(validation_features.shape[0],-1)

In [21]:
val_dataset = xgboost.DMatrix(data= validation_features,label= y_val)

In [22]:
del validation_features
del x_val

In [31]:
params_1 = {
    'booster': 'gbtree',
    'max_depth': 15, 
    'learning_rate': 0.25,
    'subsample': 0.8,
    'rate_drop': 0.2,
    'n_estimators': 1000,
    'min_chiled_weight': 3,
    'objective': 'multi:softmax',
    'num_class': 6,
    'gpu_id' : 0,
}

In [32]:
model = xgboost.train(params=params_1,dtrain=train_dataset,evals=[(val_dataset, 'eval')],early_stopping_rounds=100,num_boost_round=1000)

Parameters: { "min_chiled_weight", "n_estimators", "rate_drop" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	eval-mlogloss:1.63914
[1]	eval-mlogloss:1.51536
[2]	eval-mlogloss:1.42114
[3]	eval-mlogloss:1.34210
[4]	eval-mlogloss:1.28727
[5]	eval-mlogloss:1.22472
[6]	eval-mlogloss:1.17537
[7]	eval-mlogloss:1.13316
[8]	eval-mlogloss:1.09554
[9]	eval-mlogloss:1.06137
[10]	eval-mlogloss:1.03426
[11]	eval-mlogloss:1.00681
[12]	eval-mlogloss:0.98781
[13]	eval-mlogloss:0.96752
[14]	eval-mlogloss:0.94506
[15]	eval-mlogloss:0.92842
[16]	eval-mlogloss:0.91034
[17]	eval-mlogloss:0.89583
[18]	eval-mlogloss:0.88379
[19]	eval-mlogloss:0.87175
[20]	eval-mlogloss:0.85425
[21]	eval-mlogloss:0.83747
[22]	eval-mlogloss:0.82248
[23]	eval-mlogloss:0.81498
[24]	eval

[270]	eval-mlogloss:0.65189
[271]	eval-mlogloss:0.65116
[272]	eval-mlogloss:0.65183
[273]	eval-mlogloss:0.65191
[274]	eval-mlogloss:0.65245
[275]	eval-mlogloss:0.65278
[276]	eval-mlogloss:0.65242
[277]	eval-mlogloss:0.65261
[278]	eval-mlogloss:0.65237
[279]	eval-mlogloss:0.65281
[280]	eval-mlogloss:0.65289
[281]	eval-mlogloss:0.65268
[282]	eval-mlogloss:0.65369
[283]	eval-mlogloss:0.65363
[284]	eval-mlogloss:0.65383
[285]	eval-mlogloss:0.65353
[286]	eval-mlogloss:0.65346
[287]	eval-mlogloss:0.65365
[288]	eval-mlogloss:0.65347
[289]	eval-mlogloss:0.65334
[290]	eval-mlogloss:0.65337
[291]	eval-mlogloss:0.65355
[292]	eval-mlogloss:0.65325
[293]	eval-mlogloss:0.65331
[294]	eval-mlogloss:0.65333
[295]	eval-mlogloss:0.65342
[296]	eval-mlogloss:0.65278
[297]	eval-mlogloss:0.65282
[298]	eval-mlogloss:0.65250
[299]	eval-mlogloss:0.65247
[300]	eval-mlogloss:0.65234
[301]	eval-mlogloss:0.65214
[302]	eval-mlogloss:0.65226
[303]	eval-mlogloss:0.65209
[304]	eval-mlogloss:0.65184
[305]	eval-mlogloss:

In [None]:
test_features = feature_extractor.predict(x_test)
test_features = test_features.reshape(test_features.shape[0],-1)

In [None]:
test_dataset = xgboost.DMatrix(data=test_features)

In [None]:
predictions = model.predict(test_dataset)
predictions

In [None]:
acc = accuracy_score(predictions,y_test)
acc

In [None]:
report = classification_report(y_test,predictions,output_dict=True)

In [None]:
import pandas as pd
df = pd.DataFrame(report).transpose
df