In [70]:
! pip install imutils



# Downloading the Dataset

In [71]:
! kaggle datasets download -d robinreni/house-rooms-image-dataset --unzip

Downloading house-rooms-image-dataset.zip to C:\Users\pc\PycharmProjects\TensorProject\DL\Untitled Folder




  0%|          | 0.00/116M [00:00<?, ?B/s]
  1%|          | 1.00M/116M [00:00<00:31, 3.85MB/s]
  3%|3         | 4.00M/116M [00:00<00:09, 12.6MB/s]
  5%|5         | 6.00M/116M [00:00<00:13, 8.50MB/s]
  9%|8         | 10.0M/116M [00:00<00:08, 12.7MB/s]
 10%|#         | 12.0M/116M [00:01<00:08, 12.7MB/s]
 12%|#2        | 14.0M/116M [00:01<00:07, 13.9MB/s]
 14%|#3        | 16.0M/116M [00:01<00:07, 14.7MB/s]
 16%|#6        | 19.0M/116M [00:01<00:06, 16.9MB/s]
 18%|#8        | 21.0M/116M [00:01<00:06, 15.5MB/s]
 20%|#9        | 23.0M/116M [00:01<00:06, 15.6MB/s]
 21%|##1       | 25.0M/116M [00:01<00:06, 15.4MB/s]
 23%|##3       | 27.0M/116M [00:02<00:05, 16.2MB/s]
 25%|##4       | 29.0M/116M [00:02<00:06, 14.7MB/s]
 27%|##6       | 31.0M/116M [00:02<00:05, 15.6MB/s]
 28%|##8       | 33.0M/116M [00:02<00:05, 15.3MB/s]
 30%|###       | 35.0M/116M [00:02<00:05, 15.1MB/s]
 32%|###1      | 37.0M/116M [00:02<00:05, 14.8MB/s]
 34%|###3      | 39.0M/116M [00:02<00:05, 14.7MB/s]
 35%|###5      | 41.

# Importing Libraries

In [72]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import shutil
import os

# TensorFlow
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization, Dropout, Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model

# SciKit Learn
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

from imutils import paths
from tqdm import tqdm

## Assiging Variables and Functions

In [73]:
ip_df = "./House_Room_Dataset/Bedroom"

Tr = "training"
Val = "evaluation"
Ts = "testing"

base_path = "dataset"
batch_size = 32
Classes = ["Modern", "Old"]

## Base-Line Configuration for graph Plotation

In [74]:
def plot_hist(hist, metric):
    if metric == 'auc':
        plt.plot(hist.history["auc"])
        plt.plot(hist.history["val_auc"])
    else:
        plt.plot(hist.history["loss"])
        plt.plot(hist.history["val_loss"])

    plt.style.use('ggplot')
    plt.title('model {}'.format(metric))
    plt.xlabel('epoch')
    plt.ylabel("{}".format(metric))
    plt.legend(["train", "validation"], loc='upper left')
    plt.show()

## Loading Labels

In [75]:
# Reading labels from the txt file
with open("C:/Users/pc/Desktop/AI and ML/convolution/datasets/House rooms interiors/Labels/labels.txt", 'r') as f:
      manual_labels = f.read()
        
# Extracting individual labels into a list
labels = [i for i in manual_labels]
len(labels)

451

In [76]:
# For checking the equillibrium of the dataset
from collections import Counter
print(Counter(labels).keys()) 
print(Counter(labels).values())

dict_keys(['O', 'M'])
dict_values([271, 180])


## Train Test Validation Split

In [77]:
# sorting files in the order they appear
files = os.listdir(ip_df)
files.sort(key=lambda f: int(f.split('_')[1].split('.')[0]))

# checking to see the correct file order
files[:5]

['bed_1.jpg', 'bed_2.jpg', 'bed_3.jpg', 'bed_4.jpg', 'bed_8.jpg']

In [78]:
# Splitting the Dataset[Test Set]
X_train, X_test, y_train, y_test = train_test_split(files[:len(labels)],labels, stratify=labels, train_size = 0.9)

# Splitting the dataset[Validation Set]
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, train_size=0.85)

# Checking the size of train, test, eval
len(X_train), len(y_train), len(X_val), len(y_val),  len(X_test), len(y_test)

(344, 344, 61, 61, 46, 46)

In [87]:
# Building the dataset properly - 
splits = [(X_train, y_train), (X_test, y_test), (X_val, y_val)]
dirnames = ['training', 'testing', 'validation']

for i, (data,label) in enumerate(splits):
    outside_dir=dirnames[i]

    for j in tqdm(range(0, len(label)), desc="Iterating over images in sub folder"):
        dir = label[j]
        
        # construct the path to the sub-directory
        dirPath = os.path.join(base_path, outside_dir, dir)
        
        # if the output directory does not exist, create it
        if not os.path.exists(dirPath):
            os.makedirs(dirPath)
            
            
        # copy the img to this new directory
        src_img = os.path.join(ip_df, data[j])
        shutil.copy(src_img, dirPath)

Iterating over images in sub folder: 100%|█████████████████████████████████████████| 344/344 [00:00<00:00, 1619.33it/s]
Iterating over images in sub folder: 100%|███████████████████████████████████████████| 46/46 [00:00<00:00, 1072.63it/s]
Iterating over images in sub folder: 100%|███████████████████████████████████████████| 61/61 [00:00<00:00, 1422.35it/s]


In [88]:
trainPath = os.path.sep.join([base_path, Tr])
valPath = os.path.sep.join([base_path, Val])
testPath = os.path.sep.join([base_path, Ts])

totalTrain = len(list(paths.list_images(trainPath)))
totalVal = len(list(paths.list_images(valPath)))
totalTest = len(list(paths.list_images(testPath)))

print(totalTrain, totalTest, totalVal)

344 46 46


# Data Augmentation

In [89]:
# initialize the training data augmentation object
trainAug = ImageDataGenerator(
    rotation_range=90,
    zoom_range=[0.5, 1.0],
    width_shift_range=0.3,
    height_shift_range=0.25,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
    brightness_range=[0.2, 1.0]
   )

In [90]:
# Default for all the above parameters is 0, 
# meaning we are applying no augmentation to val set
# which is exactly what we need because val set should be treated like test set.
valAug = ImageDataGenerator()

In [91]:
testAug = ImageDataGenerator()

# Data Generators

In [113]:
# Create batches whilst creating augmented images on the fly

trainGen = trainAug.flow_from_directory(
    directory=trainPath,
    target_size=(224,224),
    save_to_dir='./dataset/augmented/train',
    save_prefix='train',
    shuffle=True # data will be shuffled between epochs
)

Found 344 images belonging to 2 classes.


In [114]:
valGen = valAug.flow_from_directory(
    directory=valPath,
    target_size=(224,224),
    shuffle=True
)

Found 46 images belonging to 2 classes.


In [115]:
testGen = testAug.flow_from_directory(
    directory=testPath,
    target_size=(224,224),
    shuffle=False
)

Found 46 images belonging to 2 classes.


# Transfer Learning

> ## Feature Extraction Step

In [116]:
baseModel = EfficientNetB0(
            weights="imagenet",
            include_top=False, # make sure top layer is not included
            input_tensor=Input(shape=(224, 224, 3)),
            pooling="avg"
           )

In [117]:
# freeze the weights
for layer in baseModel.layers:
      layer.trainable = False

In [118]:
# training a new classifier on top (Functional Keras Model)x = baseModel.output
x = baseModel.output

Layer_1 = BatchNormalization()(x)
Layer_2 = Dropout(0.5)(Layer_1)
output_layer = Dense(len(Classes), activation="softmax")(Layer_2)

model = Model(inputs = baseModel.input, outputs = output_layer)

In [119]:
# Another way to create the classifier on top of basemodelmodel = tf.keras.Sequential()

model = tf.keras.Sequential()
model.add(baseModel)
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(len(Classes), activation='softmax'))

In [120]:
# Compile

optimizer = Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC()]
)

In [121]:
# implementing early stopping
es = EarlyStopping(
     monitor='val_loss',  #metric to monitor
     mode='min',  # whether to min or max the metric monitored
     patience=10, # epochs to wait before declaring stopped training
     verbose=1  # output epoch when training was stopped
     )

# implementing model checkpoint
mc = ModelCheckpoint(
      'feature_extraction.h5',
       monitor='val_loss',
       mode='min',
       verbose=1, # display epoch+accuracy everytime model is saved
       save_best_only=True
      )

In [122]:
# Training the model
hist = model.fit(
    x=trainGen,
    epochs=25,
    verbose=2,
    validation_data=valGen,
    steps_per_epoch=totalTrain // batch_size,
    callbacks=[es, mc]
      )

FileNotFoundError: [Errno 2] No such file or directory: './dataset/augmented/train\\train_207_334489.png'