In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

First, we load the dataset, which contains each image's file name and its corresponding label.

In [2]:
# Load the training annotations into a DataFrame; its "image_name" and "label"
# columns are the ones consumed by the image generators further down.
Dataset = pd.read_csv("/kaggle/input/scene-classification/train-scene classification/train.csv")    # Dataframe

In [3]:
# Cast the numeric class ids to strings; the generators below are created with
# class_mode="categorical", which works on string labels.
Dataset = Dataset.assign(label=Dataset["label"].astype(str))

In [4]:
# Preview the first rows of the annotation table.
Dataset.head()

In [5]:
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# Class distribution: number of samples for each scene label.
fig, ax = plt.subplots(figsize=(10, 4))           # one wide axes for the bar chart
sns.countplot(x="label", data=Dataset, ax=ax)     # draw explicitly on the axes created above
ax.set_xlabel("Class Label")
ax.set_ylabel("Number of Samples")
plt.show()

The Data is almost balanced.

## Splitting Data into Train/Test

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the rows for validation (80:20 split).
# - random_state pins the shuffle so "Restart & Run All" reproduces the same split.
# - stratify keeps the per-class proportions the same in both subsets.
Data_train, Data_test = train_test_split(
    Dataset,
    test_size=0.2,
    random_state=42,
    stratify=Dataset["label"],
)

## Creating Keras Image Data Flow

In [17]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [19]:
# The network trained below is built on ResNet152V2 with ImageNet weights, which
# expects inputs scaled to [-1, 1]. Use the model family's own preprocess_input
# instead of a plain 1/255 rescale, so the pre-trained layers see the input range
# they were trained on.
from tensorflow.keras.applications.resnet_v2 import preprocess_input

datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

## Creating Training Data

In [20]:
# Directory holding the image files named in the "image_name" column; it is passed
# as `directory` to both the training and the validation generator.
dir1='/kaggle/input/scene-classification/train-scene classification/train/'

In [21]:
# Batches of (images, one-hot labels) drawn from the training split.
train_gen = datagen.flow_from_dataframe(
    dataframe=Data_train,          # rows reserved for training
    directory=dir1,                # folder the file names are resolved against
    x_col="image_name",            # column with the image file names
    y_col="label",                 # column with the class labels
    target_size=(224, 224),        # every image is resized to 224x224
    color_mode="rgb",              # three colour channels
    class_mode="categorical",      # one-hot encoded targets
    batch_size=20,                 # samples per batch
)

## Creating Validation Data

In [22]:
# Batches of (images, one-hot labels) drawn from the held-out split.
# shuffle=False is essential: the evaluation cells iterate this generator once to
# collect the true labels and then call model.predict(valid_gen) separately. The
# two passes only line up row-for-row if the sample order is fixed.
valid_gen = datagen.flow_from_dataframe(
    dataframe=Data_test,           # validation dataframe
    directory=dir1,                # images live in the same folder as the training images
    x_col="image_name",            # column with the image file names
    y_col="label",                 # column with the class labels
    target_size=(224, 224),        # every image is resized to 224x224
    color_mode="rgb",              # three colour channels
    class_mode="categorical",      # one-hot encoded targets
    batch_size=20,                 # samples per batch
    shuffle=False,                 # deterministic order for evaluation
)

## Importing Resnet Model to apply Transfer Learning

In [24]:
import tensorflow.keras

In [25]:
import tensorflow as tf

In [27]:
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential

In [30]:
# train_gen.classes

In [32]:
# NOTE(review): the `model` built in this cell is never compiled or trained in the
# cells shown here. `model` is rebound to the ResNet152V2-based network further
# down before compile()/fit() are called, so this EfficientNetB1 variant looks
# like a leftover experiment. Confirm before relying on it.
base_model = tf.keras.applications.EfficientNetB1(include_top=False)
base_model.trainable=False

# Set up the model architecture: frozen base model plus trainable top layers
inputs = layers.Input(shape=(224,224,3), name="input_layer")
x = base_model(inputs, training=False) # Call the base model in inference mode so its frozen weights stay frozen
x = layers.Conv2D(64, (3, 3), activation = 'relu')(x)
x = layers.Flatten()(x)
x = layers.Dense(100, activation="relu")(x)
outputs = layers.Dense(6, activation="softmax", name="output_layer")(x)
model = tf.keras.Model(inputs, outputs)

In [33]:
# Convolutional backbone: ResNet152V2 with ImageNet weights and without its
# classification head, built for 224x224 RGB inputs.
ResNet_model = tf.keras.applications.resnet_v2.ResNet152V2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

In [34]:
# Display the backbone object to confirm it was created.
ResNet_model


## Building Model

In [35]:
from keras import Model 
from keras.layers import Conv2D, Dense, MaxPooling2D, Dropout, Flatten,GlobalAveragePooling2D
from keras.models import Sequential

In [36]:
# Fine-tune only the top of the backbone: freeze every layer except the last 15.
for layer in ResNet_model.layers[:-15]:
    layer.trainable = False

# Classification head on top of the backbone's feature maps. It is built with the
# tf.keras API (`layers`, `tf.keras.Model`) so that the head and the tf.keras
# backbone come from the same Keras implementation; mixing in the standalone
# `keras` package can fail on installs where the two differ.
x = ResNet_model.output
x = layers.GlobalAveragePooling2D()(x)     # output is already 2-D, so no Flatten is needed
x = layers.Dense(units=512, activation='relu')(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(units=512, activation='relu')(x)
x = layers.Dropout(0.3)(x)
output = layers.Dense(units=6, activation='softmax')(x)   # one probability per class
model = tf.keras.Model(ResNet_model.input, output)

In [37]:
# Model summary. summary() prints the table itself and returns None, so wrapping
# it in print() would only append a stray "None" line to the output.
model.summary()

### Setting the Loss Function and Optimizer, and Compiling the Model

In [38]:
# Categorical cross-entropy matches the one-hot targets produced by the generators
# (class_mode="categorical"); Adam with learning rate 1e-3 performs the updates.
# Everything is reached through `tf.keras`: no cell shown in this notebook binds a
# bare `keras` name, so `keras.losses...` raises NameError on a fresh kernel.
loss = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

### Training the Model

In [39]:
# Number of whole batches per pass over each generator (integer division, so a
# trailing partial batch is not counted).
STEP_SIZE_TRAIN = train_gen.n // train_gen.batch_size
STEP_SIZE_VALID = valid_gen.n // valid_gen.batch_size

print(STEP_SIZE_TRAIN, STEP_SIZE_VALID, sep="\n")

In [40]:
# Train the model. `Model.fit` accepts generators directly; `fit_generator` is
# deprecated and no longer available in recent TensorFlow/Keras releases.
transfer_learning_history = model.fit(
    train_gen,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_gen,
    validation_steps=STEP_SIZE_VALID,
    epochs=3,
)

## Visualizing accuracy and loss

In [41]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by the training call.
acc = transfer_learning_history.history['accuracy']
val_acc = transfer_learning_history.history['val_accuracy']

loss = transfer_learning_history.history['loss']
val_loss = transfer_learning_history.history['val_loss']

# Take the x-axis from the history itself rather than a hard-coded epoch count, so
# the plot stays correct when the number of training epochs changes.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(20, 8))

# Left panel: accuracy curves.
ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

# Right panel: loss curves.
ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

In [42]:
import itertools
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

In [43]:
# Collect the one-hot ground-truth labels of the validation set, batch by batch.
# - reset() rewinds the iterator so the pass starts at the first batch.
# - the builtin next() is used because the iterator's .next() method is not
#   available in newer Keras releases.
# - throwaway loop names avoid clobbering the notebook-level `x`.
valid_gen.reset()
y_labels = []
for _ in range(len(valid_gen)):
    _, batch_labels = next(valid_gen)
    y_labels.extend(np.array(batch_labels))

In [44]:
# Number of label rows collected from the validation generator.
len(y_labels)

In [45]:
# Class-probability predictions for the validation generator.
# NOTE(review): these rows are compared position-by-position with the labels
# collected from `valid_gen` in a separate pass, so the generator must yield
# samples in the same order both times (i.e. no shuffling). Confirm how
# `valid_gen` is created.
predictions = model.predict(valid_gen)
len(predictions)

In [46]:
# Peek at the first ten collected label vectors.
y_labels[:10]

In [47]:
# Peek at the first ten prediction rows.
predictions[:10]

In [48]:
# Predicted class id = column index of the largest value in each prediction row.
pred_classes = np.argmax(predictions, axis=1)
pred_classes[:10]

In [50]:
# Turn the list of per-sample label vectors into a single array so argmax can be
# taken along axis 1 in the next cell.
y_labels=np.array(y_labels)

In [51]:
# True class id = column index of the largest entry in each label row.
y_true = np.argmax(y_labels, axis=1)
y_true[:10]

In [52]:
from sklearn.metrics import accuracy_score
# Overall accuracy of the predicted class ids against the true class ids.
sklearn_accuracy = accuracy_score(y_true=y_true, y_pred=pred_classes)
sklearn_accuracy

In [53]:
# Human-readable names, indexed by class id (position i is used for class id i in
# the plots and the F1 table below).
# NOTE(review): this order must match the meaning of label ids 0..5 in train.csv;
# it cannot be verified from this notebook. Confirm against the dataset description.
class_names=["Buildings" ,"Forests", "Mountains", "Glacier", "Street", "Sea"]

In [57]:
# Confusion matrix: rows are true classes, columns are predicted classes.
# `labels` pins the matrix to one row/column per entry of class_names, even if a
# class id happens to be absent from both y_true and pred_classes.
cm = confusion_matrix(y_true, pred_classes, labels=np.arange(len(class_names)))
n_classes = cm.shape[0]  # number of classes we're dealing with

# Row-normalise so each cell is the fraction of that true class. Rows without any
# samples are divided by 1 instead of 0, so they stay at 0 rather than becoming NaN.
row_totals = cm.sum(axis=1)[:, np.newaxis]
row_totals[row_totals == 0] = 1
cm_norm = cm.astype("float") / row_totals

# Plot the figure. Colours use the normalised matrix so that the shading, the
# printed percentages and the text-colour threshold all refer to the same quantity
# (darker == larger share of the true class).
fig, ax = plt.subplots(figsize=(20, 20))
cax = ax.matshow(cm_norm, cmap=plt.cm.BuPu)
fig.colorbar(cax)

plt.xticks(np.arange(len(class_names)), class_names, rotation=15, fontsize=15)
plt.yticks(np.arange(len(class_names)), class_names, fontsize=15)
plt.xlabel("Predicted", fontsize=30)
ax.xaxis.tick_bottom()
plt.ylabel("Actual", fontsize=30)
plt.title("Confusion Matrix", fontsize=50)

# Cells above this normalised value are dark enough to need white text.
threshold = 0.5

# Write the row-normalised percentage into each cell.
for i, j in itertools.product(range(n_classes), range(cm.shape[1])):
    plt.text(j, i, f"{cm_norm[i, j]*100:.3f}%",
             horizontalalignment="center",
             color="white" if cm_norm[i, j] > threshold else "black",
             size=20)

fig.savefig("confusion_matrix.png")

In [58]:
from sklearn.metrics import classification_report

In [59]:
# Per-class precision / recall / F1 as a text table.
print(classification_report(y_true, pred_classes))

In [61]:
# The same report as a nested dict, so individual metrics can be looked up.
classification_report_dict = classification_report(
    y_true=y_true,
    y_pred=pred_classes,
    output_dict=True,
)
classification_report_dict

In [62]:
# Map each class name to its F1 score. Report entries are consumed in order until
# the "accuracy" key is reached; every key before it is used as an integer index
# into class_names.
class_f1_scores = {
    class_names[int(label)]: metrics["f1-score"]
    for label, metrics in itertools.takewhile(
        lambda item: item[0] != "accuracy",
        classification_report_dict.items(),
    )
}
class_f1_scores

In [63]:
import pandas as pd
# Table of per-class F1 scores, best class first.
f1_scores = (
    pd.DataFrame(
        {
            "class_names": list(class_f1_scores),
            "f1-scores": list(class_f1_scores.values()),
        }
    )
    .sort_values(by="f1-scores", ascending=False)
)
f1_scores

In [65]:
import matplotlib.pyplot as plt

# Horizontal bar chart of the per-class F1 scores, in the table's (sorted) order.
fig, ax = plt.subplots(figsize=(7, 7))
scores = ax.barh(range(len(f1_scores)), f1_scores["f1-scores"].to_numpy())

ax.set_yticks(range(len(f1_scores)))
ax.set_yticklabels(f1_scores["class_names"])
ax.set(
    xlabel="F1 Scores",
    title="F1 scores as per all the classes in Scene Classification",
)
ax.invert_yaxis();  # first table row at the top

In [68]:
# Render the network architecture to model.png, with layer names and tensor shapes.
tf.keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=False,
    dpi=96,
)

# Since we are using a pre-trained model, the model converges very fast.
# This is why we get the best results after only 2 epochs.