# First

## Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
import numpy as np
from  PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve
from tensorflow import data
from tensorflow.keras.layers import Conv2D , MaxPool2D , Dropout , Flatten , Dense , Rescaling
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import image_dataset_from_directory


## EDA and Class distribution

In [None]:
# Inspect one sample image to see the native resolution of the dataset files.
path = "/kaggle/input/intel-image-classification/seg_test/seg_test/buildings/20057.jpg"
# Image.open keeps the file handle open; the context manager closes it as soon
# as the size has been read.
with Image.open(path) as img:
    print("Size of an image : {}".format(img.size))

In [None]:
# Build batched tf.data pipelines from the class-per-folder image directories.
# Options shared by both splits: 150x150 images, 128 images per batch.
loader_options = {"image_size": (150, 150), "batch_size": 128}

# Training split (shuffled). `train` stays the raw dataset so its class_names
# remain reachable; `train_new` is the cached + prefetched version used for fitting.
train = image_dataset_from_directory(
    "/kaggle/input/intel-image-classification/seg_train/seg_train",
    shuffle=True,
    **loader_options,
)
train_new = train.cache().prefetch(buffer_size=data.AUTOTUNE)

# Test split: shuffle=False keeps a fixed order, so labels gathered from it
# line up with the rows returned by model.predict.
test = image_dataset_from_directory(
    "/kaggle/input/intel-image-classification/seg_test/seg_test",
    shuffle=False,
    **loader_options,
)
test = test.cache().prefetch(buffer_size=data.AUTOTUNE)

In [None]:
# Gather the integer class label of every image in both splits so the class
# distribution can be counted and plotted.
# Only labels are kept: copying every training image batch into a Python list
# holds the whole training set in RAM, and nothing in the notebook reads it.
all_labels = []
all_labels_test = []
for _, label_batch in train:
    all_labels.extend(label_batch.numpy())
for _, label_batch in test:
    all_labels_test.extend(label_batch.numpy())

In [None]:
# Count images per class and draw one pie chart per split.
class_names = train.class_names
# minlength guarantees one count per class even if the highest-numbered classes
# have no samples, so the counts always align with class_names (pie labels here,
# DataFrame index later).
counts = np.bincount(all_labels, minlength=len(class_names))
counts_test = np.bincount(all_labels_test, minlength=len(class_names))
for split_counts, split_name in ((counts, "Train"), (counts_test, "Test")):
    plt.pie(split_counts, labels=class_names, autopct="%1.2f%%")
    plt.title(f"Class Distribution of {split_name} data")
    plt.show()

In [None]:
# Per-class image counts of both splits in one table (one row per class).
df = pd.DataFrame(
    data={"train": counts, "test": counts_test},
    index=class_names,
)

In [None]:
# Grouped bar chart: train vs. test image counts for each class.
# Title and axis labels are set so the figure can be read on its own.
ax = df.plot.bar()
ax.set_title("Images per class")
ax.set_xlabel("Class")
ax.set_ylabel("Number of images")
plt.show()

# Training

In [None]:
# Baseline CNN: two conv/pool stages followed by a dense head with dropout.
# input_shape is declared on the FIRST layer. On a later layer Sequential
# ignores it, and the model stays unbuilt until the first call to fit().
model = Sequential([
    Rescaling(scale=1./255, input_shape=(150, 150, 3)),  # multiply pixel values by 1/255
    Conv2D(32, (3, 3), activation="elu"),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation="elu"),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation="elu"),
    Dropout(0.4),
    Dense(units=64, activation="elu"),
    Dropout(0.4),
    Dense(units=6, activation="softmax"),  # one probability per class
])
optimizer = Adam()
# Sparse loss: the datasets yield integer class ids, not one-hot vectors.
model.compile(optimizer=optimizer, metrics=["accuracy"], loss="sparse_categorical_crossentropy")

In [None]:
# Fit the baseline model on the cached training pipeline for 20 epochs;
# the test split is passed as validation data.
model.fit(
    x=train_new,
    validation_data=test,
    epochs=20,
)

After 20 epochs we reach about 98% training accuracy and 76% validation accuracy. The training accuracy is good, but the model is still overfitted to the training dataset.<br>
<font color = "red">
Maybe we can solve it with data augmentation
</font>

In [None]:
# Random augmentation pipeline applied in order: flip, slight zoom, slight rotation.
data_augmentation = Sequential([
    RandomFlip(),
    RandomZoom(0.01),
    RandomRotation(0.01),
])

In [None]:
# Augmented training pipeline.
# train_new is already cached BEFORE this map, so the decoded images are reused
# while the random transforms are re-drawn on every epoch. Do not add .cache()
# after the map: it would store the first epoch's augmented images and replay
# exactly those in every later epoch, so the augmentation would run only once.
train_with_more_images = train_new.map(
    lambda x, y: (data_augmentation(x, training=True), y),
    num_parallel_calls=data.AUTOTUNE,
).prefetch(buffer_size=data.AUTOTUNE)

In [None]:
# Deeper CNN trained on the augmented pipeline: three conv/pool stages and a
# three-layer dense head.
# input_shape is declared on the FIRST layer. On a later layer Sequential
# ignores it, and the model stays unbuilt until the first call to fit().
model_with_augmentation = Sequential([
    Rescaling(scale=1./255, input_shape=(150, 150, 3)),  # multiply pixel values by 1/255
    Conv2D(64, (3, 3), activation="elu"),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation="relu"),
    Dropout(0.3),
    Dense(units=64, activation="relu"),
    Dropout(0.3),
    Dense(units=64, activation="relu"),
    Dense(units=6, activation="softmax"),  # one probability per class
])
optimizer = Adam()
# Sparse loss: the datasets yield integer class ids, not one-hot vectors.
model_with_augmentation.compile(optimizer=optimizer, metrics=["accuracy"], loss="sparse_categorical_crossentropy")

In [None]:
# Fit the second model on the augmented training pipeline for 20 epochs;
# the test split is passed as validation data.
model_with_augmentation.fit(
    x=train_with_more_images,
    validation_data=test,
    epochs=20,
)

With data augmentation we barely reach 92% training accuracy and 68% validation accuracy. That is not good, and the model is still overfitted to the training dataset, which suggests that data augmentation is not always a good method.

# Evaluating with ROC curve

In [None]:
# Softmax outputs of the baseline model for the test split (one row per image).
predict = model.predict(x=test)

In [None]:
# Ground-truth integer labels of the test split, flattened across batches.
test_labels = [
    single_label
    for _, batch_labels in test
    for single_label in batch_labels.numpy()
]


In [None]:
# Convert each softmax row into a one-hot row that marks its arg-max class.
# Vectorised: a single indexed assignment replaces the Python loop over rows,
# and the result keeps the same 2-D shape as `predict` even when it has no rows.
predict_one_hot = np.zeros_like(predict)
predict_one_hot[np.arange(len(predict)), np.argmax(predict, axis=1)] = 1
    

In [None]:
# One-hot encode the true labels: one indicator column per class id 0..5.
test_labels_one_hot = label_binarize(test_labels, classes=list(range(6)))

In [None]:
# Sanity check: display the shape of the one-hot label matrix.
np.shape(test_labels_one_hot)

In [None]:
# Per-class (one-vs-rest) ROC curves: false/true positive rates keyed by class index.
# roc_curve sweeps a decision threshold over continuous scores. Feeding it hard
# 0/1 predictions leaves a single operating point joined to the corners, so the
# softmax probabilities in `predict` are used as the scores.
tpr = dict()
fpr = dict()
tresholds = []  # thresholds returned for each class, in class-index order
for i in range(predict.shape[1]):
    fpr[i], tpr[i], treshold = roc_curve(test_labels_one_hot[:, i], predict[:, i])
    tresholds.append(treshold)

In [None]:
# Draw one ROC panel per class on a 3x2 grid, filling the grid row by row.
fig, axes = plt.subplots(nrows=3, ncols=2)
for idx in range(predict_one_hot.shape[1]):
    # Map the flat class index to its (row, column) position in the grid.
    i, j = divmod(idx, 2)
    ax = axes[i, j]
    # Points mark the individual operating points; the line joins them.
    sns.scatterplot(x=fpr[idx], y=tpr[idx], ax=ax)
    ax.plot(fpr[idx], tpr[idx], color='orange')
    ax.set(
        title=f"Label {idx} ROC Curve",
        xlabel="False Positive Rate",
        ylabel="True Positive Rate",
    )

plt.tight_layout()
plt.show()
    


---

## 🎉 Conclusion

In this notebook, we built a convolutional neural network model and applied data augmentation techniques to improve generalization. We also visualized ROC curves for each class to evaluate the performance in detail.

💡 I hope this project gave you useful insights and ideas for your own machine learning tasks!

---

### 🙌 Show Some Love

If you found this notebook helpful:

✅ Please consider **upvoting**,  
💬 Leave a **comment** if you have questions or feedback,  
🔔 And feel free to **follow me** for more ML projects!

---

Thanks for reading! 😊  
