In [2]:
import tensorflow as tf
from tensorflow.keras import datasets,layers,models
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
import pathlib
import pandas as pd
import numpy as np
import pathlib

In [3]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [4]:
import pathlib
data_dir = pathlib.Path('/content/gdrive/MyDrive/data/data/images')
file_paths = list(data_dir.rglob('*.*'))
paths = [(path.parts[-2], str(path)) for path in file_paths]
df = pd.DataFrame(data=paths, columns=['Class', 'Images'])
df = df.sort_values('Class', ascending=True)
df.reset_index(drop=True, inplace=True)
print(df.head())

         Class                                             Images
0  architecure  /content/gdrive/MyDrive/data/data/images/archi...
1  architecure  /content/gdrive/MyDrive/data/data/images/archi...
2  architecure  /content/gdrive/MyDrive/data/data/images/archi...
3  architecure  /content/gdrive/MyDrive/data/data/images/archi...
4  architecure  /content/gdrive/MyDrive/data/data/images/archi...


In [5]:
print('Count the number of images in each class')
print(df['Class'].value_counts())

Count the number of images in each class
travel and  adventure    8810
food and d rinks         8780
architecure              8763
art and culture          8752
Name: Class, dtype: int64


In [6]:
batch_size = 16
img_height = 128
img_width = 128

In [7]:
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 35105 files belonging to 4 classes.
Using 28084 files for training.


In [8]:
val_ds = tf.keras.utils.image_dataset_from_directory(  #the tf terminology for this method demands the term validation where we we might otherwise use the term test. Just know that validation in this case means test, we're not doing a 3-way split of the data nor k-fold cross-validation
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 35105 files belonging to 4 classes.
Using 7021 files for validation.


In [10]:
class_names_train = train_ds.class_names
print(class_names_train)

['architecure', 'art and culture', 'food and d rinks', 'travel and  adventure']


In [11]:
class_names_test = val_ds.class_names
print(class_names_test)

['architecure', 'art and culture', 'food and d rinks', 'travel and  adventure']


In [15]:
model = Sequential([
  layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(4)])

In [21]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy
model.compile(optimizer='adam',loss=SparseCategoricalCrossentropy(),metrics=['accuracy'])
model.fit(train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7963e57d9bd0>

In [22]:
from sklearn.metrics import classification_report
import numpy as np
true_labels = []
predicted_labels = []
for images, labels in val_ds:
    batch_predictions = model.predict(images)
    batch_predicted_labels = np.argmax(batch_predictions, axis=1)
    true_labels.extend(labels.numpy())
    predicted_labels.extend(batch_predicted_labels)
print("Classification Report:\n", classification_report(true_labels, predicted_labels))

Classification Report:
               precision    recall  f1-score   support

           0       0.25      1.00      0.40      1778
           1       0.00      0.00      0.00      1793
           2       0.00      0.00      0.00      1703
           3       0.00      0.00      0.00      1747

    accuracy                           0.25      7021
   macro avg       0.06      0.25      0.10      7021
weighted avg       0.06      0.25      0.10      7021



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 128, 128, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 128, 128, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2  (None, 64, 64, 16)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 64, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 32, 32, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 32, 32, 64)        1