In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt

# Source dataset location (read-only Kaggle input) and the writable
# directory that will receive the re-encoded copy.
dataset_path = "/kaggle/input/tops-and-bottoms/Modde Images"
jpeg_dataset_path = "/kaggle/working/MODDE_JPEG"

# Per-class sub-directories of the source dataset
tops_dir = dataset_path + '/tops'
bottoms_dir = dataset_path + '/bottoms'

# Sanity check: report whether the source directory is present and, if so,
# what it contains.
if not os.path.exists(dataset_path):
    print("Directory does not exist.")
else:
    print("Directory exists!")
    print("Contents:", os.listdir(dataset_path))

# Function to convert images to JPEG format
def convert_images_to_jpeg(src_dir, dest_dir):
    """Mirror ``src_dir`` into ``dest_dir``, re-encoding every image on the way.

    The directory layout below ``src_dir`` is reproduced under ``dest_dir``.
    Files whose extension is ``.jpg``, ``.jpeg`` or ``.png`` keep that
    extension (lower-cased) and are re-saved in the matching format; every
    other file is opened as an image and written as ``<name>.jpg``.

    Images written as JPEG are converted to RGB first, because JPEG cannot
    store alpha or palette modes. Files that Pillow cannot identify as an
    image are skipped with a printed message instead of aborting the run.

    Args:
        src_dir: Root directory of the source images.
        dest_dir: Root directory to write to; created if missing.
    """
    os.makedirs(dest_dir, exist_ok=True)

    kept_extensions = ('.jpg', '.jpeg', '.png')

    for root, _, files in os.walk(src_dir):
        dest_root = os.path.join(dest_dir, os.path.relpath(root, src_dir))

        for file in files:
            # Only create the mirrored directory once it actually has content.
            os.makedirs(dest_root, exist_ok=True)

            src_file_path = os.path.join(root, file)
            stem, raw_extension = os.path.splitext(file)
            extension = raw_extension.lower()

            target_extension = extension if extension in kept_extensions else '.jpg'
            target_format = 'PNG' if target_extension == '.png' else 'JPEG'
            dest_file_path = os.path.join(dest_root, stem + target_extension)

            try:
                # The context manager closes the source file handle even when
                # saving fails.
                with Image.open(src_file_path) as img:
                    if target_format == 'JPEG' and img.mode != 'RGB':
                        output_img = img.convert('RGB')
                    else:
                        output_img = img
                    output_img.save(dest_file_path, target_format)
            except Image.UnidentifiedImageError as exc:
                # Not an image (or unreadable as one): leave it out of the copy.
                print(f"Skipping non-image file {src_file_path}: {exc}")

# Re-encode the whole source dataset into the writable working directory
convert_images_to_jpeg(src_dir=dataset_path, dest_dir=jpeg_dataset_path)

In [None]:
# Load training and validation datasets
batch_size = 64
img_size = (512, 512)

# Both calls use the same directory, split fraction and seed; only `subset`
# differs, which selects the training or validation part of the split.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    jpeg_dataset_path,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    jpeg_dataset_path,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

# Check the class names. They are read here, before `.map()` is applied,
# because the mapped dataset no longer carries the `class_names` attribute.
class_names = train_ds.class_names
print("Class names:", class_names)

# Apply normalization: rescale pixel values by 1/255
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))

# Display up to the first 9 images of one training batch
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    # Guard against a batch that holds fewer than 9 images.
    for i in range(min(9, images.shape[0])):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy())
        plt.title(class_names[labels[i].numpy()])
        plt.axis("off")
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Define data augmentation
# NOTE(review): this pipeline is only used for the preview below; none of the
# visible cells apply it to `train_ds` or include it in the model.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.2),
    # Factor tuples are (lower, upper); negative values zoom in.
    tf.keras.layers.RandomZoom(height_factor=(-0.3, -0.2), width_factor=(-0.3, -0.2)),
    tf.keras.layers.RandomContrast(factor=0.1),
    tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
])

# Display up to the first 9 images from the normalized & augmented dataset
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    # training=True makes the random layers active outside of `fit`.
    augmented_images = data_augmentation(images, training=True)
    # The contrast adjustment may push values outside [0, 1]; clip so that
    # imshow renders the floats without warnings.
    augmented_images = tf.clip_by_value(augmented_images, 0.0, 1.0)
    for i in range(min(9, augmented_images.shape[0])):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(augmented_images[i].numpy())
        plt.title(class_names[labels[i].numpy()])
        plt.axis("off")
plt.show()

In [None]:
# Define the CNN architecture
def create_cnn_model(input_shape, num_classes):
    """Build the sequential CNN classifier and print its summary.

    The network is three Conv2D(3x3, relu) -> BatchNormalization ->
    MaxPooling2D(2x2) -> Dropout(0.25) stages with 32, 64 and 128 filters,
    followed by Flatten, an L2-regularized Dense(128, relu) with
    BatchNormalization and Dropout(0.5), and a softmax output layer.

    Args:
        input_shape: Shape passed as `input_shape` to the first Conv2D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The (uncompiled) `tf.keras` Sequential model.
    """
    layers = tf.keras.layers

    stack = []
    for filters in (32, 64, 128):
        conv_options = {'activation': 'relu'}
        if not stack:
            # Only the very first layer declares the input shape.
            conv_options['input_shape'] = input_shape
        stack.extend([
            layers.Conv2D(filters, (3, 3), **conv_options),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
        ])

    # Classification head
    stack.extend([
        layers.Flatten(),
        layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    model = tf.keras.models.Sequential(stack)
    model.summary()
    return model

# Derive the model input shape from the image size used when loading the
# datasets (plus 3 colour channels) so the two cannot drift apart.
input_shape = img_size + (3,)
num_classes = len(class_names)

cnn_model = create_cnn_model(input_shape, num_classes)

In [None]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy loss
# and accuracy as the tracked metric.
compile_settings = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
cnn_model.compile(**compile_settings)

In [None]:
# Early stopping: watch the validation loss, stop after 2 epochs without
# improvement and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# Train the model for at most 5 epochs
fit_options = dict(
    validation_data=val_ds,
    epochs=5,
    callbacks=[early_stopping],
)
history = cnn_model.fit(train_ds, **fit_options)

In [None]:
# Evaluate the model on the validation set; `evaluate` yields the loss
# followed by the compiled metric (accuracy).
val_metrics = cnn_model.evaluate(val_ds)
val_loss, val_accuracy = val_metrics
print(f'Validation accuracy: {val_accuracy:.2f}')
print(f'Validation loss: {val_loss:.2f}')

In [None]:
# Persist the trained model to the working directory, then load it back
# from the same file.
model_path = '/kaggle/working/cnn_model.keras'
cnn_model.save(model_path)
loaded_model = tf.keras.models.load_model(model_path)

In [None]:
# Visualize training history
import matplotlib.pyplot as plt

# One panel per metric: training curve and validation curve side by side.
panels = [
    ('loss', 'val_loss', 'training loss', 'validation loss', 'Loss'),
    ('accuracy', 'val_accuracy', 'training accuracy', 'validation accuracy', 'Accuracy'),
]

plt.figure(figsize=(14, 7))
for position, (train_key, val_key, train_label, val_label, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.show()
