# **Extract the dataset.**

In [4]:
import zipfile
import os

# Assuming the uploaded file is named 'dataset.zip'
uploaded_zip_path = '/content/dataset.zip'
extract_path = '/content/NewDataSet'  # Path to extract the dataset

# Create the extract directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

# Extract the zip file
with zipfile.ZipFile(uploaded_zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f'Dataset extracted to {extract_path}')


Dataset extracted to /content/NewDataSet


In [5]:
# List files in the extracted directory
extracted_files = os.listdir(extract_path)
print(f'Extracted files: {extracted_files}')


Extracted files: ['__MACOSX', 'dataset']


In [6]:
import shutil
from sklearn.model_selection import train_test_split

# Define paths
test_dir = '/content/NewDataSet/dataset/test'
train_dir = '/content/NewDataSet/dataset/train'
temp_train_dir = '/content/NewDataSet/dataset/temp_train'
temp_validation_dir = '/content/NewDataSet/dataset/temp_validation'

# Create temporary directories for split data
os.makedirs(temp_train_dir, exist_ok=True)
os.makedirs(temp_validation_dir, exist_ok=True)

# Function to split data into training and validation sets
def split_data(source_dir, train_dir, val_dir, train_size=0.7):
    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        if os.path.isdir(class_path):
            images = os.listdir(class_path)
            train_images, val_images = train_test_split(images, train_size=train_size)

            os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
            os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)

            for image in train_images:
                shutil.copy(os.path.join(class_path, image), os.path.join(train_dir, class_name, image))
            for image in val_images:
                shutil.copy(os.path.join(class_path, image), os.path.join(val_dir, class_name, image))

# Split the test data into training and validation sets
split_data(test_dir, temp_train_dir, temp_validation_dir, train_size=0.7)


# **Q2**

In [8]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m48.0 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.0/289.0 kB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86

In [9]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from wandb.integration.keras import WandbCallback

In [10]:
!pip install --upgrade wandb



In [11]:
import os
import wandb

# Set WANDB_API_KEY environment variable (replace 'your_api_key' with your actual API key)
os.environ['WANDB_API_KEY'] = '5237a13f8f1acba19960eff228299c80defa0a3d'

# Initialize wandb
wandb.login()
wandb.init(project="Q2", entity="praba00021")


[34m[1mwandb[0m: Currently logged in as: [33mpraba00021[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [17]:
# Define paths
train_data_dir = '/content/NewDataSet/dataset/train'
test_data_dir = '/content/NewDataSet/dataset/test'
val_data_dir = '/content/NewDataSet/dataset/test'

In [18]:
# Set aside 10% of the training data for hyperparameter tuning
train_images = []
class_labels = []
for class_name in os.listdir(train_data_dir):
    class_path = os.path.join(train_data_dir, class_name)
    if os.path.isdir(class_path):  # Ensure it's a directory
        for img in os.listdir(class_path):
            if img != '.DS_Store':  # Ignore .DS_Store files
                train_images.append(os.path.join(class_path, img))
                class_labels.append(class_name)

# Split data into training and validation sets
hyperparam_tuning_data, val_data, hyperparam_tuning_labels, val_labels = train_test_split(
    train_images, class_labels, test_size=0.1, random_state=42, stratify=class_labels)


In [19]:
# Move validation data to a separate directory
os.makedirs(val_data_dir, exist_ok=True)

for img_path, label in zip(val_data, val_labels):
    class_dir = os.path.join(val_data_dir, label)
    os.makedirs(class_dir, exist_ok=True)
    shutil.move(img_path, class_dir)

# Prepare data generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(64, 64),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(64, 64),
    batch_size=32,
    class_mode='categorical'
)


Found 585 images belonging to 2 classes.
Found 473 images belonging to 2 classes.


In [20]:
# Calculate the number of classes
num_classes = len([d for d in os.listdir(train_data_dir) if os.path.isdir(os.path.join(train_data_dir, d))])


In [21]:
# Define function to build the model
def build_model(input_shape, num_classes, filters_per_layer, dropout_rate, use_batch_normalization):
    model = Sequential()

    for i, filters in enumerate(filters_per_layer):
        if i == 0:
            model.add(Conv2D(filters, (3, 3), padding='same', input_shape=input_shape))
        else:
            model.add(Conv2D(filters, (3, 3), padding='same'))

        if use_batch_normalization:
            model.add(BatchNormalization())

        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        if dropout_rate:
            model.add(Dropout(dropout_rate))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    return model


In [22]:
# Define hyperparameters to tune
filters_per_layer_options = [
    [32, 32, 32, 32, 32],  # Same number of filters in all layers
    [32, 64, 128, 256, 512],  # Doubling in each subsequent layer
    [512, 256, 128, 64, 32]  # Halving in each subsequent layer
]
dropout_rates = [0.2, 0.3]
use_batch_normalization = [True, False]


In [23]:
# Ensure filters_per_layer_options, dropout_rates, use_batch_normalization, train_generator, val_generator are defined correctly

from tensorflow.keras.layers import BatchNormalization, Dropout
from wandb.integration.keras import WandbCallback

# Train and evaluate models with different hyperparameter configurations
for filters_per_layer in filters_per_layer_options:
    for dropout_rate in dropout_rates:
        for batch_norm in use_batch_normalization:
            # Build model
            model = build_model(input_shape=(64, 64, 3), num_classes=num_classes, filters_per_layer=filters_per_layer, dropout_rate=dropout_rate, use_batch_normalization=batch_norm)

            # Compile model
            model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

            # Train model
            model.fit(train_generator, epochs=10, validation_data=val_generator, callbacks=[WandbCallback()])




Epoch 1/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.0s


Epoch 2/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.0s


Epoch 3/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.0s


Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.0s


Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.0s


Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.2s


Epoch 8/10

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20240528_070410-7hqkl37e/files/model-best)... Done. 0.2s


Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
