In [1]:
# prompt: code to extract dataset from kagg;e

!pip install kaggle

# # Upload your kaggle.json API key
# from google.colab import files
# files.upload()

# Create a Kaggle directory and move the API key there
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset (replace with the actual dataset URL)
!kaggle datasets download -d aniruddhsharma/structural-defects-network-concrete-crack-images

# Unzip the dataset (if it's a zip file)


# Example: Download the Titanic dataset
#!kaggle datasets download -d hesh97/titanicdataset-traincsv
#!unzip titanicdataset-traincsv.zip

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/aniruddhsharma/structural-defects-network-concrete-crack-images
License(s): other
Downloading structural-defects-network-concrete-crack-images.zip to /content
 98% 496M/506M [00:04<00:00, 97.8MB/s]
100% 506M/506M [00:05<00:00, 104MB/s] 


In [2]:
!unzip "/content/structural-defects-network-concrete-crack-images.zip"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Walls/Non-cracked/7117-203.jpg  
  inflating: Walls/Non-cracked/7117-204.jpg  
  inflating: Walls/Non-cracked/7117-205.jpg  
  inflating: Walls/Non-cracked/7117-206.jpg  
  inflating: Walls/Non-cracked/7117-207.jpg  
  inflating: Walls/Non-cracked/7117-208.jpg  
  inflating: Walls/Non-cracked/7117-21.jpg  
  inflating: Walls/Non-cracked/7117-211.jpg  
  inflating: Walls/Non-cracked/7117-212.jpg  
  inflating: Walls/Non-cracked/7117-213.jpg  
  inflating: Walls/Non-cracked/7117-214.jpg  
  inflating: Walls/Non-cracked/7117-215.jpg  
  inflating: Walls/Non-cracked/7117-216.jpg  
  inflating: Walls/Non-cracked/7117-217.jpg  
  inflating: Walls/Non-cracked/7117-218.jpg  
  inflating: Walls/Non-cracked/7117-219.jpg  
  inflating: Walls/Non-cracked/7117-22.jpg  
  inflating: Walls/Non-cracked/7117-220.jpg  
  inflating: Walls/Non-cracked/7117-221.jpg  
  inflating: Walls/Non-cracked/7117-222.jpg  
  inflating: Wall

In [3]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Paths and directory setup

root_dir = "/content"
subdirs = ['Decks', 'Pavements', 'Walls']
categories = ['Cracked', 'Non-cracked']  # Consistent naming here
split_ratio = 0.8

# Create the desired directory structure
def create_directory_structure(base_dir):
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    for split in ['Train', 'Test']:
        split_dir = os.path.join(base_dir, split)
        os.makedirs(split_dir, exist_ok=True)

        for subdir in subdirs:
            subdir_path = os.path.join(split_dir, subdir)
            os.makedirs(subdir_path, exist_ok=True)

            for category in categories:
                category_path = os.path.join(subdir_path, category)
                os.makedirs(category_path, exist_ok=True)

# Function to move images to corresponding directories
def move_images_to_structure(base_dir, img_files, subdir, category, split):
    for img_file in img_files:
        dest_dir = os.path.join(base_dir, split, subdir, category)
        shutil.move(img_file, dest_dir)

# Function to split and distribute images
def distribute_images(base_dir, img_files, subdir, category):
    if len(img_files) > 0:
        # Split the images into train and test sets
        train_imgs, test_imgs = train_test_split(img_files, train_size=split_ratio, random_state=42)

        # Move images to corresponding directories
        move_images_to_structure(base_dir, train_imgs, subdir, category, 'Train')
        move_images_to_structure(base_dir, test_imgs, subdir, category, 'Test')
    else:
        print(f"No images found in {subdir}/{category}. Skipping...")

# Main function to organize dataset
def organize_dataset(base_dir):
    # Create the directory structure
    create_directory_structure(base_dir)

    # Assuming the images are already organized in 'Cracked' and 'Non-cracked' directories under each subdir
    for subdir in subdirs:
        for category in categories:
            category_dir = os.path.join(root_dir, subdir, category)

            if os.path.exists(category_dir):
                # List all image files in the current category
                img_files = [os.path.join(category_dir, f) for f in os.listdir(category_dir) if f.endswith(('jpg', 'png'))]

                # Check if there are images to split
                if len(img_files) == 0:
                    print(f"No images found in {category_dir}. Skipping...")
                else:
                    # Distribute images into train and test directories
                    distribute_images(base_dir, img_files, subdir, category)
            else:
                print(f"Directory {category_dir} does not exist. Skipping...")

    # Remove any accidentally created "Non-Cracked" directory
    for split in ['Train', 'Test']:
        for subdir in subdirs:
            non_cracked_dir = os.path.join(base_dir, split, subdir, 'Non-Cracked')
            if os.path.exists(non_cracked_dir) and not os.listdir(non_cracked_dir):
                os.rmdir(non_cracked_dir)
                print(f"Removed empty directory: {non_cracked_dir}")

if __name__ == "__main__":
    # Ensure the root directory is where the dataset is located
    organize_dataset(root_dir)


In [4]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

In [11]:
train_dir = '/content/Train'
test_dir = '/content/Test'
# Use a subset of the training data as validation data
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 20% for validation
)

# First-stage classifier for Decks, Pavements, Walls
train_generator_stage1 = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256,256),
    batch_size=32,
    class_mode='categorical',
    classes=['Decks', 'Pavements', 'Walls'],
    subset='training'
)

validation_generator_stage1 = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256,256),
    batch_size=32,
    class_mode='categorical',
    classes=['Decks', 'Pavements', 'Walls'],
    subset='validation'
)

Found 35898 images belonging to 3 classes.
Found 8973 images belonging to 3 classes.


In [12]:
# Load VGG16 with pre-trained ImageNet weights, excluding top layers
vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

# Freeze base layers
for layer in vgg16_base.layers:
    layer.trainable = False

# Add custom classification head
x = Flatten()(vgg16_base.output)
x = Dense(256, activation='relu')(x)
x = Dense(3, activation='softmax')(x)  # 3 classes for Decks, Pavements, Walls

In [13]:
model_stage1 = Model(vgg16_base.input, outputs=x)
model_stage1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train first-stage model
model_stage1.fit(train_generator_stage1, validation_data=validation_generator_stage1, epochs=10)



Epoch 1/10


  self._warn_if_super_not_called()


[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m754s[0m 653ms/step - accuracy: 0.8370 - loss: 0.4937 - val_accuracy: 0.9254 - val_loss: 0.2038
Epoch 2/10
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m746s[0m 617ms/step - accuracy: 0.8982 - loss: 0.2601 - val_accuracy: 0.9203 - val_loss: 0.2117
Epoch 3/10
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m747s[0m 621ms/step - accuracy: 0.9071 - loss: 0.2409 - val_accuracy: 0.9201 - val_loss: 0.2054
Epoch 4/10
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m741s[0m 621ms/step - accuracy: 0.9120 - loss: 0.2255 - val_accuracy: 0.9121 - val_loss: 0.2298
Epoch 5/10
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m741s[0m 621ms/step - accuracy: 0.9194 - loss: 0.2092 - val_accuracy: 0.9125 - val_loss: 0.2226
Epoch 6/10
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m739s[0m 618ms/step - accuracy: 0.9229 - loss: 0.1985 - val_accuracy: 0.9300 - val_loss: 0.1852
Epo

<keras.src.callbacks.history.History at 0x7acdc9207130>

In [17]:
# Evaluate on test data

test_datagen = ImageDataGenerator(rescale=1.0/255)

test_generator_stage1 = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical',
    classes=['Decks', 'Pavements', 'Walls']
)

test_accuracy = model_stage1.evaluate(test_generator_stage1)
print(f"First-stage model test accuracy: {test_accuracy:.2f}")

Found 11221 images belonging to 3 classes.


  self._warn_if_super_not_called()


[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 191ms/step - accuracy: 0.9442 - loss: 0.1427


TypeError: unsupported format string passed to list.__format__

In [18]:
# prompt: code line to save model_stage1

model_stage1.save('model_stage1.h5')

