In [1]:
import zipfile
import os

# Replace with the actual name of your uploaded zip file
zip_path = "/content/drive/MyDrive/dataset.zip"  # ← Change this

# Destination directory
extract_to = "/content/crack_dataset"

# Create the directory if it doesn't exist
os.makedirs(extract_to, exist_ok=True)

# Extract
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f"✅ Extraction complete! Files are in: {extract_to}")


✅ Extraction complete! Files are in: /content/crack_dataset


In [2]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models


In [3]:
import shutil
import glob # Import the glob module

# Create new folders
os.makedirs("/content/crack_dataset/crack", exist_ok=True)
os.makedirs("/content/crack_dataset/no_crack", exist_ok=True)

# Move positive (crack) images
pos_imgs = glob.glob("/content/crack_dataset/Positive*.jpg")  # update path if needed
for f in pos_imgs:
    shutil.copy(f, "/content/crack_dataset/crack")

# Move negative (no crack) images
neg_imgs = glob.glob("/content/crack_dataset/Negative*.jpg")  # update path if needed
for f in neg_imgs:
    shutil.copy(f, "/content/crack_dataset/no_crack")

In [4]:
import os
import shutil
from glob import glob

# Step 1: Define paths (adjust based on your extracted folders)
positive_folder = "/content/crack_dataset/Positive"    # Folder where crack images are
negative_folder = "/content/crack_dataset/Negative"    # Folder where no crack images are
combined_dataset = "/content/crack_data"

# Step 2: Create unified folder structure
os.makedirs(os.path.join(combined_dataset, "crack"), exist_ok=True)
os.makedirs(os.path.join(combined_dataset, "no_crack"), exist_ok=True)

# Step 3: Collect image paths (recursive to capture all subfolders)
crack_images = glob(os.path.join(positive_folder, "**", "*.*"), recursive=True)
no_crack_images = glob(os.path.join(negative_folder, "**", "*.*"), recursive=True)

# Optional: filter by image extensions
image_extensions = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']
crack_images = [f for f in crack_images if os.path.splitext(f)[1].lower() in image_extensions]
no_crack_images = [f for f in no_crack_images if os.path.splitext(f)[1].lower() in image_extensions]

print(f"✅ Found {len(crack_images)} crack images")
print(f"✅ Found {len(no_crack_images)} no crack images")

# Step 4: Copy images into final folders
for f in crack_images:
    shutil.copy(f, os.path.join(combined_dataset, "crack"))

for f in no_crack_images:
    shutil.copy(f, os.path.join(combined_dataset, "no_crack"))

print("✅ All images copied to /content/crack_data")


✅ Found 20000 crack images
✅ Found 20000 no crack images
✅ All images copied to /content/crack_data


In [5]:
import tensorflow as tf

batch_size = 32
img_size = (128, 128)
seed = 42

# 🔹 Step 1: Split off 90% (train+val) and 10% (test)
full_ds = tf.keras.utils.image_dataset_from_directory(
    "/content/crack_data",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary',
    validation_split=0.1,
    subset='training',
    seed=seed
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    "/content/crack_data",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary',
    validation_split=0.1,
    subset='validation',
    seed=seed
)

# 🔹 Step 2: From the 90%, split again into train (70%) and val (20%)
train_ds = tf.keras.utils.image_dataset_from_directory(
    "/content/crack_data",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary',
    validation_split=0.3,  # 30% of 90% = 27% of full
    subset='training',
    seed=seed
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    "/content/crack_data",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary',
    validation_split=0.3,
    subset='validation',
    seed=seed
)

Found 40000 files belonging to 2 classes.
Using 36000 files for training.
Found 40000 files belonging to 2 classes.
Using 4000 files for validation.
Found 40000 files belonging to 2 classes.
Using 28000 files for training.
Found 40000 files belonging to 2 classes.
Using 12000 files for validation.


In [6]:
import tensorflow as tf

batch_size = 32
img_size = (128, 128)

train_ds = tf.keras.utils.image_dataset_from_directory(
    "/content/crack_data",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary',
    validation_split=0.3,
    subset='training',
    seed=42
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    "/content/crack_data",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary',
    validation_split=0.3,
    subset='validation',
    seed=42
)

print(train_ds.class_names)

Found 40000 files belonging to 2 classes.
Using 28000 files for training.
Found 40000 files belonging to 2 classes.
Using 12000 files for validation.
['crack', 'no_crack']


In [None]:
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Rescaling(1./255, input_shape=(128, 128, 3)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

model.fit(train_ds, validation_data=val_ds, epochs=10)
model.save("crack_classifier.h5")

  super().__init__(**kwargs)


Epoch 1/10
[1m186/875[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m13:01[0m 1s/step - accuracy: 0.8252 - loss: 0.3628

In [None]:
import numpy as np

y_true = []
y_pred = []

for images, labels in test_ds:
    preds = model.predict(images)
    preds_binary = (preds.flatten() >= 0.5).astype(int)  # sigmoid threshold at 0.5
    y_pred.extend(preds_binary)
    y_true.extend(labels.numpy().astype(int))

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

acc = accuracy_score(y_true, y_pred)
print(f"✅ Test Accuracy: {acc:.2f}")

print("\n📊 Classification Report:")
print(classification_report(y_true, y_pred, target_names=train_ds.class_names))

print("📌 Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

In [None]:
print("Class mapping:", {i: name for i, name in enumerate(train_ds.class_names)})

In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt

# ✅ Step 2: Load and preprocess the image
img_path = "/content/crack_data/no_crack/00003.jpg"  # Change to your image path

img = image.load_img(img_path, target_size=(128, 128))  # Resize to match model input
img_array = image.img_to_array(img)  # Normalize to 0-1
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

# ✅ Step 3: Predict
prediction = model.predict(img_array)[0][0]  # sigmoid value between 0 and 1

# ✅ Step 4: Map prediction to class
class_names = train_ds.class_names  # Should be ['crack', 'no_crack']
predicted_class = class_names[int(prediction >= 0.5)]

# ✅ Step 5: Show result
plt.imshow(img)
plt.axis('off')
plt.title(f"Predicted: {predicted_class} ({prediction:.2f})")
plt.show()