<a href="https://colab.research.google.com/github/Sree14hari/Road-Crack-Classification-CNN/blob/main/roadcrack_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import os
# Install the rarfile library
!pip install rarfile

import rarfile

# --- YOU NEED TO UPDATE THIS PATH ---
# Path to the RAR archive holding the dataset (Google Drive or Colab filesystem).
# NOTE(review): a /content/drive/... path presumably needs Google Drive to be
# mounted before this cell runs -- confirm.
rar_file_path = '/content/drive/MyDrive/Augmented_Dataset.rar'
# ------------------------------------

# Directory to extract the files to in the Colab environment
extract_path = '/content/dataset'

# Create the target directory. exist_ok=True makes re-running the cell safe
# and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(extract_path, exist_ok=True)

print(f"Unzipping {rar_file_path} to {extract_path}...")
try:
    # Only the extraction itself is guarded; rarfile.Error is the library's
    # base exception, so archive-level failures are reported without a traceback.
    with rarfile.RarFile(rar_file_path, 'r') as rf:
        rf.extractall(extract_path)
except rarfile.Error as e:
    print(f"Error unzipping RAR file: {e}")
else:
    # Reached only when extraction raised nothing.
    print("Unzipping complete! ✅")


# Sanity check: show what ended up in the extraction directory.
print("\nContents of the extracted folder:")
# The archive may unpack into its own parent folder; start at the top-level
# extraction directory and point this somewhere deeper if needed.
extracted_content_path = extract_path

if not os.path.exists(extracted_content_path):
    print(f"Extracted path {extracted_content_path} not found.")
else:
    print(os.listdir(extracted_content_path))

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# --- 1. SETUP: Define Paths and Parameters ---
# Root of the extracted dataset in Colab; the 'train', 'val' and 'test'
# sub-directories below are the ones handed to the data generators.
base_dir = '/content/dataset/Augmented_Dataset'
train_dir, val_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'val', 'test')
)

# Hyper-parameters shared by the rest of the notebook
IMG_SIZE = (224, 224)  # generator target_size and the model's spatial input size
BATCH_SIZE = 32
EPOCHS = 20  # Start with 20 epochs
NUM_CLASSES = 4  # number of units in the final softmax layer

# --- 2. DATA PREPARATION: Create Data Generators ---
# The ImageNet VGG16 weights expect inputs prepared with
# vgg16.preprocess_input (per the Keras documentation: RGB -> BGR and
# per-channel ImageNet mean subtraction, without scaling). A plain 1/255
# rescale feeds the frozen base a value range it was not trained on, so the
# generators apply the matching preprocessing function instead.
datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input
)

# Training batches: images resized to IMG_SIZE, labels in 'categorical' mode
# to go with the categorical_crossentropy loss used at compile time.
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Validation batches use exactly the same preprocessing and sizing.
validation_generator = datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# --- 3. MODEL BUILDING: Use Transfer Learning with VGG16 ---
# VGG16 convolutional base with ImageNet weights; include_top=False leaves
# out the original ImageNet classifier.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMG_SIZE, 3),
)

# Keep the pre-trained weights fixed so only the new head is trained
base_model.trainable = False

# Stack the new classification head on top of the frozen base
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))  # Regularization
model.add(Dense(NUM_CLASSES, activation='softmax'))  # Final output layer

# --- 4. MODEL COMPILATION ---
# Adam with a small learning rate (1e-4). The base is frozen above, so this
# trains the new head only rather than fine-tuning the VGG16 weights.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

model.summary()

# --- 5. TRAINING ---
# Fit on the training generator for EPOCHS epochs with the validation
# generator as validation data; the returned object's .history dict is
# what the plotting code reads.
print("\nStarting model training...")
history = model.fit(train_generator, validation_data=validation_generator, epochs=EPOCHS)
print("Training finished!")

# --- 6. VISUALIZATION ---
# Per-epoch metric lists recorded by model.fit()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from what was actually recorded rather than from EPOCHS,
# so the x and y lengths always match even if the number of recorded epochs
# differs from the configured value.
epochs_range = range(len(acc))

plt.figure(figsize=(12, 6))

# Left panel: accuracy curves
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# --- 7. EVALUATION ---
# Held-out test images, read with the same generator settings as training.
# shuffle=False keeps a fixed sample order, which the later comparison of
# predictions against test_generator.classes depends on.
test_generator = datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode='categorical', shuffle=False,
)

print("\nEvaluating on test data...")
# evaluate() yields the loss followed by the compiled 'accuracy' metric
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc*100:.2f}%")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Get the true labels from the test generator. The generator was created
# with shuffle=False, so these line up with the prediction order.
test_labels = test_generator.classes

# Get the predicted labels from the model (arg-max over the softmax outputs)
predictions = model.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# Class names ordered by their integer index, plus the matching label ids.
# Sorting by index does not rely on the dict's insertion order.
class_indices = test_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)
label_ids = [class_indices[name] for name in class_names]

# Generate the confusion matrix. Passing labels= pins one row/column per
# class, so a class that appears in neither the true nor the predicted
# labels cannot shrink the matrix out of step with class_names.
cm = confusion_matrix(test_labels, predicted_labels, labels=label_ids)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Generate the classification report. The same labels= keeps target_names
# matched one-to-one with the reported classes.
print("\nClassification Report:")
print(classification_report(test_labels, predicted_labels, labels=label_ids, target_names=class_names))