In [38]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Input, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from skimage.feature import hog
from skimage import exposure
import cv2

# Define constants
# Target size passed to cv2.resize in preprocess_image and used for the
# spatial dimensions of the model input in create_model.
IMAGE_SIZE = (192, 192)  # Reduced image size
# Mini-batch size of the training generator built in train_model.
BATCH_SIZE = 32
# Upper bound on training epochs passed to model.fit; the EarlyStopping
# callback in train_model may end training sooner.
EPOCHS = 80

# Offline augmentation generator.
# augment_and_expand_dataset draws randomly transformed copies of each image
# from this generator to enlarge the dataset before training.
augmentation_datagen = ImageDataGenerator(
    # Pixels exposed by a transform are filled from the nearest valid pixel.
    fill_mode='nearest',
    # Mirroring.
    horizontal_flip=True,
    # Geometric jitter.
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=30
)

def preprocess_image(img_path):
    """Load an image and return it as a 4-channel float32 array (colour + HOG).

    The image is read with OpenCV, resized to ``IMAGE_SIZE``, smoothed with a
    5x5 Gaussian blur, and a HOG visualisation of its grayscale version is
    appended as a fourth channel.

    Args:
        img_path: Path of the image file to load.

    Returns:
        ``np.float32`` array whose last axis has 4 channels: the three blurred
        colour channels in OpenCV's BGR order, followed by the
        intensity-rescaled HOG visualisation.

    Raises:
        FileNotFoundError: If ``img_path`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error only surfaces later as an opaque cv2.error in resize.
    if img is None:
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
        raise ValueError(f"Could not decode image file: {img_path}")

    img = cv2.resize(img, IMAGE_SIZE)

    # Light smoothing before computing gradients for HOG.
    img = cv2.GaussianBlur(img, (5, 5), 0)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Only the visualisation image is used; the HOG descriptor is discarded.
    _, hog_image = hog(gray, orientations=8, pixels_per_cell=(16, 16),
                       cells_per_block=(1, 1), visualize=True)

    hog_image = exposure.rescale_intensity(hog_image, in_range=(0, 10))
    hog_image = np.expand_dims(hog_image, axis=-1)

    # NOTE(review): the colour channels stay in BGR order here, while ImageNet
    # weights are normally used with RGB input - confirm whether a
    # cv2.COLOR_BGR2RGB conversion is wanted before retraining.
    combined_img = np.concatenate((img, hog_image), axis=-1).astype(np.float32)
    return combined_img

# Function to artificially increase the dataset size using augmentation
def augment_and_expand_dataset(X, y, augmentations_per_image=2):
    """Return the dataset extended with randomly augmented copies of each image.

    The result is laid out as: all augmented copies first (the copies of image
    0, then of image 1, ...), followed by the original images in their
    original order. Labels follow the same layout.

    The output is written into a single preallocated array instead of being
    collected in Python lists and copied, which roughly halves peak memory.

    Args:
        X: Sequence/array of images sharing one shape (first axis = sample).
        y: Labels, one per image in ``X``.
        augmentations_per_image: Number of augmented copies drawn from
            ``augmentation_datagen`` for every image. Defaults to 2.

    Returns:
        Tuple ``(X_out, y_out)``: ``X_out`` is a ``np.float32`` array with
        ``len(X) * (augmentations_per_image + 1)`` samples, ``y_out`` the
        matching label array. For empty input two empty 1-D arrays are
        returned.

    Raises:
        ValueError: If ``augmentations_per_image`` is negative.
    """
    if augmentations_per_image < 0:
        raise ValueError("augmentations_per_image must be non-negative")

    n_images = len(X)
    if n_images == 0:
        # Nothing to augment: return empty arrays (float32 images, empty labels).
        return np.array([], dtype=np.float32), np.array([])

    # No copy is made when X is already a float32 array.
    X = np.asarray(X, dtype=np.float32)
    y = np.asarray(y)

    n_augmented = n_images * augmentations_per_image
    X_out = np.empty((n_augmented + n_images,) + X.shape[1:], dtype=np.float32)

    if augmentations_per_image:
        slot = 0
        for i in range(n_images):
            # X[i:i + 1] keeps the batch dimension the generator expects.
            augment_iter = augmentation_datagen.flow(X[i:i + 1], batch_size=1)
            for _ in range(augmentations_per_image):
                X_out[slot] = next(augment_iter)[0]
                slot += 1

    # Originals go after the augmented copies.
    X_out[n_augmented:] = X

    # Each label is repeated once per augmented copy, then the originals follow.
    y_out = np.concatenate((np.repeat(y, augmentations_per_image, axis=0), y))
    return X_out, y_out

def batch_process(csv_file, image_folder, batch_size=500):
    """Load, preprocess and augment every image listed in a labels CSV.

    The CSV must provide a ``filename`` column (path relative to
    ``image_folder``) and a ``percentage`` column (regression target). Rows
    are handled ``batch_size`` at a time: each image goes through
    ``preprocess_image`` and each batch through ``augment_and_expand_dataset``.

    Results are written straight into one preallocated output array. Keeping
    every batch in a list and converting it with ``np.array`` at the end
    holds the whole dataset in memory twice, which is what triggers the
    ``MemoryError`` on larger datasets.

    Args:
        csv_file: Path to the labels CSV.
        image_folder: Directory containing the image files.
        batch_size: Number of CSV rows processed per batch.

    Returns:
        Tuple ``(X, y)``: ``X`` is a ``np.float32`` array of images, ``y`` the
        matching label array. For a CSV without rows two empty 1-D arrays are
        returned.
    """
    df = pd.read_csv(csv_file)
    n_rows = len(df)
    if n_rows == 0:
        return np.array([], dtype=np.float32), np.array([])

    X_all = None
    y_parts = []
    filled = 0

    for start in range(0, n_rows, batch_size):
        batch_df = df.iloc[start:start + batch_size]

        X_batch = np.array([
            preprocess_image(os.path.join(image_folder, filename))
            for filename in batch_df['filename']
        ])
        y_batch = batch_df['percentage'].to_numpy()

        X_batch, y_batch = augment_and_expand_dataset(X_batch, y_batch)

        if X_all is None:
            # Infer how many output samples each input row produces from the
            # first batch (ceil division) and reserve space for the whole CSV.
            per_row = -(-len(X_batch) // len(batch_df))
            X_all = np.empty((n_rows * per_row,) + X_batch.shape[1:],
                             dtype=np.float32)

        end = filled + len(X_batch)
        if end > X_all.shape[0]:
            # Fallback for a non-constant expansion factor: grow the buffer.
            grown = np.empty((end,) + X_all.shape[1:], dtype=np.float32)
            grown[:filled] = X_all[:filled]
            X_all = grown

        X_all[filled:end] = X_batch
        filled = end
        y_parts.append(np.asarray(y_batch))

        # Release the batch before the next one is built.
        del X_batch, y_batch

    return X_all[:filled], np.concatenate(y_parts)

def create_model():
    """Build and compile the two-branch regression model.

    The 4-channel input is split into:
      * channels 0-2 (colour), scaled with MobileNetV2's ``preprocess_input``
        and fed through an ImageNet-pretrained MobileNetV2 backbone;
      * channel 3 (HOG visualisation), fed through one small Conv2D layer.

    Both branches are global-average-pooled, concatenated and passed through
    a Dense/BatchNorm/Dropout head ending in a single linear output.

    Returns:
        A compiled ``Model`` (Adam, learning rate 1e-4, MSE loss, MAE metric).
    """
    input_tensor = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 4))

    # Use only the colour channels for the MobileNetV2 base
    rgb_tensor = input_tensor[:, :, :, :3]
    # The pretrained weights expect inputs scaled to [-1, 1]; the pipeline
    # delivers raw 0-255 pixel values, so scale them inside the model.
    rgb_tensor = preprocess_input(rgb_tensor)

    base_model = MobileNetV2(weights='imagenet', include_top=False,
                             input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Fine-tune only the top layers: every layer is trainable by default, so
    # the lower layers must be frozen explicitly for this to have any effect.
    for layer in base_model.layers[:-20]:
        layer.trainable = False
    for layer in base_model.layers[-20:]:
        layer.trainable = True

    x = base_model(rgb_tensor)
    x = GlobalAveragePooling2D()(x)

    # Add a convolutional layer for the HOG feature
    hog_tensor = input_tensor[:, :, :, 3:4]
    hog_conv = Conv2D(32, (3, 3), activation='relu')(hog_tensor)
    hog_pool = GlobalAveragePooling2D()(hog_conv)

    # Concatenate MobileNetV2 features and HOG features
    x = Concatenate()([x, hog_pool])

    # Regression head with progressively smaller layers and lighter dropout.
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    # Single linear unit: the predicted completion percentage.
    output = Dense(1)(x)

    model = Model(inputs=input_tensor, outputs=output)

    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='mean_squared_error',
                  metrics=['mae'])
    return model

def train_model(X_train, y_train, X_val, y_val):
    """Create the model and train it with on-the-fly augmentation.

    Args:
        X_train: Training images.
        y_train: Training targets.
        X_val: Validation images (used unaugmented).
        y_val: Validation targets.

    Returns:
        Tuple ``(model, history)``: the trained model (best weights restored
        by early stopping) and the object returned by ``model.fit``.
    """
    model = create_model()

    # Data augmentation during training
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2
    )

    early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

    # The iterator returned by flow() already loops over the data
    # indefinitely and has no .repeat() method (that is a tf.data.Dataset
    # API), so it is passed to fit() directly.
    train_flow = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)

    # Guard against 0 steps when there are fewer samples than one batch.
    steps_per_epoch = max(1, len(X_train) // BATCH_SIZE)

    history = model.fit(
        train_flow,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    return model, history

def predict_completion(model, image_path):
    """Predict the completion value for a single image file.

    Args:
        model: Object exposing ``predict`` (the trained model).
        image_path: Path of the image to evaluate.

    Returns:
        The first output of the first (only) sample in the prediction.
    """
    # model.predict works on batches, so give the image a leading batch axis.
    single_batch = preprocess_image(image_path)[np.newaxis, ...]
    predictions = model.predict(single_batch)
    return predictions[0][0]

def detect_defects(image_path, min_area=100):
    """Count large external contours in an image as potential defects.

    The image is converted to grayscale, Canny edges are extracted
    (thresholds 50/150) and the external contours of the edge map are
    counted if their area exceeds ``min_area``.

    Args:
        image_path: Path of the image to inspect.
        min_area: Contours with an area not greater than this are ignored.
            Defaults to 100.

    Returns:
        Number of contours whose area is greater than ``min_area``.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising.
    if img is None:
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"Could not decode image file: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Edge detection
    edges = cv2.Canny(gray, 50, 150)

    # Contour detection. findContours returns (contours, hierarchy) in
    # OpenCV 4 but (image, contours, hierarchy) in OpenCV 3; the contours are
    # the second-to-last element in both.
    contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Count contours large enough to be considered a potential defect.
    return sum(1 for contour in contours if cv2.contourArea(contour) > min_area)

In [39]:
# Main execution
if __name__ == "__main__":
    # NOTE(review): machine-specific absolute paths - adjust for your environment.
    csv_file = r'D:\Coding\Files\Project\Construction 2.0\data processing (image)\labels.csv'
    image_folder = r'D:\Coding\Files\Project\Construction 2.0\data processing (image)'

    # Load and prepare the dataset in batches
    X, y = batch_process(csv_file, image_folder)

    # Split dataset: 80% train+val / 20% test ...
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # train_test_split returns copies, so the full arrays are no longer
    # needed. Dropping them before the second split keeps one whole extra
    # copy of the dataset out of peak memory.
    del X, y

    # ... then 80% train / 20% validation of the remainder.
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Train the model
    model, history = train_model(X_train, y_train, X_val, y_val)

    # Evaluate the model on the held-out test split
    test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Mean Absolute Error: {test_mae:.4f}")

    # Save the model
    model.save('construction_progress_model.h5')
    print("Model saved successfully.")

MemoryError: Unable to allocate 1.25 GiB for an array with shape (2283, 192, 192, 4) and data type float32

In [None]:
# Save the model
model.save('building_completion_model_0.3_augmented.h5')

print("Enhanced model with augmented data has been trained and saved as 'building_completion_model_0.3_augmented.h5'")

# Example usage of defect detection
# X_test holds preprocessed image arrays, not file names, so it cannot be
# joined onto image_folder. Take a file name from the labels CSV instead.
# This image is only a usage example; it is not guaranteed to belong to the
# held-out test split.
sample_filename = pd.read_csv(csv_file)['filename'].iloc[0]
sample_image_path = os.path.join(image_folder, sample_filename)
defect_count = detect_defects(sample_image_path)
print(f"Number of potential defects detected: {defect_count}")

# Predict completion percentage
completion_percentage = predict_completion(model, sample_image_path)
print(f"Predicted completion percentage: {completion_percentage:.2f}%")