# Combining the datasets


In [None]:
import pandas as pd
import os

# One index CSV per dataset folder; every row names an image, its mask and a collage.
csv_paths = [
    '/kaggle/input/people-detection/dataset1/df.csv',
    '/kaggle/input/people-detection/dataset2/df.csv',
    '/kaggle/input/people-detection/dataset3/df.csv',
]  # Add more file names as needed
dfs = []

for csv_path in csv_paths:
    # The files are read without a header row, so column names are supplied here.
    df = pd.read_csv(csv_path, header=None, names=['image_path', 'mask_path', 'collage_path'])

    # Paths inside each CSV are joined onto the folder that holds the CSV.
    base_folder = os.path.dirname(csv_path)
    for column in ('image_path', 'mask_path', 'collage_path'):
        df[column] = df[column].apply(lambda x: os.path.join(base_folder, x))

    # Keep only rows whose three paths carry the expected file extensions.
    image_is_png = df['image_path'].str.endswith('.png')
    mask_is_png = df['mask_path'].str.endswith('.png')
    collage_is_jpg = df['collage_path'].str.endswith('.jpg')
    df = df[image_is_png & mask_is_png & collage_is_jpg]

    dfs.append(df)

# Single frame covering all datasets, re-indexed 0..N-1.
concatenated_df = pd.concat(dfs, ignore_index=True)

# Creating my own data generator because the data is too large to fit in memory at once


In [None]:
import tensorflow as tf
import numpy as np
import math
from PIL import Image

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads image/mask batches from disk on demand.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table with ``image_path`` and ``mask_path`` columns (file paths).
    batch_size : int
        Maximum number of samples per batch; must be positive.
    img_size : tuple of int
        Target ``(height, width)`` of the returned arrays.
    **kwargs
        Forwarded unchanged to the Keras base-class constructor.

    Raises
    ------
    ValueError
        If ``batch_size`` is not a positive number.
    """

    def __init__(self, dataframe, batch_size, img_size, **kwargs):
        # Newer Keras versions expect the base constructor to be called.
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size!r}")
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceil instead of floor: otherwise the last len % batch_size rows are never
        # used, and a dataset smaller than one batch would produce zero batches.
        return math.ceil(len(self.dataframe) / self.batch_size)

    def __getitem__(self, idx):
        """Load batch ``idx``.

        Returns
        -------
        tuple of numpy.ndarray
            ``X`` with shape ``(n, height, width, 3)`` and ``Y`` with shape
            ``(n, height, width, 1)``, both float32 scaled to ``[0, 1]``.
            ``n`` equals ``batch_size`` except possibly for the final batch.
        """
        start = idx * self.batch_size
        batch_df = self.dataframe.iloc[start:start + self.batch_size]

        # Size the arrays by the rows actually present so a short final batch
        # is not padded with all-zero samples.
        n_rows = len(batch_df)
        height, width = self.img_size
        X = np.zeros((n_rows, height, width, 3), dtype=np.float32)
        Y = np.zeros((n_rows, height, width, 1), dtype=np.float32)

        # PIL's resize() takes (width, height) whereas the arrays are
        # (height, width); swap so non-square sizes line up.
        pil_size = (width, height)

        paths = zip(batch_df['image_path'], batch_df['mask_path'])
        for i, (image_path, mask_path) in enumerate(paths):
            # Context managers release the file handles deterministically.
            with Image.open(image_path) as image_file:
                img = image_file.convert('RGB').resize(pil_size)
            with Image.open(mask_path) as mask_file:
                mask = mask_file.convert('L').resize(pil_size)

            X[i] = np.array(img) / 255.0
            Y[i] = np.expand_dims(np.array(mask), axis=-1) / 255.0

        return X, Y

# Creating the model


In [None]:
from tensorflow.keras import layers, models

def unet_model(input_size=(256, 256, 3)):
    """Build a U-Net that maps an image to a single-channel sigmoid mask.

    The network has four encoder stages (64, 128, 256, 512 filters), a
    1024-filter bridge and four mirrored decoder stages with skip connections.

    Parameters
    ----------
    input_size : tuple
        Shape passed to ``layers.Input``; defaults to ``(256, 256, 3)``.

    Returns
    -------
    An uncompiled ``models.Model`` whose output comes from a 1x1 convolution
    with sigmoid activation.
    """

    def double_conv(tensor, filters):
        # Two rounds of 3x3 ReLU convolution, each followed by batch normalisation.
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, activation='relu', padding='same')(tensor)
            tensor = layers.BatchNormalization()(tensor)
        return tensor

    def downsample(tensor):
        # 2x2 max-pooling followed by 50% dropout.
        pooled = layers.MaxPooling2D(pool_size=(2, 2))(tensor)
        return layers.Dropout(0.5)(pooled)

    def upsample_and_merge(tensor, skip, filters):
        # 2x upsampling, a 2x2 convolution + batch norm, then concatenation
        # with the matching encoder output along the channel axis.
        upsampled = layers.UpSampling2D(size=(2, 2))(tensor)
        upsampled = layers.Conv2D(filters, 2, activation='relu', padding='same')(upsampled)
        upsampled = layers.BatchNormalization()(upsampled)
        return layers.concatenate([skip, upsampled], axis=3)

    inputs = layers.Input(input_size)

    # Encoder (Downsampling): remember each stage's output for the skip connections.
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        x = double_conv(x, filters)
        skips.append(x)
        x = downsample(x)

    # Bridge
    x = double_conv(x, 1024)

    # Decoder (Upsampling): consume the skip connections deepest-first.
    for filters in (512, 256, 128, 64):
        x = upsample_and_merge(x, skips.pop(), filters)
        x = double_conv(x, filters)

    outputs = layers.Conv2D(1, 1, activation='sigmoid')(x)

    return models.Model(inputs=inputs, outputs=outputs)

# Compiling the model and creating the training and validation data generators


In [None]:
# Build the U-Net with its default input size and configure it for
# binary segmentation (Adam optimizer, binary cross-entropy loss).
model = unet_model()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Reproducible 80/20 split: sample 80% of the rows, the remainder is validation.
train_df = concatenated_df.sample(frac=0.8, random_state=42)
val_df = concatenated_df.drop(index=train_df.index)

# Batches of 16 samples at 256x256.
train_generator = DataGenerator(dataframe=train_df, batch_size=16, img_size=(256, 256))
val_generator = DataGenerator(dataframe=val_df, batch_size=16, img_size=(256, 256))

In [None]:
# Publishing generator: rebinds train_generator so that it feeds every row of
# concatenated_df instead of only the 80% training split.
train_generator = DataGenerator(dataframe=concatenated_df, batch_size=16, img_size=(256, 256))

# Training the model


In [None]:
# No validation data is passed to fit(), so the callbacks' default monitor
# ('val_loss') is never logged and they would silently do nothing. Monitor the
# training loss instead so early stopping and LR reduction actually take effect.
# (To validate on held-out data, pass validation_data=val_generator and switch
# the monitors back to 'val_loss'.)
history = model.fit(
    train_generator,
    epochs=20,
    callbacks=[
        # Stop after 5 epochs without improvement and roll back to the best weights.
        tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True),
        # Cut the learning rate by 10x after 3 epochs without improvement.
        tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1, patience=3),
    ],
)

# Save the model and the weights to a file


In [None]:
# Persist the weights on their own.
# NOTE(review): Keras 3 reportedly requires weight files to end in `.weights.h5` —
# confirm against the installed TensorFlow/Keras version before relying on this name.
model.save_weights('remove_background_weights.h5')
# Persist the whole model; this is the file reloaded for testing later in the notebook.
model.save('remove_background_model.h5')

# Some model testing


In [None]:
# Load the trained model from the file written by model.save().
# This rebinds the `model` name used during training; compile=False is passed
# because the model is only used for predict() in the cells that follow.
model = tf.keras.models.load_model('remove_background_model.h5', compile=False)

# Function to preprocess an image
def preprocess_image(image_path, img_size):
    """Load an image file and turn it into a single-sample model input.

    Parameters
    ----------
    image_path : str
        Path of the image to open.
    img_size : tuple
        Size handed straight to PIL's ``resize`` (PIL interprets it as
        ``(width, height)``).

    Returns
    -------
    numpy.ndarray
        RGB pixel data divided by 255, with a leading batch axis of length 1.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    resized = rgb_image.resize(img_size)
    scaled = np.array(resized) / 255.0
    # Prepend the batch dimension.
    return scaled[np.newaxis, ...]

# Function to postprocess the mask
def postprocess_mask(mask):
    """Binarise a predicted mask and strip its length-1 axes.

    Values strictly greater than 0.5 become 1, everything else 0.

    Parameters
    ----------
    mask : numpy.ndarray
        Array of prediction scores.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of zeros and ones with all singleton dimensions removed.
    """
    above_threshold = mask > 0.5
    binary = above_threshold.astype(np.uint8)
    return np.squeeze(binary)

# Function to apply mask to the image
def apply_mask(image, mask):
    """Multiply an image by a mask, broadcasting the mask over the last axis.

    Parameters
    ----------
    image : numpy.ndarray
        Image array whose leading dimensions match ``mask``.
    mask : numpy.ndarray
        Mask without a channel axis; a trailing axis of length 1 is appended
        before multiplying.

    Returns
    -------
    numpy.ndarray
        Element-wise product of ``image`` and the expanded mask.
    """
    channel_mask = np.expand_dims(mask, -1)
    masked = image * channel_mask
    return masked

In [None]:
import matplotlib.pyplot as plt

# Test image path
test_image_path = '/kaggle/input/people-detection/dataset3/images/ds11_bluebells-woods-english-spring-160972.png'

# Derive expected mask path from the image path (same file name, `masks` folder)
expected_mask_path = os.path.join('/kaggle/input/people-detection/dataset3/masks', os.path.basename(test_image_path))

# Preprocess the image
input_image = preprocess_image(test_image_path, (256, 256))

# Predict the mask and binarise it to a 2-D array of 0/1
predicted_mask = model.predict(input_image)
predicted_mask = postprocess_mask(predicted_mask)

# Load the original image for display.
# Force 3-channel RGB, exactly as preprocess_image does: a PNG may be RGBA,
# palette-based or grayscale, and any of those would not fit the three colour
# channels of rgba_image below.
original_image = Image.open(test_image_path).convert('RGB').resize((256, 256))
original_image_np = np.array(original_image) / 255.0

# Load the expected mask for display
expected_mask = Image.open(expected_mask_path).convert('L').resize((256, 256))
expected_mask_np = np.array(expected_mask) / 255.0

# Apply the mask to the original image (background pixels become 0)
cropped_image = apply_mask(original_image_np, predicted_mask)

# Create an RGBA image with the mask as the alpha channel
rgba_image = np.zeros((256, 256, 4), dtype=np.float32)
rgba_image[..., :3] = cropped_image
rgba_image[..., 3] = predicted_mask

# Convert the RGBA image (values in [0, 1]) to an 8-bit PIL Image
rgba_image_pil = Image.fromarray((rgba_image * 255).astype(np.uint8), 'RGBA')

# Save the RGBA image with transparent background
rgba_image_pil.save('cropped_image_with_transparent_background.png')

## Plotting of the results


In [None]:
# Plot the original image, expected mask, predicted mask, and cropped image
# side by side. Each entry is (title, image data, colormap).
panels = [
    ('Original Image', original_image_np, None),
    ('Expected Mask', expected_mask_np, 'gray'),
    ('Predicted Mask', predicted_mask, 'gray'),
    ('Cropped Image with Transparent Background', rgba_image_pil, None),
]

fig, axes = plt.subplots(1, len(panels), figsize=(16, 4))

for ax, (title, picture, colormap) in zip(axes, panels):
    ax.set_title(title)
    ax.imshow(picture, cmap=colormap)
    ax.axis('off')

fig.tight_layout()
plt.show()