In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from osgeo import gdal
from skimage import exposure

In [None]:
# Helper functions to read images
def read_image(file_path):
    """
    Reads an image file. Determines if it's RGB or multispectral based on the file extension.
    """
    if file_path.lower().endswith(('.tif', '.tiff')):
        dataset = gdal.Open(file_path)
        bands = [dataset.GetRasterBand(i).ReadAsArray() for i in range(1, dataset.RasterCount + 1)]
        image = np.stack(bands, axis=-1)
    else:
        image = cv2.imread(file_path, cv2.IMREAD_COLOR)
    return image

In [None]:
def preprocess_image(image):
    """
    Preprocesses an image (e.g., histogram equalization).
    """
    if len(image.shape) == 3 and image.shape[2] == 3:  # RGB image
        for i in range(3):
            image[:, :, i] = exposure.equalize_hist(image[:, :, i])
    else:  # Grayscale or single-band image
        image = exposure.equalize_hist(image)
    return image

In [None]:
def process_directory(directory):
    """
    Processes all images in the given directory.
    """
    images = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff')):
                file_path = os.path.join(root, file)
                image = read_image(file_path)
                processed_image = preprocess_image(image)
                images.append(processed_image)
    return images

In [None]:
def integrate_images(images):
    """
    Integrates multiple images into a single multimodal image.
    """
    integrated_image = np.concatenate(images, axis=-1)
    return integrated_image

In [None]:
# Helper functions to display images
def get_image_statistics(image):
    """
    Computes the mean and standard deviation of an image.
    """
    if image is not None:
        mean = np.mean(image, axis=(0, 1))
        std = np.std(image, axis=(0, 1))
        return mean, std
    return None, None

In [None]:
# Summarizing the dataset
def summarize_dataset(directory):
    """
    Summarizes the dataset by computing the mean and standard deviation of each image.
    """
    summary = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff')):
                file_path = os.path.join(root, file)
                image = read_image(file_path)
                mean, std = get_image_statistics(image)
                summary.append({
                    'file_path': file_path,
                    'mean': mean,
                    'std': std,
                    'shape': image.shape if image is not None else None
                })
    return pd.DataFrame(summary)

In [None]:
# Visualizing the dataset
def visualize_summary(summary_df):
    """
    Visualizes the summary of the dataset.
    """
    # Mean and Standard Deviation Plots
    means = np.stack(summary_df['mean'].dropna().values)
    stds = np.stack(summary_df['std'].dropna().values)
    
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.hist(means, bins=30, alpha=0.7, label=['Band {}'.format(i+1) for i in range(means.shape[1])])
    plt.title('Distribution of Mean Pixel Values')
    plt.xlabel('Mean Pixel Value')
    plt.ylabel('Frequency')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.hist(stds, bins=30, alpha=0.7, label=['Band {}'.format(i+1) for i in range(stds.shape[1])])
    plt.title('Distribution of Standard Deviation of Pixel Values')
    plt.xlabel('Standard Deviation of Pixel Value')
    plt.ylabel('Frequency')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Define the root path of the dataset
dataset_root_path = '/Users/izzymohamed/Desktop/Vision For Social Good/DATA/Cherry'

In [None]:
# Example directories (update paths based on your actual dataset structure)
aerial_uav_path = os.path.join(dataset_root_path, '03_11_2021/Aerial_UAV_photos')
ground_rgb_path = os.path.join(dataset_root_path, '03_11_2021/Ground_RGB_Photos')
ground_multispectral_path = os.path.join(dataset_root_path, '03_11_2021/Ground_Multispectral_Photos')

In [11]:
# Process images from each modality
aerial_uav_images = process_directory(aerial_uav_path)
ground_rgb_images = process_directory(ground_rgb_path)
ground_multispectral_images = process_directory(ground_multispectral_path)

In [12]:
# Integrate images from all modalities
# all_images = aerial_uav_images + ground_rgb_images + ground_multispectral_images
# integrated_image = integrate_images(all_images)

# print("Integrated image shape:", integrated_image.shape)

In [13]:
# Summarize each dataset
aerial_uav_summary = summarize_dataset(aerial_uav_path)
ground_rgb_summary = summarize_dataset(ground_rgb_path)
ground_multispectral_summary = summarize_dataset(ground_multispectral_path)

In [14]:
# Combine summaries
all_summary = pd.concat([aerial_uav_summary, ground_rgb_summary, ground_multispectral_summary], ignore_index=True)

In [15]:
# Save summary to CSV
all_summary.to_csv('dataset_summary.csv', index=False)

In [16]:
# Visualize summary
visualize_summary(all_summary)

ValueError: all input arrays must have the same shape