# **Part 1: Install Dependencies, Import Libraries, and Configure Reproducibility**


In [None]:
!pip install pytorch_lightning -U torchinfo segmentation_models_pytorch open3d albumentations opencv-python-headless scipy scikit-image

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from os.path import join
import glob
import sys
import random
import warnings
from tqdm import tqdm
import itertools
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split
from IPython.display import Image
from skimage import io
import torchvision
import torchvision.models.resnet as resnet
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning import seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler
import segmentation_models_pytorch as smp
from torchinfo import summary
import open3d as o3d
import albumentations as A
from albumentations.pytorch import ToTensorV2
from scipy.spatial.transform import Rotation as R
from scipy.spatial import KDTree


# Reproducibility setup: the statements below change process-global state, so
# they are kept in this cell, ahead of any data loading or model code.

# Seed the random number generators through Lightning's helper; workers=True
# additionally asks Lightning to derive seeds for DataLoader worker processes.
seed_everything(42, workers=True)
# Ask PyTorch to use deterministic algorithms; per the PyTorch docs, operations
# without a deterministic implementation raise an error instead of running.
torch.use_deterministic_algorithms(True)
import os
# cuBLAS workspace setting that PyTorch documents as required for deterministic
# cuBLAS behaviour on CUDA; it has to be in the environment before CUDA work starts.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# **Part 2: Data Loading and Preprocessing**

In [None]:


# Step 1: Set paths and parameters
dataset_path = "./dataset/"  # Update this path to your dataset location
image_size = (256, 256)  # Resize dimensions, passed to cv2.resize as (width, height)

# Step 2: Load dataset
# Assuming dataset structure: ./dataset/images/ and ./dataset/depth/
image_dir = os.path.join(dataset_path, 'images')
depth_dir = os.path.join(dataset_path, 'depth')


def _list_data_files(directory):
    """Return the sorted names of the regular, non-hidden files in `directory`.

    Sub-directories and dot-files (e.g. `.DS_Store`) are skipped because
    cv2.imread cannot decode them.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if not name.startswith('.') and os.path.isfile(os.path.join(directory, name))
    )


def _read_or_raise(path, flags):
    """Read `path` with cv2.imread, raising ValueError if it cannot be decoded.

    cv2.imread signals failure by returning None rather than raising, which
    would otherwise surface later as an opaque error inside cvtColor/resize.
    """
    data = cv2.imread(path, flags)
    if data is None:
        raise ValueError(f"Could not read image file: {path}")
    return data


image_files = _list_data_files(image_dir)
depth_files = _list_data_files(depth_dir)

# RGB images and depth maps are paired by sorted position, so the two folders
# must contain the same number of files; zip() would silently drop the excess.
if len(image_files) != len(depth_files):
    raise ValueError(
        f"Found {len(image_files)} images in {image_dir} but "
        f"{len(depth_files)} depth maps in {depth_dir}; cannot pair them."
    )
if not image_files:
    raise ValueError(f"No image files found in {image_dir}")

images = []
depths = []

for img_file, depth_file in zip(image_files, depth_files):
    # Read RGB image (OpenCV decodes to BGR, so convert before storing)
    img = _read_or_raise(os.path.join(image_dir, img_file), cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, image_size)
    images.append(img)

    # Read depth map with its original bit depth / channel layout
    depth = _read_or_raise(os.path.join(depth_dir, depth_file), cv2.IMREAD_UNCHANGED)
    # Nearest-neighbour keeps only depth values that exist in the source map;
    # the default bilinear resize would blend depths across object boundaries.
    depth = cv2.resize(depth, image_size, interpolation=cv2.INTER_NEAREST)
    depths.append(depth)

images = np.array(images, dtype=np.float32) / 255.0  # Normalize images to [0, 1]
depths = np.array(depths, dtype=np.float32)  # Depth values are kept in their stored units

print(f"Images Shape: {images.shape}, Depths Shape: {depths.shape}")

# Step 3: Split data into training, validation, and testing sets
# Hold out 30% of the samples first, then cut that hold-out in half so the
# second half becomes the test set and the first half the validation set.
X_train, X_temp, y_train, y_temp = train_test_split(
    images,
    depths,
    test_size=0.3,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

# Report the (inputs, targets) shapes of every split.
for split_label, split_X, split_y in (
    ("Training Set Shape:", X_train, y_train),
    ("Validation Set Shape:", X_val, y_val),
    ("Testing Set Shape:", X_test, y_test),
):
    print(split_label, split_X.shape, split_y.shape)

# Step 4: Data Augmentation (optional)
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Example: Augment training images
data_gen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

# data_gen.fit(...) is intentionally not called: it only computes statistics
# for featurewise_center / featurewise_std_normalization / zca_whitening, none
# of which are enabled above, and it would copy the whole training array.

# Display a few augmented images
# NOTE: flow() applies the random transforms to the images only; the depth maps
# in y_batch are returned as-is, so they are not geometrically aligned with the
# augmented images. Use this generator for previewing RGB augmentations only.
X_batch, y_batch = next(iter(data_gen.flow(X_train, y_train, batch_size=5)))

# Size the grid from the batch actually returned: it holds fewer than 5 images
# when the training set itself has fewer than 5 samples.
fig, axes = plt.subplots(1, len(X_batch), squeeze=False)
for ax, augmented_img in zip(axes[0], X_batch):
    ax.imshow(augmented_img)
    ax.axis('off')
plt.show()

# Step 5: Save preprocessed data
# Collect the six split arrays under the names they are stored with in the
# archive, then write them all in one compressed .npz file.
split_arrays = {
    "X_train": X_train,
    "y_train": y_train,
    "X_val": X_val,
    "y_val": y_val,
    "X_test": X_test,
    "y_test": y_test,
}
np.savez_compressed('preprocessed_data.npz', **split_arrays)

print("Preprocessed dataset saved successfully!")
