# 1. Generating InSAR Dataset

In [None]:
import torch
from torch.utils.data import Dataset
import numpy as np
from scipy.ndimage import gaussian_filter
import os
import pandas as pd
import random

In [None]:
class InSARSyntheticData(Dataset):
    """Synthetic InSAR time series: a point-source deformation signal plus noise.

    One item corresponds to one acquisition time in ``self.times``. Each item is a
    triple ``(noisy1, noisy2, clean)`` of ``float32`` tensors shaped
    ``(1, rows, cols)``: two independently-noised copies of the same clean
    line-of-sight image and the clean image itself.

    Args:
        size: ``(rows, cols)`` of every generated image.
        full_dem: 2-D elevation array; a random ``size`` patch is cut from it
            for every item, so it must be at least ``size`` in both axes.
        baselines: sequence of perpendicular baselines; one is drawn at random
            for each topographic-noise realisation.
        wavelength: radar wavelength used in the topographic phase term.
        S_max: magnitude of the vertical displacement at the image centre when
            the time factor equals 1.
        D: depth-like term of the source, in the same (pixel) units as the
            radial distance computed from the image indices.
        nu, cm, V: coefficients of the displacement formula. They enter both
            ``delta_P_final`` and the displacement factor and cancel out there.
        random_noise_std: standard deviation of the white Gaussian noise.
        tropospheric_noise_beta: exponent of the ``k ** -beta`` spectral weight
            applied to the tropospheric noise.
        tropospheric_noise_scale: multiplier applied to the unit-variance
            tropospheric noise field.
        total_days: last day considered when building the time axis.
        interval_days: spacing between consecutive acquisition days.
        f_t: callable ``f_t(t, times, total_time)`` returning the time factor;
            any non-callable value (default ``-1``) selects the linear factor
            ``t / total_time``.
        orbit_type: ``'ascending'`` or ``'descending'``.

    Raises:
        ValueError: if ``orbit_type`` is not one of the two supported values.
    """

    def __init__(self, size, full_dem, baselines, wavelength, S_max=5.0, D=50, nu=0.25, cm=1.0, V=1.0,
                 random_noise_std=0.56, tropospheric_noise_beta=1.82, tropospheric_noise_scale=1.0,
                 total_days=1460, interval_days=49, f_t=-1, orbit_type='ascending'):

        # Size of image
        self.size = size

        # Time & angle parameters
        self.total_days = total_days
        # Fix: the time-factor code reads ``self.total_time``, which was never
        # assigned and made every item fail with AttributeError.
        # NOTE(review): total_days is assumed to be the intended normaliser - confirm.
        self.total_time = total_days
        self.interval_days = interval_days
        self.orbit_type = orbit_type
        self.incidence_angle_deg, self.satellite_azimuth_deg = self._get_orbit_geometry()
        # Fix: the helper is defined as ``_calculate_los_vector``; the previous
        # call used a name without the underscore that does not exist.
        self.los_vector = InSARSyntheticData._calculate_los_vector(
            self.incidence_angle_deg, self.satellite_azimuth_deg)
        self.times = self.get_times()

        # Subsidence parameters
        self.S_max = S_max
        self.D = D
        self.nu = nu
        self.cm = cm
        self.V = V
        self.delta_P_final = -self.S_max * ((np.pi * self.D**2) / (self.cm * (1 - self.nu) * self.V))
        self.r, self.subsidence_denominator, self.azimuth = InSARSyntheticData._generate_subsidence_constants(size, D)
        self.f_t = f_t

        # Gaussian noise parameters
        self.random_noise_std = random_noise_std

        # Tropospheric noise parameters
        self.tropospheric_noise_beta = tropospheric_noise_beta
        self.tropospheric_noise_scale = tropospheric_noise_scale
        self.power_array = InSARSyntheticData._calculate_power_array(size, tropospheric_noise_beta)

        # Topographic noise parameters
        self.baselines = baselines
        self.wavelength = wavelength
        self.full_dem = full_dem
        SLANT_RANGE = 850000
        # Denominator of the topographic term: wavelength * slant range * sin(incidence).
        self.topographic_denominator = self.wavelength * SLANT_RANGE * np.sin(np.deg2rad(self.incidence_angle_deg))

    def __len__(self):
        """Return the number of acquisition times."""
        return len(self.times)

    def generate_subsidence(self, delta_P):
        """Project the displacement field produced by ``delta_P`` onto the line of sight.

        Vertical (``uz``) and radial (``ur``) displacements share the factor
        ``(-1/pi) * cm * (1 - nu) * delta_P * V`` and the ``(r^2 + D^2)^1.5``
        denominator; the radial part is split into x/y using the pixel azimuth.
        """
        factor = (-1 / np.pi) * self.cm * (1 - self.nu) * delta_P * self.V
        uz = factor * (self.D / (self.subsidence_denominator**1.5))
        ur = factor * (self.r / (self.subsidence_denominator**1.5))
        ux = ur * np.cos(self.azimuth)
        uy = ur * np.sin(self.azimuth)
        los_vector_calc = self.los_vector
        simulated_interferogram = (ux * los_vector_calc[0]) + \
                                  (uy * los_vector_calc[1]) + \
                                  (uz * los_vector_calc[2])
        return simulated_interferogram

    def _get_clean_subsidence_image(self, t):
        """Return the noise-free line-of-sight image at time ``t``."""
        if callable(self.f_t) and len(self.times) > 0:
            current_time_factor = self.f_t(t, self.times, self.total_time)
        elif self.total_time > 0:
            # Non-callable f_t (e.g. the default -1) means linear growth in time.
            current_time_factor = t / self.total_time
        else:
            current_time_factor = 0
        delta_P_current = -self.delta_P_final * current_time_factor
        return self.generate_subsidence(delta_P=delta_P_current)

    def generate_random_noise(self):
        """Return a white Gaussian noise image with std ``random_noise_std``."""
        return np.random.normal(loc=0.0, scale=self.random_noise_std, size=self.size)

    def generate_tropospheric_noise(self):
        """Return spectrally-weighted noise, standardised and then scaled.

        White noise is weighted in the Fourier domain by ``power_array``, brought
        back to the spatial domain, normalised to zero mean / unit std (the std
        division is skipped for a numerically flat field) and multiplied by
        ``tropospheric_noise_scale``.
        """
        noise = np.fft.fft2(np.random.randn(*self.size))
        frac_noise = np.fft.ifft2(noise * self.power_array).real
        std_val = frac_noise.std()
        if std_val > 1e-9:
            frac_noise = (frac_noise - frac_noise.mean()) / std_val
        else:
            frac_noise = frac_noise - frac_noise.mean()
        return frac_noise * self.tropospheric_noise_scale

    def generate_topographic_noise(self, dem_patch):
        """Return ``-4*pi*b_perp / topographic_denominator * dem_error`` for a random baseline.

        Returns an all-zero image when the denominator is numerically zero.
        """
        delta_h_map = InSARSyntheticData._generate_dem_error(dem_patch, std=5.0)
        b_perp = np.random.choice(self.baselines)
        numerator = -4 * np.pi * b_perp
        if abs(self.topographic_denominator) < 1e-9:
            return np.zeros(self.size)
        topo_phase_noise = (numerator / self.topographic_denominator) * delta_h_map
        return topo_phase_noise

    def get_times(self):
        """Return acquisition days ``1, 1 + interval, ...`` up to ``total_days``.

        Falls back to the single time ``[1.0]`` when ``total_days < 1`` or
        ``interval_days <= 0``.
        """
        if self.total_days < 1 or self.interval_days <= 0:
            return np.array([1.0])
        return np.arange(1, self.total_days + 1, self.interval_days)

    def _get_orbit_geometry(self):
        """Return ``(incidence_angle_deg, satellite_azimuth_deg)`` for the orbit type."""
        if self.orbit_type == 'ascending':
            return 40, 15
        elif self.orbit_type == 'descending':
            return 40, 195
        else:
            raise ValueError("Invalid orbit type.")

    def __getitem__(self, idx):
        """Return ``(noisy1, noisy2, clean)`` tensors for acquisition ``idx``.

        A DEM patch is drawn at a random position for every call and shared by
        both noise realisations; the random, tropospheric and topographic noise
        terms are drawn independently for each of the two noisy images.
        """
        max_y = self.full_dem.shape[0] - self.size[0]
        max_x = self.full_dem.shape[1] - self.size[1]

        start_y = random.randint(0, max_y)
        start_x = random.randint(0, max_x)

        dem_patch = self.full_dem[
            start_y : start_y + self.size[0],
            start_x : start_x + self.size[1]
        ]

        current_time = self.times[idx]

        clean_image = self._get_clean_subsidence_image(current_time)
        noise1_random = self.generate_random_noise()
        noise1_tropo = self.generate_tropospheric_noise()
        noise1_topo = self.generate_topographic_noise(dem_patch)
        noisy_image1 = clean_image + noise1_random + noise1_tropo + noise1_topo

        noise2_random = self.generate_random_noise()
        noise2_tropo = self.generate_tropospheric_noise()
        noise2_topo = self.generate_topographic_noise(dem_patch)

        noisy_image2 = clean_image + noise2_random + noise2_tropo + noise2_topo
        clean_image_tensor = torch.from_numpy(clean_image.copy()).float().unsqueeze(0)
        noisy_image1_tensor = torch.from_numpy(noisy_image1.copy()).float().unsqueeze(0)
        noisy_image2_tensor = torch.from_numpy(noisy_image2.copy()).float().unsqueeze(0)
        return noisy_image1_tensor, noisy_image2_tensor, clean_image_tensor

    @staticmethod
    def _generate_subsidence_constants(size, D):
        """Return ``(r, r^2 + D^2, azimuth)`` grids centred on the image midpoint."""
        y, x = np.indices(size)
        cx, cy = size[1] // 2, size[0] // 2
        r_sq = (x - cx)**2 + (y - cy)**2
        r = np.sqrt(r_sq)
        denominator_base = r**2 + D**2
        # Guard against a zero denominator at the centre pixel when D == 0.
        denominator_base[denominator_base < 1e-9] = 1e-9
        azimuth = np.arctan2(y - cy, x - cx)
        return r, denominator_base, azimuth

    @staticmethod
    def _calculate_los_vector(incidence_angle_deg, satellite_azimuth_deg):
        """Return the ``[east, north, up]`` line-of-sight unit vector.

        The look azimuth is taken as the satellite azimuth plus 90 degrees.
        """
        incidence_angle_rad = np.deg2rad(incidence_angle_deg)
        satellite_azimuth_rad = np.deg2rad(satellite_azimuth_deg)
        look_azimuth_rad = satellite_azimuth_rad + np.pi/2
        l_east = np.sin(incidence_angle_rad) * np.sin(look_azimuth_rad)
        l_north = np.sin(incidence_angle_rad) * np.cos(look_azimuth_rad)
        l_up = np.cos(incidence_angle_rad)
        return np.array([l_east, l_north, l_up])

    @staticmethod
    def _calculate_power_array(size, beta):
        """Return the ``k ** -beta`` weight on the FFT frequency grid of ``size``."""
        ky = np.fft.fftfreq(size[0])
        kx = np.fft.fftfreq(size[1])
        kx, ky = np.meshgrid(kx, ky)
        k = np.sqrt(kx**2 + ky**2)
        # Avoid 0 ** -beta at the DC term.
        k[0, 0] = 1e-7
        power = k ** (-beta)
        return power

    @staticmethod
    def _generate_dem_error(dem_patch, std=5.0):
        """Return the high-pass DEM residual rescaled to zero mean and std ``std``.

        ``std`` is used both as the Gaussian smoothing sigma and as the target
        standard deviation of the result. A numerically flat residual is
        returned unscaled (all zeros after mean removal).
        """
        smoothed_dem = gaussian_filter(dem_patch, sigma=std)
        roughness = dem_patch - smoothed_dem
        zero_mean_roughness = roughness - roughness.mean()
        roughness_std = zero_mean_roughness.std()
        if roughness_std > 1e-9:
            normalized_roughness = zero_mean_roughness / roughness_std
        else:
            normalized_roughness = zero_mean_roughness
        dem_error = normalized_roughness * std
        return dem_error
        
def f_linear(t, times_array, total_time_val):
    """Return the linear time factor ``t / total_time_val``.

    ``times_array`` is accepted but not used. A total of exactly zero yields 0
    instead of dividing.
    """
    return 0 if total_time_val == 0 else t / total_time_val

def f_log(t, times_array, total_time_val):
    """Return a logarithmic time factor clamped to ``[0, 1]``.

    Time is measured from the first entry of ``times_array`` when that entry is
    positive, otherwise from 1.0. The factor is
    ``log1p(elapsed) / log1p(total span)``. A non-positive total yields 0. When
    the span collapses to at most one unit, the function returns 1.0 once
    ``t`` has reached the total and the plain ratio ``t / total_time_val``
    before that.
    """
    if total_time_val <= 0:
        return 0
    if t <= 0:
        t = 1e-6

    first_is_positive = len(times_array) > 0 and times_array[0] > 0
    series_start = times_array[0] if first_is_positive else 1.0

    shifted_t = (t - series_start) + 1
    shifted_total = (total_time_val - series_start) + 1

    if shifted_total <= 1:
        if t >= total_time_val:
            return 1.0
        return t / total_time_val if total_time_val > 0 else 0

    log_elapsed = np.log1p(max(0, shifted_t - 1))
    log_span = np.log1p(max(1e-7, shifted_total - 1))
    ratio = log_elapsed / log_span
    return min(max(0, ratio), 1.0)

In [None]:
# Generation settings; generate_and_save_data() forwards each of these to the
# dataset constructor under the keyword noted on the right.
IMG_SIZE = (1500, 1500)          # size
S_MAX = 5.0                      # S_max
D_DEPTH = 50                     # D
NU = 0.25                        # nu
CM = 1.0                         # cm
V_PARAM = 1.0                    # V
RANDOM_NOISE_STD = 0.56          # random_noise_std
TROPOSPHERIC_NOISE_BETA = 1.82   # tropospheric_noise_beta
TROPOSPHERIC_NOISE_SCALE = 1.0   # tropospheric_noise_scale
TOTAL_DAYS = 1460                # total_days
INTERVAL_DAYS = 49               # interval_days

# Output root: receives the noisy1/, noisy2/ and clean/ sub-folders and manifest.csv.
PRECOMPUTED_DATA_ROOT = r"Datasets/TopographicData"

def LoadDem(dem_path, shape=(12602, 11702)):
    """Read a headerless big-endian float32 raster into a 2-D array.

    Args:
        dem_path: path of the raw binary file.
        shape: ``(rows, cols)`` of the raster. The default keeps the dimensions
            that were previously hard-coded, so existing calls are unaffected.

    Returns:
        ``numpy.ndarray`` of dtype ``'>f4'`` with the requested ``shape``.

    Raises:
        ValueError: if the number of values in the file does not match ``shape``.
    """
    raw_data = np.fromfile(dem_path, dtype=np.dtype('>f4'))
    expected_count = int(np.prod(shape))
    if raw_data.size != expected_count:
        # Fail with the actual numbers instead of a bare reshape error.
        raise ValueError(
            f"DEM file {dem_path!r} holds {raw_data.size} values; "
            f"shape {tuple(shape)} needs {expected_count}."
        )
    return raw_data.reshape(shape)

def LoadBaselines(baseline_type='Sentinel-1', baselines_path=None):
    """Load one column of baselines from a whitespace-separated file and centre it.

    Args:
        baseline_type: ``'Sentinel-1'`` (reads column 2 of
            ``baselines_sentinel1.txt``) or ``'PALSAR-2'`` (reads column 1 of
            ``baselines_PALSAR-2.txt``).
        baselines_path: optional path overriding the default file name for the
            chosen type; the column choice still follows ``baseline_type``.

    Returns:
        ``list`` of baselines with their mean subtracted.

    Raises:
        ValueError: if ``baseline_type`` is not one of the supported names.
    """
    sensor_files = {
        'Sentinel-1': ('baselines_sentinel1.txt', 2),
        'PALSAR-2': ('baselines_PALSAR-2.txt', 1),
    }
    if baseline_type not in sensor_files:
        # Previously an unknown type fell through to an UnboundLocalError.
        raise ValueError(
            f"Unknown baseline_type {baseline_type!r}; expected one of {sorted(sensor_files)}."
        )
    default_path, col_to_use = sensor_files[baseline_type]
    if baselines_path is None:
        baselines_path = default_path

    # sep=r'\s+' replaces the deprecated delim_whitespace=True.
    df = pd.read_csv(baselines_path, sep=r'\s+', header=None, usecols=[col_to_use])
    baselines_list = df.iloc[:, 0].values
    centered_baselines = baselines_list - np.mean(baselines_list)
    return centered_baselines.tolist()

# Module-level inputs, loaded once when this cell runs. The file names are
# relative, so the DEM and both baseline files must be in the working directory.
DEM = LoadDem('Kyoto-Osaka.dehm') 
SENTINEL1_BASELINES = LoadBaselines(baseline_type='Sentinel-1')
PALSAR2_BASELINES = LoadBaselines(baseline_type='PALSAR-2')

def generate_and_save_data():
    """Generate every configured dataset and write it under ``PRECOMPUTED_DATA_ROOT``.

    For each entry of ``dataset_configs`` an ``InSARSyntheticData`` is built and
    every item is saved as three ``.pt`` tensors (``noisy1/``, ``noisy2/``,
    ``clean/``). Sample ids are global across configs. A ``manifest.csv`` listing
    id, config name, per-config index, time step and the three relative paths is
    written at the end.

    A sample that fails to generate or save is reported on stdout and skipped;
    its id is reused by the next successful sample.
    """
    print(f"Starting data generation. Files will be saved to: {PRECOMPUTED_DATA_ROOT}")
    for subdir in ('noisy1', 'noisy2', 'clean'):
        os.makedirs(os.path.join(PRECOMPUTED_DATA_ROOT, subdir), exist_ok=True)

    manifest_data = []
    global_sample_idx = 0

    # NOTE(review): the 'interval' key (16) is not used below; the datasets are
    # built with INTERVAL_DAYS instead. Confirm which spacing is intended.
    dataset_configs = [
        # Linear ('f_t': -1), Sentinel-1
        {'name': 'lin_asc_16_s1', 'f_t': -1, 'orbit': 'ascending', 'interval': 16, 'baselines': SENTINEL1_BASELINES, 'wavelength': 0.056},
        {'name': 'lin_desc_16_s1', 'f_t': -1, 'orbit': 'descending', 'interval': 16, 'baselines': SENTINEL1_BASELINES, 'wavelength': 0.056},

        # Linear ('f_t': -1), PALSAR-2
        {'name': 'lin_asc_16_p2', 'f_t': -1, 'orbit': 'ascending', 'interval': 16, 'baselines': PALSAR2_BASELINES, 'wavelength': 0.24},
        {'name': 'lin_desc_16_p2', 'f_t': -1, 'orbit': 'descending', 'interval': 16, 'baselines': PALSAR2_BASELINES, 'wavelength': 0.24},

        # Log ('f_t': f_log), Sentinel-1
        {'name': 'log_asc_16_s1', 'f_t': f_log, 'orbit': 'ascending', 'interval': 16, 'baselines': SENTINEL1_BASELINES, 'wavelength': 0.056},
        {'name': 'log_desc_16_s1', 'f_t': f_log, 'orbit': 'descending', 'interval': 16, 'baselines': SENTINEL1_BASELINES, 'wavelength': 0.056},

        # Log ('f_t': f_log), PALSAR-2
        {'name': 'log_asc_16_p2', 'f_t': f_log, 'orbit': 'ascending', 'interval': 16, 'baselines': PALSAR2_BASELINES, 'wavelength': 0.24},
        {'name': 'log_desc_16_p2', 'f_t': f_log, 'orbit': 'descending', 'interval': 16, 'baselines': PALSAR2_BASELINES, 'wavelength': 0.24},
    ]

    for config in dataset_configs:
        print(f"Generating data for config: {config['name']}")
        # Fix: the dataset class defined in this file is InSARSyntheticData; the
        # previous name (Noise2NoiseDataset) is not defined anywhere here.
        gen_dataset = InSARSyntheticData(
            size=IMG_SIZE, S_max=S_MAX, D=D_DEPTH, nu=NU, cm=CM, V=V_PARAM,
            random_noise_std=RANDOM_NOISE_STD, tropospheric_noise_beta=TROPOSPHERIC_NOISE_BETA,
            tropospheric_noise_scale=TROPOSPHERIC_NOISE_SCALE, total_days=TOTAL_DAYS,
            interval_days=INTERVAL_DAYS, f_t=config['f_t'], orbit_type=config['orbit'], baselines=config['baselines'],
            full_dem=DEM, wavelength=config['wavelength'])

        if len(gen_dataset) == 0:
            print(f"Warning: No samples generated for config {config['name']}. Check total_days and interval_days.")
            continue

        for i, time_step in enumerate(gen_dataset.times):
            # Best-effort by design: one failing sample must not abort the run.
            try:
                noisy1_tensor, noisy2_tensor, clean_tensor = gen_dataset[i]

                noisy1_fname = os.path.join('noisy1', f'sample_{global_sample_idx:06d}_noisy1.pt')
                noisy2_fname = os.path.join('noisy2', f'sample_{global_sample_idx:06d}_noisy2.pt')
                clean_fname = os.path.join('clean', f'sample_{global_sample_idx:06d}_clean.pt')

                torch.save(noisy1_tensor, os.path.join(PRECOMPUTED_DATA_ROOT, noisy1_fname))
                torch.save(noisy2_tensor, os.path.join(PRECOMPUTED_DATA_ROOT, noisy2_fname))
                torch.save(clean_tensor, os.path.join(PRECOMPUTED_DATA_ROOT, clean_fname))

                # Paths in the manifest are relative to PRECOMPUTED_DATA_ROOT.
                manifest_data.append({
                    'id': global_sample_idx,
                    'config_name': config['name'],
                    'original_idx_in_config': i,
                    'time_step': time_step,
                    'noisy1_path': noisy1_fname,
                    'noisy2_path': noisy2_fname,
                    'clean_path': clean_fname
                })
                global_sample_idx += 1
                if global_sample_idx % 10 == 0:
                    print(f"Saved sample {global_sample_idx}...")

            except Exception as e:
                print(f"Error generating/saving sample {global_sample_idx} (original index {i} in config {config['name']}): {e}")
                continue
        print(f"Finished generating for config: {config['name']}. Total samples so far: {global_sample_idx}")

    manifest_df = pd.DataFrame(manifest_data)
    manifest_path = os.path.join(PRECOMPUTED_DATA_ROOT, 'manifest.csv')
    manifest_df.to_csv(manifest_path, index=False)
    print(f"Data generation complete. Total samples: {global_sample_idx}. Manifest saved to {manifest_path}")

# Run the full generation when this cell executes (writes files under PRECOMPUTED_DATA_ROOT).
generate_and_save_data()

  df = pd.read_csv(baselines_path, delim_whitespace=True, header=None, usecols=[col_to_use])
  df = pd.read_csv(baselines_path, delim_whitespace=True, header=None, usecols=[col_to_use])


Starting data generation. Files will be saved to: Datasets/TopographicData
Generating data for config: lin_asc_16_s1
Saved sample 10...
Saved sample 20...
Saved sample 30...
Finished generating for config: lin_asc_16_s1. Total samples so far: 30
Generating data for config: lin_desc_16_s1
Saved sample 40...
Saved sample 50...
Saved sample 60...
Finished generating for config: lin_desc_16_s1. Total samples so far: 60
Generating data for config: lin_asc_16_p2
Saved sample 70...
Saved sample 80...
Saved sample 90...
Finished generating for config: lin_asc_16_p2. Total samples so far: 90
Generating data for config: lin_desc_16_p2
Saved sample 100...
Saved sample 110...
Saved sample 120...
Finished generating for config: lin_desc_16_p2. Total samples so far: 120
Generating data for config: log_asc_16_s1
Saved sample 130...
Saved sample 140...
Saved sample 150...
Finished generating for config: log_asc_16_s1. Total samples so far: 150
Generating data for config: log_desc_16_s1
Saved sample 16