In [3]:
import os
import torch
from PIL import Image
import numpy as np
from torchvision.utils import save_image
from torchvision import transforms
from astropy.io import fits
import requests
import tarfile
import os
import torch
from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer
import numpy as np
from torchvision import transforms
from PIL import Image
import psutil

# Function to downsample a 2-D field (e.g. 2048x2048) to 256x256 by averaging pixel blocks
def downsample_to_256x256(field):
    """Block-average a 2-D field down to a 256x256 array.

    Each output pixel is the mean of a non-overlapping block of input pixels.
    The block size is derived from the input shape, so a 2048x2048 field uses
    8x8 blocks exactly as before, while other sizes whose sides are multiples
    of 256 are handled as well.

    Args:
        field: 2-D array-like whose two dimensions are non-zero multiples of 256.

    Returns:
        numpy.ndarray of shape (256, 256) holding the block means.

    Raises:
        ValueError: if ``field`` is not 2-D or a side is not a positive
            multiple of 256.
    """
    field = np.asarray(field)
    if field.ndim != 2:
        raise ValueError(f"expected a 2-D field, got shape {field.shape}")

    rows, cols = field.shape
    if rows == 0 or cols == 0 or rows % 256 != 0 or cols % 256 != 0:
        # A bare reshape would accept any array with the right element count
        # (e.g. 1024x4096) and silently average the wrong pixels together.
        raise ValueError(
            f"field sides must be positive multiples of 256, got shape {field.shape}"
        )

    # Split each axis into (output index, offset inside the block), then
    # average over the two offset axes.
    reshaped = field.reshape(256, rows // 256, 256, cols // 256)
    return reshaped.mean(axis=(1, 3))

# Function to find the global min and max kappa values across all downsampled FITS files
def find_global_min_max_kappa(directory):
    """Scan the convergence maps in ``directory`` for their global value range.

    Files named ``WLconv_z2.00_0001r.fits`` through ``WLconv_z2.00_1023r.fits``
    are looked up; missing files are skipped. Each existing map is downsampled
    with ``downsample_to_256x256`` before its minimum and maximum are taken, so
    the range describes the downsampled data, not the raw pixels.

    Args:
        directory: folder containing the FITS maps.

    Returns:
        Tuple ``(min_kappa, max_kappa)`` over all maps that were found.

    Raises:
        FileNotFoundError: if none of the expected files exists in ``directory``.
    """
    min_kappa = np.inf
    max_kappa = -np.inf
    files_seen = 0

    # NOTE(review): range(1, 1024) stops at index 1023 — confirm the dataset
    # has no map numbered 1024.
    for i in range(1, 1024):
        file_name = f"WLconv_z2.00_{i:04d}r.fits"
        file_path = os.path.join(directory, file_name)

        if not os.path.exists(file_path):
            continue

        with fits.open(file_path) as hdul:
            downscaled_data = downsample_to_256x256(hdul[0].data)

        files_seen += 1
        min_kappa = min(min_kappa, downscaled_data.min())
        max_kappa = max(max_kappa, downscaled_data.max())

    if files_seen == 0:
        # Without this, the (inf, -inf) sentinels would be returned and later
        # used as a normalization range.
        raise FileNotFoundError(
            f"No WLconv_z2.00_XXXXr.fits files found in {directory!r}"
        )

    return min_kappa, max_kappa

# Function to read a FITS file, downscale it to 256x256, and normalize it
def downscale_and_norm_fits(file_path, min_kappa, max_kappa):
    """Load one FITS map, downsample it and min-max normalize it.

    The primary HDU's data is reduced with ``downsample_to_256x256`` and then
    rescaled linearly so that ``min_kappa`` maps to 0 and ``max_kappa`` maps
    to 1. Values are not clipped.

    Args:
        file_path: path of the FITS file to read.
        min_kappa: value mapped to 0.
        max_kappa: value mapped to 1; must differ from ``min_kappa``.

    Returns:
        The normalized, downsampled array.

    Raises:
        ValueError: if ``max_kappa == min_kappa`` (the rescale would divide by zero).
    """
    if max_kappa == min_kappa:
        # Fail before touching the file: a zero-width range would otherwise
        # fill the output with inf/NaN.
        raise ValueError(
            f"max_kappa and min_kappa are both {min_kappa!r}; cannot normalize"
        )

    with fits.open(file_path) as hdul:
        # Reduce while the file is still open; the mean yields a new array, so
        # nothing returned depends on the closed handle.
        downscaled_data = downsample_to_256x256(hdul[0].data)

    return (downscaled_data - min_kappa) / (max_kappa - min_kappa)
    
# Function to process FITS files: downsample, normalize, and save as CSV files
def process_fits_files_to_csvs(directory, output_directory):
    """Convert every convergence map in ``directory`` to a normalized CSV.

    The global kappa range is computed first with ``find_global_min_max_kappa``
    and printed. Each ``WLconv_z2.00_XXXXr.fits`` file (indices 1..1023) is then
    downsampled and normalized with ``downscale_and_norm_fits`` and written to
    ``output_directory`` as ``WLconv_z2.00_XXXXr.csv``. Missing inputs are
    reported and skipped.

    Args:
        directory: folder containing the input FITS files.
        output_directory: folder for the CSV files; created if absent.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    min_kappa, max_kappa = find_global_min_max_kappa(directory)
    print('Kappa min:', min_kappa)
    print('Kappa max:', max_kappa)

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    for i in range(1, 1024):
        file_name = f"WLconv_z2.00_{i:04d}r.fits"
        file_path = os.path.join(directory, file_name)

        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        downscaled_data = downscale_and_norm_fits(file_path, min_kappa, max_kappa)

        # Same basename with the extension swapped, without relying on the
        # extension being exactly five characters long.
        stem = os.path.splitext(file_name)[0]
        save_path = os.path.join(output_directory, f"{stem}.csv")

        # Values are written at float32 precision, matching the previous
        # tensor round-trip without needing torch.
        np.savetxt(save_path, downscaled_data.astype(np.float32), delimiter=",")
        print(f"Processed and saved: {save_path}")

# Define the input and output directories
# NOTE(review): these are machine-specific absolute paths. Both names are
# reassigned in a later cell before process_fits_files_to_csvs is called, so
# the values set here look unused — confirm and consider removing them.
input_directory = '/home2/mgjacob/Diffusion/data/kappa'
output_directory = '/home2/mgjacob/Diffusion/data/kappa_lossless'

In [1]:
# Download and extract the convergence-map archive from Columbia Lensing

raw_data_directory = "raw"
url = "http://astronomy.nmsu.edu/aklypin/SUsimulations/MassiveNuS/convergence_maps/darkenergy//kappa/Om0.290_Ode0.710_w-1.000_wa0.000_si0.800.tar.gz"

# Ensure the raw_data directory exists
os.makedirs(raw_data_directory, exist_ok=True)

# The archive is stored under its remote name (last component of the URL)
filename = url.split("/")[-1]
file_path = os.path.join(raw_data_directory, filename)

try:
    # Download the file
    print(f"Downloading {filename}...")
    # The context manager releases the streamed connection even if writing
    # fails; the timeout keeps a stalled server from hanging the cell forever.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        downloaded_size = 0

        with open(file_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # Only write non-empty chunks
                    file.write(chunk)
                    downloaded_size += len(chunk)
                    # Print progress; servers may omit Content-Length, in which
                    # case a percentage cannot be computed (would divide by zero).
                    if total_size > 0:
                        progress = (downloaded_size / total_size) * 100
                        print(f"\rProgress: {progress:.2f}%", end="")
                    else:
                        print(f"\rDownloaded: {downloaded_size} bytes", end="")
    print(f"\nDownloaded {filename} to {file_path}.")

    # Extract the tar.gz file
    print(f"Extracting {filename}...")
    with tarfile.open(file_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: the "data" filter refuses
            # members that would be written outside raw_data_directory.
            tar.extractall(path=raw_data_directory, filter="data")
        else:
            # Older Python without extraction filters: previous behaviour.
            tar.extractall(path=raw_data_directory)
    print(f"Extracted files to {raw_data_directory}.")

    # Delete the tar.gz file after a successful extraction
    os.remove(file_path)
    print(f"Removed the downloaded file {file_path}.")

except requests.exceptions.RequestException as e:
    print(f"Error downloading the file: {e}")
except tarfile.TarError as e:
    print(f"Error extracting the file: {e}")

# Contents will be saved to raw/Om0.290_Ode0.710_w-1.000_wa0.000_si0.800/512b260/kappa


Downloading Om0.290_Ode0.710_w-1.000_wa0.000_si0.800.tar.gz...
Progress: 100.00%
Downloaded Om0.290_Ode0.710_w-1.000_wa0.000_si0.800.tar.gz to raw/Om0.290_Ode0.710_w-1.000_wa0.000_si0.800.tar.gz.
Extracting Om0.290_Ode0.710_w-1.000_wa0.000_si0.800.tar.gz...


  tar.extractall(path=output_directory)


Extracted files to raw.
Removed the downloaded file raw/Om0.290_Ode0.710_w-1.000_wa0.000_si0.800.tar.gz.


In [4]:
# Process the FITS files: downsample to 256x256, normalize, and save them as CSV files
input_directory = 'raw/Om0.290_Ode0.710_w-1.000_wa0.000_si0.800/512b260/kappa/'
output_directory = 'kappa_maps/'
process_fits_files_to_csvs(input_directory, output_directory)

tester
Kappa min: -0.08201675
Kappa max: 0.7101586
Processed and saved: processed/WLconv_z2.00_0001r.csv
Processed and saved: processed/WLconv_z2.00_0002r.csv
Processed and saved: processed/WLconv_z2.00_0003r.csv
Processed and saved: processed/WLconv_z2.00_0004r.csv
Processed and saved: processed/WLconv_z2.00_0005r.csv
Processed and saved: processed/WLconv_z2.00_0006r.csv
Processed and saved: processed/WLconv_z2.00_0007r.csv
Processed and saved: processed/WLconv_z2.00_0008r.csv
Processed and saved: processed/WLconv_z2.00_0009r.csv
Processed and saved: processed/WLconv_z2.00_0010r.csv
Processed and saved: processed/WLconv_z2.00_0011r.csv
Processed and saved: processed/WLconv_z2.00_0012r.csv
Processed and saved: processed/WLconv_z2.00_0013r.csv
Processed and saved: processed/WLconv_z2.00_0014r.csv
Processed and saved: processed/WLconv_z2.00_0015r.csv
Processed and saved: processed/WLconv_z2.00_0016r.csv
Processed and saved: processed/WLconv_z2.00_0017r.csv
Processed and saved: processed/

In [None]:
# Denoising network: U-Net configured for single-channel input
model = Unet(
    dim = 64,
    dim_mults = (1, 2, 4, 8),
    flash_attn = False,
    channels = 1
).cuda()

# Diffusion wrapper around the U-Net; image_size matches the 256x256 maps
# produced by the preprocessing step.
diffusion = GaussianDiffusion(
    model,
    image_size = 256,
    timesteps = 1000,    # number of steps
    sampling_timesteps = 250
).cuda()

# The training folder must be the directory the preprocessing cell wrote to
# ('kappa_maps/', relative to the working directory). The previous value
# '/kappa_maps/' pointed at the filesystem root instead.
# NOTE(review): the upstream Trainer's dataset appears to load only image
# files (jpg/jpeg/png/tiff) — confirm the installed version can read the CSV
# maps, or export the maps in a supported format.
trainer = Trainer(
    diffusion,
    'kappa_maps/',
    train_batch_size = 32,
    train_lr = 8e-5,
    train_num_steps = 70000,         # total training steps
    gradient_accumulate_every = 2,    # gradient accumulation steps
    ema_decay = 0.995,                # exponential moving average decay
    amp = True,                       # turn on mixed precision
    calculate_fid = False # whether to calculate fid during training
)

trainer.train()