### Process AIA data: check for NaN values, normalize the data by dividing by the
exposure time, and plot histograms to check for saturation levels.

In [None]:
# Import libraries
import sunpy.map
from sunpy.net import Fido, attrs as a
from astropy import units as u
import matplotlib.pyplot as plt

**Trying out simple plotting with one FITS file**

In [None]:
# Path to the single AIA FITS file (94 folder) that the exploratory cells load
file = "/mnt/data/SDO-AIA/94/2023-08-05T21:10:00.fits"

In [None]:
import sunpy.map
import numpy as np

# Load one FITS file as a SunPy map and take a flat copy of its pixel array
file = "/mnt/data/SDO-AIA/94/2023-08-05T21:10:00.fits"
aia_map = sunpy.map.Map(file)
data = aia_map.data.flatten()

# Build the non-finite masks first, then count and report them
nan_mask = np.isnan(data)
inf_mask = np.isinf(data)
num_nans = nan_mask.sum()
num_infs = inf_mask.sum()
print(f"Number of NaNs: {num_nans}")
print(f"Number of infinite values: {num_infs}")

# Value range, ignoring NaNs (infinities, if any, still count)
data_min = np.nanmin(data)
data_max = np.nanmax(data)
print(f"Data min: {data_min}")
print(f"Data max: {data_max}")

In [None]:
# Reload the file as a SunPy map and draw it with the map's default plot settings
aia_map = sunpy.map.Map(file)
aia_map.plot()
# Add a colorbar for the image just drawn, then title and render the figure
plt.colorbar()
plt.title('AIA 94 Å')
plt.show()

In [None]:
import matplotlib.colors as colors
from sunpy.visualization.colormaps import color_tables as ct
import numpy as np

# Plot the map on a logarithmic colour scale between 10 and the brightest pixel.
# np.nanmax is used for the upper limit because ndarray.max() returns NaN as
# soon as one pixel is NaN, which would hand LogNorm an unusable vmax.
aia_map.plot(norm=colors.LogNorm(vmin=10, vmax=np.nanmax(aia_map.data)))
plt.colorbar()
plt.title('AIA 94 Å (Log Scale)')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from sunpy.visualization.colormaps import color_tables as ct
import astropy.units as u
import numpy as np

# Check data positivity. The NaN-aware reduction is used because ndarray.min()
# returns NaN when any pixel is NaN, hiding the real minimum.
print(f"Min of aia_map.data: {np.nanmin(aia_map.data)}")
fig = plt.figure()
# Axes that use the map itself as the projection
ax = plt.subplot(projection=aia_map)
# Log-scaled image with the AIA 94 Å colour table; vmax is NaN-safe for the
# same reason as the minimum above.
im = aia_map.plot(cmap=ct.aia_color_table(94 * u.angstrom),
             norm=colors.LogNorm(vmin=10, vmax=np.nanmax(aia_map.data)))
plt.colorbar(im, ax=ax)
plt.title('AIA 94 Å (Log Scale)')
plt.show()

In [None]:
# Bare expression: the notebook displays the map's pixel array as the cell output
aia_map.data

In [None]:
# Quick look at the raw pixel array with plain matplotlib imshow (default
# colormap and scaling), bypassing the map's own plot helper
plt.imshow(aia_map.data)

In [None]:
from astropy.io import fits
import numpy as np
from pathlib import Path
from tqdm import tqdm
import random

# Parameters
data_folder = Path("/mnt/data/SDO-AIA/94")
sample_size = 1000  # upper bound on the number of files inspected
max_workers = 8  # not used in this cell; kept for a possible parallel version

# Gather every FITS file, then draw a random subset unless the folder holds
# fewer files than sample_size (in which case all of them are used)
fits_files = sorted(data_folder.glob("*.fits"))
sample_files = (
    random.sample(fits_files, sample_size)
    if len(fits_files) >= sample_size
    else fits_files
)

# Running totals over the files that could be read
total_files, total_pixels, total_nans = 0, 0, 0
files_with_nans = []

print(f"Checking for NaN values in {len(sample_files)} randomly sampled FITS files:\n")

for file in tqdm(sample_files, desc="Checking NaNs"):
    try:
        with fits.open(file, memmap=True) as hdul:
            # Read the second HDU when the file has one, otherwise the primary HDU
            data = hdul[1].data if len(hdul) > 1 else hdul[0].data
            num_pixels = data.size
            num_nans = np.isnan(data).sum()
    except Exception as e:
        # A file that cannot be read is reported and left out of the totals
        print(f"{file.name}: Failed to process - {e}")
    else:
        if num_nans > 0:
            print(f"{file.name}: NaNs = {num_nans} / {num_pixels} "
                  f"({(num_nans / num_pixels * 100):.6f}%)")
            files_with_nans.append(file.name)
        total_files += 1
        total_pixels += num_pixels
        total_nans += num_nans

print("\nSummary:")
print(f"Total sampled files checked: {total_files}")
print(f"Total pixels checked: {total_pixels}")
print(f"Total NaNs found: {total_nans} "
      f"({(total_nans / total_pixels * 100 if total_pixels else 0):.6f}%)")
if not files_with_nans:
    print("\nNo files with NaNs detected in the sampled set.")
else:
    print("\nFiles with NaNs:")
    for fname in files_with_nans:
        print(f" - {fname}")

In [None]:
from astropy.io import fits
import numpy as np
from pathlib import Path
from tqdm import tqdm
import random

def check_nan_in_wavelength_folder(folder_path, wavelength, sample_size=1000):
    """
    Print a NaN report for a random sample of the FITS files in one folder.

    Parameters:
        folder_path (str or Path): Folder searched (non-recursively) for ``*.fits``.
        wavelength (str): Label used only in the printed messages and progress bar.
        sample_size (int): Maximum number of files to inspect. When the folder
            holds fewer files than this, every file is checked.

    Returns:
        None. All results are printed.
    """
    candidates = sorted(Path(folder_path).glob("*.fits"))
    if not candidates:
        print(f"No FITS files found in {folder_path}")
        return

    # Random subset, unless the folder is smaller than the requested sample
    chosen = (
        random.sample(candidates, sample_size)
        if len(candidates) >= sample_size
        else candidates
    )

    # Running totals over the files that could be read
    checked_count = 0
    pixel_total = 0
    nan_total = 0
    nan_file_names = []
    print(f"\nChecking NaN values for AIA {wavelength} Å ({len(chosen)} sampled files):")

    for fits_path in tqdm(chosen, desc=f"Checking {wavelength} Å"):
        try:
            with fits.open(fits_path, memmap=True) as hdu_list:
                # Second HDU when present, otherwise the primary HDU
                hdu_index = 1 if len(hdu_list) > 1 else 0
                pixels = hdu_list[hdu_index].data
                n_pixels = pixels.size
                n_nans = np.isnan(pixels).sum()
        except Exception as e:
            # Unreadable files are reported and excluded from the totals
            print(f"{fits_path.name}: Failed to process - {e}")
        else:
            if n_nans > 0:
                print(f"{fits_path.name}: NaNs = {n_nans} / {n_pixels} "
                      f"({(n_nans / n_pixels * 100):.6f}%)")
                nan_file_names.append(fits_path.name)
            checked_count += 1
            pixel_total += n_pixels
            nan_total += n_nans

    print(f"\nSummary for {wavelength} Å:")
    print(f"Total sampled files checked: {checked_count}")
    print(f"Total pixels checked: {pixel_total}")
    print(f"Total NaNs found: {nan_total} "
          f"({(nan_total / pixel_total * 100 if pixel_total else 0):.6f}%)")
    if not nan_file_names:
        print(f"\nNo files with NaNs detected in the sampled set for {wavelength} Å.")
        return
    print(f"\nFiles with NaNs in {wavelength} Å:")
    for fname in nan_file_names:
        print(f" - {fname}")

# For reproducible sampling, uncomment the seed:
# random.seed(42)

# Map each AIA wavelength label to its folder under the common base path
base_path = "/mnt/data/SDO-AIA"
wavelength_folders = {
    channel: f"{base_path}/{channel}"
    for channel in ("94", "131", "171", "193", "211", "304")
}

# Run the NaN check once per wavelength, in the order listed above
for wavelength, folder_path in wavelength_folders.items():
    check_nan_in_wavelength_folder(folder_path, wavelength, sample_size=1000)

### Now we want to normalize for exposure by dividing all pixel intensity values by the exposure time

In [None]:
## Check the start and end dates covered by the files in the folder
# Import the datetime *class*: `fromisoformat` is a classmethod of
# `datetime.datetime`, so calling it on the `datetime` module raised
# AttributeError for every file and left `dates` empty.
from datetime import datetime
from pathlib import Path

# Folder path
data_folder = Path("/mnt/data/SDO-AIA/94")
# Collect all FITS files
fits_files = sorted(data_folder.glob("*.fits"))
dates = []
for file in fits_files:
    try:
        # Extract date from filename
        fname = file.stem  # removes .fits
        # Example: '2023-08-05T21:10:00'; anything after a '.' in the stem is dropped
        date_part = fname.split(".")[0]
        dt = datetime.fromisoformat(date_part)
    except ValueError as e:
        # Stems that are not ISO timestamps are reported and skipped
        print(f"{file.name}: Skipped - {e}")
    else:
        dates.append(dt)
if dates:
    first_date = min(dates)
    last_date = max(dates)
    print("Based on filenames:")
    print(f"First (earliest) file date: {first_date.isoformat()}")
    print(f"Last (latest) file date:    {last_date.isoformat()}")
    print(f"Total files scanned: {len(dates)}")
else:
    # Say so explicitly instead of ending silently when nothing could be parsed
    print("No parsable dates found in the filenames.")

In [None]:
## Select the specific days for the normalization and histogram plot
from pathlib import Path
from datetime import datetime

# Calendar days (YYYY-MM-DD) whose files should be selected
target_dates = {"2023-07-02", "2023-07-15", "2023-07-31", "2023-08-13"}

# Folder containing your FITS files
data_folder = Path("/mnt/data/SDO-AIA/94")
fits_files = sorted(data_folder.glob("*.fits"))

# Keep every file whose name starts with one of the target dates
specific_dates_files = []
for file in fits_files:
    try:
        fname = file.stem  # e.g. '2023-07-02T21:10:00'
        date_str = fname.partition("T")[0]  # text before the first 'T', e.g. '2023-07-02'
        if date_str not in target_dates:
            continue
        specific_dates_files.append(file)
    except Exception as e:
        print(f"{file.name}: Skipped - {e}")

print(f"\nTotal files matching the selected target dates: {len(specific_dates_files)}")
# Optional preview of the first matches:
# print("Examples:")
# for f in specific_dates_files[:10]:
#     print(f" - {f.name}")

### Normalize the target_dates FITS files

In [None]:
## Apply Normalization Step

import sunpy.map
import numpy as np
from pathlib import Path
from tqdm.notebook import tqdm

# Define target dates
target_dates = {
    "2023-07-02",
    "2023-07-15",
    "2023-07-31",
    "2023-08-13",
}

data_folder = Path("/mnt/data/SDO-AIA/94")
fits_files = sorted(data_folder.glob("*.fits"))

# Keep only the files whose stem starts with one of the target dates.
# Plain string operations cannot raise here, so no try/except is needed.
specific_dates_files = []
for file in tqdm(fits_files, desc="Filtering files by date", leave=True):
    fname = file.stem
    if "T" not in fname:
        tqdm.write(f"{file.name}: Filename does not contain 'T', skipped.")
        continue
    date_str = fname.split("T")[0]
    if date_str in target_dates:
        specific_dates_files.append(file)
print(f"\nTotal files matching target dates: {len(specific_dates_files)}")

## Normalization
all_normalized_data = []
# Defined up front so later cells can test `combined_data is None` instead of
# hitting a NameError when nothing was collected or concatenation failed.
combined_data = None
for file in tqdm(specific_dates_files, desc="Normalizing FITS files", leave=True):
    try:
        aia_map = sunpy.map.Map(file)
        exposure = aia_map.exposure_time
        if exposure is None:
            tqdm.write(f"{file.name}: No exposure time in header, skipped.")
            continue
        # NOTE(review): .value is the number in the header's own time unit,
        # presumably seconds — confirm for this dataset.
        exptime = exposure.value
        if not exptime > 0:
            # Dividing by a zero exposure would turn every pixel into inf/NaN
            tqdm.write(f"{file.name}: Non-positive exposure time ({exptime}), skipped.")
            continue
        normalized_data = aia_map.data / exptime
        ## Mask invalid values: keep only finite, strictly positive pixels
        valid = np.isfinite(normalized_data) & (normalized_data > 0)
        normalized_data = normalized_data[valid]
        all_normalized_data.append(normalized_data)
    except Exception as e:
        tqdm.write(f"{file.name}: Failed during normalization - {e}")
        continue

if all_normalized_data:
    try:
        combined_data = np.concatenate(all_normalized_data)
        print(f"\nTotal valid normalized pixels collected: {combined_data.size}")
    except MemoryError:
        print("\nMemoryError: Too many pixels to concatenate. Consider downsampling per file.")
else:
    print("\nNo valid data collected after normalization.")

In [None]:
## Normalize every AIA 94 Å file by its exposure time (read with SunPy) and print the mean normalized intensity
import sunpy.map
import numpy as np
from pathlib import Path
from tqdm.notebook import tqdm

# Folder with your AIA 94 Å files
data_folder = Path("/mnt/data/SDO-AIA/94")
fits_files = sorted(data_folder.glob("*.fits"))

# For each file: divide the pixels by the exposure time, keep the finite
# positive values, and print their mean
for file in tqdm(fits_files, desc="Normalizing AIA 94 wavelength by exposure time"):
    try:
        aia_map = sunpy.map.Map(file)
        exptime = aia_map.exposure_time.value  # plain number; presumably seconds
        normalized_data = aia_map.data / exptime

        # One combined mask drops NaN/Inf and non-positive pixels; the result is 1-D
        keep = np.isfinite(normalized_data) & (normalized_data > 0)
        normalized_data = normalized_data[keep]

        mean_val = normalized_data.mean()
        print(f"{file.name}: Mean normalized intensity = {mean_val:.2f}")
        # If we want to save normalized FITS for ML pipelines or consistency.
        # NOTE(review): normalized_data is a filtered 1-D array at this point,
        # so saving an image would need the unmasked `aia_map.data / exptime`.
        # from astropy.io import fits
        # hdu = fits.PrimaryHDU(normalized_data, header=aia_map.meta)
        # hdu.writeto(f"/mnt/data/SDO-AIA/94_normalized/{file.name}", overwrite=True)
    except Exception as e:
        print(f"{file.name}: Failed - {e}")

In [None]:
## plot the basic histogram

import matplotlib.pyplot as plt
# Flat copy of the current map's pixels with the NaNs dropped
data = aia_map.data.flatten()
data = data[np.logical_not(np.isnan(data))]

# New figure; draw on its current axes through the object-oriented interface
fig = plt.figure()
ax = fig.gca()
ax.hist(data, bins=500, color='gray')
ax.set_xlabel('Pixel Intensity')
ax.set_ylabel('Number of Pixels')
ax.set_title('Histogram of AIA Map Data')
plt.show()

In [None]:
## plot the log-log histogram

import numpy as np
import matplotlib.pyplot as plt

# Flat copy of the current map's pixels, keeping only non-NaN positive values
# (a log axis cannot show zero or negative intensities)
data = aia_map.data.flatten()
data = data[np.logical_not(np.isnan(data))]
data = data[data > 0]

fig = plt.figure()
ax = fig.gca()

# Bin first, then draw the counts as a line so both axes can be log-scaled.
# NOTE(review): bins=500 gives equal-width (linear) bins, so most of the
# log-scaled x range is covered by only the first few bins.
counts, bins = np.histogram(data, bins=500)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
ax.plot(bin_centers, counts)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Pixel Intensity (log scale)')
ax.set_ylabel('Number of Pixels (log scale)')
ax.set_title('Log-Log Histogram of AIA Map Data')
plt.show()

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Smoke test for ipywidgets: a bordered Output widget wrapped in an Accordion
log_output = widgets.Output(layout={'border': '1px solid black'})
accordion = widgets.Accordion(children=[log_output])
accordion.set_title(0, 'Test Dropdown')
# No section selected — presumably this leaves the accordion collapsed until clicked
accordion.selected_index = None
display(accordion)
# Output printed inside this context is captured by the Output widget
with log_output:
    print("If you see this message inside a collapsible box, ipywidgets is working.")

In [None]:
from tqdm.notebook import tqdm
import time
import ipywidgets as widgets
from IPython.display import display

# Smoke test combining a tqdm progress bar with a log captured in an Accordion
log_output = widgets.Output(layout={'border': '1px solid black'})
accordion = widgets.Accordion(children=[log_output])
accordion.set_title(0, 'Progress Details')
# No section selected — presumably this leaves the accordion collapsed until clicked
accordion.selected_index = None
display(accordion)

# Five dummy steps: each writes one line into the log widget, then waits 0.5 s
for i in tqdm(range(5), desc="Test Progress"):
    with log_output:
        print(f"Processing step {i+1}/5")
    time.sleep(0.5)

In [None]:
import sunpy.map
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.notebook import tqdm
import ipywidgets as widgets
from IPython.display import display

def plot_aia_folder_histogram(
    folder_path,
    num_bins=500,
    min_intensity=1,
    max_intensity=1e4,
    wavelength="94 Å",
    figsize=(14, 6)
):
    """
    Incrementally process all FITS files in folder_path,
    plot normal + log-log histograms without memory overflow.

    Each file is histogrammed on its own into shared log-spaced bins and the
    counts are summed, so only one image is held in memory at a time. A
    per-file line (NaN/Inf counts, min, max) and any failures are written to a
    collapsible log widget. Pixels that are NaN, infinite or not strictly
    positive are discarded; positive pixels outside
    [min_intensity, max_intensity] are not plotted, but their total is
    reported in the log so clipped (e.g. very bright) pixels do not go
    unnoticed.

    Parameters:
        folder_path (str or Path): Path to the folder containing FITS files.
        num_bins (int): Number of histogram bins.
        min_intensity (float): Minimum intensity for binning (avoids log issues).
        max_intensity (float): Maximum intensity for binning.
        wavelength (str): For labeling plots.
        figsize (tuple): Figure size.

    Returns:
        None. The figure is shown; nothing is returned.

    Raises:
        ValueError: If num_bins < 1, or unless 0 < min_intensity < max_intensity.
    """
    # Validate before doing any work: log-spaced bins need a positive,
    # increasing range, otherwise every file would fail inside the loop.
    if num_bins < 1:
        raise ValueError(f"num_bins must be at least 1, got {num_bins}")
    if not 0 < min_intensity < max_intensity:
        raise ValueError(
            "Intensity range must satisfy 0 < min_intensity < max_intensity, "
            f"got min_intensity={min_intensity}, max_intensity={max_intensity}"
        )

    folder = Path(folder_path)
    fits_files = sorted(folder.glob("*.fits"))

    # Collapsible log setup
    log_output = widgets.Output(layout={'border': '1px solid black'})
    accordion = widgets.Accordion(children=[log_output])
    accordion.set_title(0, 'Detailed FITS File Processing Log')
    accordion.selected_index = None
    display(accordion)

    bins = np.logspace(np.log10(min_intensity), np.log10(max_intensity), num_bins + 1)
    combined_counts = np.zeros(num_bins)
    # Positive pixels that fall outside the bin range (np.histogram drops them)
    below_range = 0
    above_range = 0

    for file in tqdm(fits_files, desc=f"Processing {wavelength} FITS files"):
        try:
            aia_map = sunpy.map.Map(file)
            # ravel avoids an extra copy; the array is only read and masked below
            data = aia_map.data.ravel()

            num_nans = np.isnan(data).sum()
            num_infs = np.isinf(data).sum()
            data_min = np.nanmin(data)
            data_max = np.nanmax(data)

            with log_output:
                print(f"{file.name}: NaNs={num_nans}, Infs={num_infs}, Min={data_min}, Max={data_max}")

            # Drop NaN/Inf in one pass, then the non-positive values
            data = data[np.isfinite(data)]
            data = data[data > 0]

            below_range += int(np.count_nonzero(data < bins[0]))
            above_range += int(np.count_nonzero(data > bins[-1]))

            counts, _ = np.histogram(data, bins=bins)
            combined_counts += counts

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole folder
            with log_output:
                print(f"{file.name}: Failed to process - {e}")

    if below_range or above_range:
        with log_output:
            print(f"Pixels outside [{min_intensity}, {max_intensity}] not plotted: "
                  f"{below_range} below, {above_range} above")

    if combined_counts.sum() == 0:
        with log_output:
            print("No valid data collected for histogram.")
        return

    # Geometric mean of the edges: the midpoint of each bin on a log axis
    bin_centers = np.sqrt(bins[:-1] * bins[1:])

    fig, axs = plt.subplots(1, 2, figsize=figsize)

    # Normal histogram (linear axes, same log-spaced bins)
    axs[0].plot(bin_centers, combined_counts, lw=1)
    axs[0].set_xlabel('Pixel Intensity')
    axs[0].set_ylabel('Number of Pixels')
    axs[0].set_title(f'Normal Histogram of AIA {wavelength} Data')
    axs[0].grid(True, ls=":")

    # Log-log histogram
    axs[1].plot(bin_centers, combined_counts, lw=1)
    axs[1].set_xscale('log')
    axs[1].set_yscale('log')
    axs[1].set_xlabel('Pixel Intensity (log scale)')
    axs[1].set_ylabel('Number of Pixels (log scale)')
    axs[1].set_title(f'Log-Log Histogram of AIA {wavelength} Data')
    axs[1].grid(True, which="both", ls=":")

    plt.tight_layout()
    plt.show()


In [None]:
# Build the combined histogram for the AIA 94 Å folder. The keyword values
# repeat the function's defaults and are spelled out for readability.
# NOTE(review): np.histogram ignores values outside the bin range, so pixels
# brighter than max_intensity (1e4) are not counted — raise it if saturated
# pixels should be visible; confirm the detector's saturation level.
plot_aia_folder_histogram(
    folder_path="/mnt/data/SDO-AIA/94",
    num_bins=500,
    min_intensity=1,
    max_intensity=1e4,
    wavelength="94 Å"
)