# Downloading and preprocessing data from SUVI

If needed, install astropy.

In [None]:
!pip install astropy

Import needed libraries

In [None]:
import subprocess
import shutil
import os
from os import path 
import numpy as np
from astropy.io import fits
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from PIL import Image, ImageDraw

## 1. Download data 

Download files from the SWPC website for the GOES-16 satellite at a wavelength of 195 Å. The code goes recursively through all the years, months and days and downloads only the file that starts on the full hour, i.e. up to 24 files per day. The files are stored in the assigned output folder.

In [None]:
url = 'https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes16/l2/data/suvi-l2-ci195/'
output_folder = './SUVIfits'

# Build the wget call as an argument list so that no shell is involved:
# an output folder containing spaces or shell metacharacters is passed through
# verbatim, and the accept pattern needs no extra quoting.
#   -nH / -nd : do not recreate the host name or the remote directory tree
#   -r  / -np : recurse through the listing without ascending to the parent
#   -P        : directory the downloaded files are saved into
#   -A        : accept only file names matching the pattern (intended to keep
#               the files whose start timestamp falls on the full hour)
command = [
    "wget", "-nH", "-nd", "-r", "-np",
    "-P", output_folder,
    "-A", "dr_suvi-l2-ci195_g16_s20*T*0000Z_e*.fits",
    url,
]
# The exit status is deliberately not enforced: a long recursive download may
# finish with a non-zero status even though most files were fetched.
subprocess.run(command)


## 2. Convert fits to images

This code processes the FITS files stored in the directory fits_folder. It generates corresponding PNG images and saves them in another directory, image_folder. Images are generated using logarithmic scaling, where the minimum is set to a constant value of 0.1 and the maximum is dynamically calculated as the 99.9th percentile of the values in the FITS file. Images are saved without axes and margins. If an exception occurs, the name of the file is appended to corruptedfiles.txt.

In [None]:
fits_folder = 'SUVIfits'
image_folder = 'SUVIimgs'
os.makedirs(image_folder, exist_ok=True)
fits_files = os.listdir(fits_folder)

# Render every entry of fits_folder as a grayscale, log-scaled PNG with the
# same base name in image_folder. Entries that cannot be read or rendered are
# recorded in corruptedfiles.txt and the loop carries on with the next one.
for fits_file in fits_files:
    fits_path = os.path.join(fits_folder, fits_file)

    try:
        image_data = fits.getdata(fits_path)
        # NOTE(review): the array is flipped vertically and then drawn with
        # origin='lower' and a y-extent running from shape[0] down to 0;
        # confirm the resulting orientation against a reference image.
        image_data = np.flipud(image_data)
        image_name = fits_file.replace('.fits', '.png')
        image_path = os.path.join(image_folder, image_name)

        fig = plt.figure(figsize=(8, 8))
        try:
            # Lower limit fixed at 0.1; upper limit is the 99.9th percentile
            # of this image. clip=True maps out-of-range values to the limits.
            upper_limit = np.percentile(image_data, 99.9).round(5)
            plt.imshow(image_data, cmap='gray',
                       norm=LogNorm(vmin=0.1, vmax=upper_limit, clip=True),
                       extent=[0, image_data.shape[1], image_data.shape[0], 0],
                       origin='lower')
            plt.axis('off')
            plt.savefig(image_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, also when imshow/savefig fails;
            # otherwise every failing file would leave one more figure open.
            plt.close(fig)

    except Exception as e:
        # Best-effort batch conversion: remember the failing file and go on.
        with open('corruptedfiles.txt', 'a') as f:
            f.write(fits_file + '\n')
        print(f"Error for {fits_file}: {str(e)}")


## 3. Crop and resize image

This code crops the images (rootPath) based on information extracted from the FITS files (fitsPath). Cropped images are saved into a directory (cropPath). The Sun's disk (a circle) is cropped from the image according to the coordinates from the FITS file. Image dimensions are reduced from 1280 x 1280 to 256 x 256 pixels. The image is black and white. The code also adds a white background.

In [None]:
rootPath = r"SUVIimgs"
fitsPath = r"SUVIfits"
cropPath = r"SUVIcroped"

os.makedirs(cropPath, exist_ok=True)

# Index the FITS tree once (file name -> full path) instead of walking it again
# for every PNG. If the same name occurs in several sub-directories, the one
# visited last wins, which is also the one whose result was kept before.
fits_index = {}
for root1, dirs1, files1 in os.walk(fitsPath):
    for fits_name in files1:
        fits_index[fits_name] = os.path.join(root1, fits_name)

# For every PNG, read the solar-disk geometry from the header of the FITS file
# with the same base name, keep only the disk, put it on a white background and
# save the 256 x 256 result under the same file name in cropPath.
for root, dirs, files in os.walk(rootPath):
    for file in files:
        if not file.endswith(".png"):
            continue

        filename, extension = os.path.splitext(file)
        filename = filename + ".fits"
        file_path = fits_index.get(filename)
        if file_path is None:
            # No FITS file with a matching name: nothing to crop with.
            continue

        # The context manager closes the FITS file again; the plain
        # fits.open() call used before kept one handle open per image.
        with fits.open(file_path) as hdul:
            hdr = hdul[1].header
            width = hdr['NAXIS1']
            height = hdr['NAXIS2']
            diameter = hdr['DIAM_SUN']
            centerX = hdr['CRPIX1']
            centerY = hdr['CRPIX2']
        radius = diameter / 2

        with Image.open(os.path.join(root, file)) as source_image:
            image = source_image.resize((256, 256))

        # The mask is drawn at the size given in the FITS header, where the
        # header coordinates apply, and only then scaled to the output size.
        lum_img = Image.new('L', (width, height), 0)
        draw = ImageDraw.Draw(lum_img)
        draw.ellipse(((centerX - radius), (centerY - radius),
                      (centerX + radius), (centerY + radius)),
                     fill=255, outline="white")
        lum_img_resized = lum_img.resize(image.size)

        # Inside the disk the picture is kept, outside it the white background.
        white_bg = Image.new("RGBA", (256, 256), "WHITE")
        result = Image.composite(image, white_bg, lum_img_resized)
        result.save(os.path.join(cropPath, file))


## 4. Flip data

This script flips images (input_folder) vertically (top to bottom) and saves the flipped versions to another directory (output_folder).

In [None]:
input_folder = "SUVIcroped"
output_folder = "SUVIfliped"

# Create the destination directory on first use.
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

input_files = os.listdir(input_folder)

# Only entries with one of these extensions are mirrored; everything else in
# the input folder is left alone.
image_extensions = (".jpg", ".png", ".jpeg")

for filename in input_files:
    if not filename.endswith(image_extensions):
        continue
    source_path = os.path.join(input_folder, filename)
    target_path = os.path.join(output_folder, filename)
    image = Image.open(source_path)
    # Mirror top <-> bottom and store the result under the same file name.
    flipped_image = image.transpose(Image.FLIP_TOP_BOTTOM)
    flipped_image.save(target_path)

print("Images flipped and saved to", output_folder)


## 5. Split data to train and test set

In our work, we also divided the data into training and testing sets. The test set contained the data from 2022 and the training set contained the remaining years (2019, 2020, 2021 and 2023); files from any other year are skipped.

In [None]:
source_folder = "./SUVIfliped/"

folder_test = "./SUVItest/"
folder_train = "./SUVItrain/"

# Both destination directories have to exist before anything is copied.
for folder in (folder_test, folder_train):
    if not os.path.exists(folder):
        os.makedirs(folder)

# Substrings of the file name that mark an image as training data.
train_year_markers = ("s2019", "s2020", "s2021", "s2023")

for filename in os.listdir(source_folder):
    current_path = os.path.join(source_folder, filename)
    if not os.path.isfile(current_path):
        # Sub-directories and other non-files are ignored silently.
        continue

    # The year marker in the file name decides the destination: 2022 is the
    # test set, the listed other years are the training set.
    if "s2022" in filename:
        destination_folder = folder_test
    elif any(marker in filename for marker in train_year_markers):
        destination_folder = folder_train
    else:
        print(f"File {filename} doesn't match criteria. Skipping.")
        continue

    shutil.copy(current_path, os.path.join(destination_folder, filename))

print("File copying completed.")
