In [None]:
"""
    File: Download_satellite_data.py
    Authors: Martin Sanner, Josh Veitch-Michaelis
    Created: 14.7.2023 - 15:14 CEST

    Data Downloader creating the required folder structure locally to save data to for COR1, COR2 data for specified times.
    Times are specified in the .ephemeris files, created using the Nasa HORIZONS tool. (.ephemeris are renamed text files)

    Steps:
        1: Load Ephemeris data
        2: Estimate when 60° angles are hit
        3: Gather data from COR1, COR2 on SECCHI
        4: Create folder structure for COR1, COR2 (e.g. /media/josh/22893c92-cb6e-4f81-a50d-a9ff93ad5a811/data/fdl/2023/onboard/data/events/20140221_231212/cor1/20140218_231512_s4c1a.fts)
        5: Download data to that folder
"""

import logging
import os
from datetime import date, datetime, timedelta
from functools import reduce
from glob import glob
from importlib.resources import files
import time
import shutil

import astropy.io.fits as fits
import numpy as np
import pandas as pd
import sscws
import sunpy
import yaml
import logging
from rich.progress import Progress
from sunpy.net import Fido
from sunpy.net import attrs as a
from typing import List
import astropy.table
from astropy.time.core import Time
from datetime import datetime, timedelta
from rich.progress import track

import astropy.io.fits as fitsio
import matplotlib.pyplot as plt
import cv2
from natsort import natsorted
%matplotlib inline

In [None]:
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
def load_ephemeris_data(fname: str) -> list:
    """
    Read a HORIZONS ephemeris text file and return only its table rows.

    The rows are the lines strictly between the "$$SOE" and "$$EOE" marker
    lines. When a marker occurs more than once its last occurrence is used;
    a marker that never occurs leaves the corresponding bound at 0.

    Args:
        fname: Path of the ephemeris file to read.

    Returns:
        The selected lines, each still carrying its trailing newline.
    """
    with open(fname) as handle:
        lines = handle.readlines()

    soe_hits = [idx for idx, text in enumerate(lines) if "$$SOE" in text]
    eoe_hits = [idx for idx, text in enumerate(lines) if "$$EOE" in text]

    # The table starts on the line after the start marker and stops just
    # before the end marker.
    first_row = soe_hits[-1] + 1 if soe_hits else 0
    stop_row = eoe_hits[-1] if eoe_hits else 0

    return lines[first_row:stop_row]


def data_to_vectors(data, time_list: list = None):
    """
    Convert ephemeris table rows into a time vector and Cartesian positions.

    Each row is split on a five-space separator into a timestamp field and a
    numeric field. The second and third space-separated tokens of the
    timestamp field are parsed as "%Y-%b-%d %H:%M". The first three numeric
    columns are read as right ascension [deg], declination [deg] and range
    [km]; any further columns (e.g. the range rate) are ignored.

    The position is the standard spherical-to-Cartesian conversion

        x = r cos(DEC) cos(RA),  y = r cos(DEC) sin(RA),  z = r sin(DEC)

    NOTE(review): the reference frame of RA/DEC depends on how the HORIZONS
    file was generated; it is not visible from this function.

    Args:
        data: Iterable of row strings, e.g. from load_ephemeris_data.
        time_list: Optional list of datetimes from a previously parsed file.
            When given, every row's timestamp must already be contained in it
            and the same list object is returned as the time vector.

    Returns:
        Tuple ``(time, x, y, z, r)`` of equally ordered lists; positions and
        range are in km.

    Raises:
        AssertionError: If ``time_list`` is given and a row carries a
            timestamp that is not in it.
        ValueError: If a row does not match the expected layout.
    """
    time = time_list if time_list is not None else []
    # Build the lookup once so the per-row membership test is O(1).
    known_times = set(time_list) if time_list is not None else None
    x = []
    y = []
    z = []
    r = []

    for line in data:
        t, p = line.strip("\n").split("     ")
        stamp_fields = t.split(" ")
        timestamp = datetime.strptime(
            stamp_fields[1] + " " + stamp_fields[2], "%Y-%b-%d %H:%M"
        )
        if known_times is not None:
            # Raised explicitly (not via `assert`) so the check survives -O.
            if timestamp not in known_times:
                raise AssertionError("New Time found in line {}".format(line))
        else:
            time.append(timestamp)

        columns = p.split()
        ra = np.deg2rad(float(columns[0]))
        dec = np.deg2rad(float(columns[1]))
        cr = float(columns[2])  # in km
        # The range rate (next column) is intentionally not used.

        # The azimuthal angle is RA itself; scaling it by cos(DEC) would
        # rotate every point that lies off the reference plane.
        x.append(cr * np.cos(dec) * np.cos(ra))  # km
        y.append(cr * np.cos(dec) * np.sin(ra))  # km
        z.append(cr * np.sin(dec))  # km
        r.append(cr)
    return time, x, y, z, r

def download_batch(batch: List, folder: str) -> None:
    """
    Download every file of a Fido search result into ``folder``.

    The folder is created if needed; a failure to create it is logged and the
    download is still attempted. Transfers that fail on the first pass are
    retried once. A KeyboardInterrupt aborts the batch silently; any other
    exception is logged and swallowed so a long-running caller keeps going.

    Args:
        batch: Search result as returned by ``Fido.search``.
        folder: Destination directory for the downloaded files.
    """
    try:
        os.makedirs(folder, exist_ok=True, mode=0o777)
    except OSError as e:
        logging.error("Error when creating Folder = {} - {}".format(folder, e))

    target = "{}/".format(folder)
    try:
        downloaded = Fido.fetch(
            batch,
            path=target,
            progress=False,
            overwrite=True,
            max_conn=8
        )

        # Failed transfers are recorded on the result's `errors` attribute;
        # passing the result object back to Fido.fetch retries exactly those.
        # Gating on the error list (rather than on the number of successful
        # files) also retries when *every* transfer failed.
        if getattr(downloaded, "errors", None):
            Fido.fetch(
                downloaded,
                path=target,
                progress=False,
                overwrite=True,
                max_conn=8
            )
    except KeyboardInterrupt:
        return
    except Exception as e:
        logging.error(
            "Error encountered in downloading batch: {}".format(e)
        )

    return

def get_events(min_time, max_time):
    """
    Search HEK (via Fido) for events of type "CE" in a time window.

    Both bounds are converted with ``str()`` before being handed to the time
    attribute.

    Returns:
        The result of ``Fido.search`` for that window and event type.
    """
    window = a.Time(str(min_time), str(max_time))
    return Fido.search(window, a.hek.EventType("CE"))

def get_images(min_time, max_time) -> dict:
    """
    Search Fido for coronagraph images of four detectors in a time window.

    Both bounds are converted with ``str()`` before being handed to the time
    attribute. One search is issued per instrument/detector pair.

    Returns:
        Dict with the keys "cor1", "cor2", "lasco_c2" and "lasco_c3", each
        mapping to the corresponding ``Fido.search`` result.
    """
    window = a.Time(str(min_time), str(max_time))

    # (result key, instrument, detector) — searched in this order.
    sources = (
        ("cor1", "SECCHI", "COR1"),
        ("cor2", "SECCHI", "COR2"),
        ("lasco_c2", "LASCO", "C2"),
        ("lasco_c3", "LASCO", "C3"),
    )

    return {
        key: Fido.search(window, a.Instrument(instrument), a.Detector(detector))
        for key, instrument, detector in sources
    }

def generate_preview_and_video(folder, pattern="*.fts", out="event.avi"):
    """
    Render the FITS images of a folder to JPEG previews and one MJPG video.

    Images matching ``pattern`` are processed in natural sort order. Each one
    is log-scaled, normalised to 0..255, colour-mapped with INFERNO, written
    next to the source as ``<stem>.jpg`` and appended to the video
    ``<folder>/<out>`` (3 fps). Images that cannot be processed are logged and
    skipped. Nothing is written when no image matches.

    Args:
        folder: Directory holding the images; previews and video go here too.
        pattern: Glob pattern selecting the images.
        out: File name of the video inside ``folder``.
    """
    ims = natsorted(glob(os.path.join(folder, pattern)))

    if len(ims) == 0:
        logger.info("No images found matching pattern")
        return

    # numpy shapes are (rows, cols) = (height, width), whereas OpenCV expects
    # the frame size as (width, height).
    height, width = fitsio.getdata(ims[0]).shape[:2]

    vid_path = os.path.join(folder, out)
    writer = cv2.VideoWriter(
        vid_path,
        apiPreference=cv2.CAP_FFMPEG,
        frameSize=(int(width), int(height)),
        fourcc=cv2.VideoWriter_fourcc(*"MJPG"),
        fps=3,
    )

    try:
        for im in ims:
            try:
                pixels = fitsio.getdata(im).astype(np.float32)

                # Zero / negative pixels give -inf / NaN under log; keep them
                # out of the normalisation range so they cannot turn the
                # whole frame into NaN.
                with np.errstate(divide="ignore", invalid="ignore"):
                    raw_image = np.log(pixels)
                finite = np.isfinite(raw_image)
                if not finite.any():
                    logger.warning("Skipping {}: no positive pixel values".format(im))
                    continue
                low = raw_image[finite].min()
                high = raw_image[finite].max()
                # Non-finite pixels are mapped to the darkest level.
                raw_image = np.where(finite, raw_image, low)

                scaled = (raw_image - low) / (high - low + 1e-15)
                scaled *= 255
                scaled = scaled.astype(np.uint8)

                base = os.path.splitext(os.path.basename(im))[0]
                colored = cv2.applyColorMap(scaled, cv2.COLORMAP_INFERNO)
                cv2.imwrite(os.path.join(folder, base+".jpg"), colored)
                writer.write(colored)
            except Exception as e:
                # Best effort: one bad image must not abort the whole folder,
                # but the failure is reported and Ctrl-C is not swallowed.
                logger.warning("Skipping {}: {}".format(im, e))
                continue
    finally:
        writer.release()

In [None]:
# Ephemeris files are looked up relative to "./" (os.path.dirname("./") is ".").
ephem_dir = os.path.dirname("./")
l5_positions_fname = os.path.join(ephem_dir, "L5_positions.ephemeris")
stereoA_positions_fname = os.path.join(ephem_dir, "StereoA_positions.ephemeris")
stereoB_positions_fname = os.path.join(ephem_dir, "StereoB_positions.ephemeris")
SOHO_positions_fname = os.path.join(ephem_dir, "SOHO_positions.ephemeris")

# Read the table rows (text between the $$SOE / $$EOE markers) of each file
l5_positions_data = load_ephemeris_data(l5_positions_fname)
stereoA_positions_data = load_ephemeris_data(stereoA_positions_fname)
stereoB_positions_data = load_ephemeris_data(stereoB_positions_fname)
SOHO_positions_data = load_ephemeris_data(SOHO_positions_fname)
# TODO: Create a method to find the dataframe without requiring equal time
# The L5 file defines the time axis; the other files are parsed against it and
# data_to_vectors raises if they contain a timestamp the L5 file lacks.
l5_time, L5x, L5y, L5z, L5r = data_to_vectors(l5_positions_data)
_, SAx, SAy, SAz, SAr = data_to_vectors(stereoA_positions_data, l5_time)
_, SBx, SBy, SBz, SBr = data_to_vectors(stereoB_positions_data, l5_time)
_, Sohox, Sohoy, Sohoz, Sohor = data_to_vectors(SOHO_positions_data, l5_time)

# Column labels, in the same order as the vectors stacked into initial_data.
initial_colnames = [
    "L5 x [km]",
    "L5 y [km]",
    "L5 z [km]",
    "L5 r [km]",
    "SA x [km]",
    "SA y [km]",
    "SA z [km]",
    "SA r [km]",
    "SB x [km]",
    "SB y [km]",
    "SB z [km]",
    "SB r [km]",
    "SOHO x [km]",
    "SOHO y [km]",
    "SOHO z [km]",
    "SOHO r [km]",
]
# Stack the 16 vectors as rows, then transpose so each vector becomes a column
# and each row corresponds to one timestamp.
initial_data = (
    np.asarray(
        [
            L5x,
            L5y,
            L5z,
            L5r,
            SAx,
            SAy,
            SAz,
            SAr,
            SBx,
            SBy,
            SBz,
            SBr,
            Sohox,
            Sohoy,
            Sohoz,
            Sohor,
        ]
    )
).T

# One row per L5 timestamp. NOTE(review): this assumes all four files have the
# same number of rows as the L5 file; data_to_vectors only checks that the
# timestamps are known, not that none are missing — confirm.
df = pd.DataFrame(initial_data, index=l5_time, columns=initial_colnames)
def _distance_km(frame, first, second):
    """Euclidean distance between two bodies from their "<body> x|y|z [km]" columns."""
    return np.sqrt(
        sum(
            (frame[f"{first} {axis} [km]"] - frame[f"{second} {axis} [km]"]) ** 2
            for axis in "xyz"
        )
    )


def _separation_angle_deg(frame, first, second):
    """Angle in degrees between the position vectors of two bodies.

    Computed as arccos(a . b / (|a| |b|)) from the "<body> x|y|z|r [km]"
    columns.
    """
    dot = sum(
        frame[f"{first} {axis} [km]"] * frame[f"{second} {axis} [km]"]
        for axis in "xyz"
    )
    cosine = dot / (frame[f"{first} r [km]"] * frame[f"{second} r [km]"])
    # Floating-point rounding can push the ratio marginally outside [-1, 1]
    # for (anti-)parallel vectors, which would make arccos return NaN.
    return np.arccos(cosine.clip(-1.0, 1.0)) * 180 / np.pi


# Distances of both STEREO craft to the L5 position
df["Distance L5 Stereo A [km]"] = _distance_km(df, "L5", "SA")
df["Distance L5 Stereo B [km]"] = _distance_km(df, "L5", "SB")

# Pairwise separation angles between the spacecraft position vectors
df["Stereo AB Angle [deg]"] = _separation_angle_deg(df, "SA", "SB")
df["Stereo A Soho Angle [deg]"] = _separation_angle_deg(df, "SA", "SOHO")
df["Stereo B Soho Angle [deg]"] = _separation_angle_deg(df, "SB", "SOHO")

# Geometry constraints: target separation, tolerance band and distance limit
earth_l5_angle_degrees = 60
error_range_degrees = 10
required_distance_to_l5_km = 50000000  # km
max_angle_between_crafts_deg = earth_l5_angle_degrees + error_range_degrees
min_angle_between_crafts_deg = earth_l5_angle_degrees - error_range_degrees

min_angle_between_crafts_deg

approx_date_last_B_contact = date(2016, 9, 1)  # some time September 2016

# Rows where the STEREO A/B separation lies inside the tolerance band
# (both bounds inclusive)
df_angles_fit = df[
    df["Stereo AB Angle [deg]"].between(
        min_angle_between_crafts_deg, max_angle_between_crafts_deg
    )
]

# Perfect fits if it ever exists
#df_angles_perfect_fit = df.query(
#    "abs(`Stereo AB Angle [deg]` - {}) <= {}".format(earth_l5_angle_degrees, 1e-6)
#)

# Rows where both STEREO craft are at most the maximum angle away from SOHO
df_stereo_soho_angles_fit = df[
    (df["Stereo B Soho Angle [deg]"] <= max_angle_between_crafts_deg)
    & (df["Stereo A Soho Angle [deg]"] <= max_angle_between_crafts_deg)
]

# Rows where a STEREO craft is within the required distance of L5
df_StereoB_close = df[
    df["Distance L5 Stereo B [km]"] <= required_distance_to_l5_km
]
df_StereoA_close = df[
    df["Distance L5 Stereo A [km]"] <= required_distance_to_l5_km
]

# Timestamps of the angle-based selections; the angle criterion is treated as
# more important than the distance criterion for now.
angle_AB_index = df_angles_fit.index
angle_ABSOHO_index = df_stereo_soho_angles_fit.index

stereoab = set(angle_AB_index)
soho_stereoab = set(angle_ABSOHO_index)

# Timestamps satisfying both angle criteria, in chronological order
overlap_dates = sorted(stereoab & soho_stereoab)

In [None]:
config_path = "/home/josh/code/2023-europe-space-weather/config"

# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
# acceptable only for a trusted local config; consider yaml.safe_load.
with open(os.path.join(config_path, "onboard.yaml"), "r") as f:
    data_path = yaml.load(f, Loader=yaml.Loader)["drive_locations"]["datapath"]

# Per-product folders below the configured data path
cor1_folder = os.path.join(data_path, "data", "cor1")
cor2_folder = os.path.join(data_path, "data", "cor2")
events_folder = os.path.join(data_path, "data", "events")

try:
    os.makedirs(events_folder, exist_ok=True)
except OSError as e:
    # Report the folder that actually failed; the previous handler referenced
    # an undefined name and raised NameError instead of logging.
    logging.error("Error when creating Folder = {} - {}".format(events_folder, e))

In [None]:
def image_type(x):
    """Return the first character of the last "_"-separated token of the file stem.

    Example: ".../20140218_231512_s4c1a.fts" -> "s".
    """
    stem = os.path.splitext(os.path.basename(x))[0]
    return stem.rsplit("_", 1)[-1][0]


def stereo_satellite(x):
    """Return the last character of the last "_"-separated token of the file stem.

    Example: ".../20140218_231512_s4c1a.fts" -> "a".
    """
    stem = os.path.splitext(os.path.basename(x))[0]
    return stem.rsplit("_", 1)[-1][-1]

In [None]:
def make_previews(root):
    """
    Generate previews and a video for every ``<root>/<satellite>/<angle>`` folder.

    Plain files directly inside ``root`` are skipped. Every entry of each
    remaining satellite folder is handed to generate_preview_and_video.

    Args:
        root: Folder containing one sub-folder per satellite.
    """
    for satellite in os.listdir(root):
        satellite_path = os.path.join(root, satellite)
        # os.listdir yields bare names, so the check must be made on the path
        # below `root`, not relative to the current working directory.
        if os.path.isfile(satellite_path):
            continue

        for angle in os.listdir(satellite_path):
            generate_preview_and_video(os.path.join(satellite_path, angle))

In [None]:
def sort_folder_pol(folder):
    """
    Sort the "*.fts" images of a folder by satellite and polarisation value.

    Each image is moved to ``<folder>/stereo_<sat>/<POLAR>/`` where ``<sat>``
    is the lower-cased last character of the file stem and ``<POLAR>`` is the
    value of the "POLAR" keyword in the image's FITS header. An existing file
    of the same name at the destination is replaced. ``folder`` itself is
    left in place.

    Args:
        folder: Directory containing the downloaded images.

    Raises:
        KeyError: If an image header has no "POLAR" keyword.
    """
    for image in glob(os.path.join(folder, "*.fts")):
        sat = stereo_satellite(image).lower()

        polar_angle = fitsio.getheader(image)['POLAR']
        pol_folder = os.path.join(folder, "stereo_"+str(sat), str(polar_angle))
        os.makedirs(pol_folder, exist_ok=True)

        # Giving the full destination file path lets shutil.move rename in
        # place when possible and fall back to copy-and-delete otherwise.
        shutil.move(image, os.path.join(pol_folder, os.path.basename(image)))

In [None]:
def copy_folder_to_gcs(source_folder_path, bucket, destination_folder_path=''):
    """
    Upload a local folder tree to a bucket, keeping the relative layout.

    Every file found below ``source_folder_path`` is uploaded to
    ``<destination_folder_path>/<path relative to source_folder_path>``.

    Args:
        source_folder_path: Local directory to upload recursively.
        bucket: Bucket object providing ``blob(name)``; the returned blob must
            provide ``upload_from_filename(path)``.
        destination_folder_path: Optional name prefix inside the bucket. A
            trailing "/" is appended when missing.
    """
    # Ensure the destination folder path ends with "/"
    if destination_folder_path and not destination_folder_path.endswith("/"):
        destination_folder_path += "/"

    for root, _, filenames in os.walk(source_folder_path):
        for filename in filenames:
            source_file_path = os.path.join(root, filename)

            # Blob names are "/"-separated regardless of the local path
            # separator, so the relative path is normalised before use.
            relative_path = os.path.relpath(source_file_path, source_folder_path)
            destination_blob_name = (
                destination_folder_path + relative_path.replace(os.sep, "/")
            )

            # Create a blob for the destination name and upload the file to it
            blob = bucket.blob(destination_blob_name)
            blob.upload_from_filename(source_file_path)

!gcloud auth application-default login

In [None]:
!gcloud auth application-default set-quota-project fdl-europe-space-weather
# Export the project id through the environment for this process.
os.environ['GOOGLE_CLOUD_PROJECT'] = 'fdl-europe-space-weather'

In [None]:
# Storage client and bucket handle, passed as `bucket` to copy_folder_to_gcs.
from google.cloud import storage    
client = storage.Client(project='fdl-europe-space-weather')
bucket = client.get_bucket('fdl_space_weather_data')

In [None]:
# Root folder that receives one sub-folder per capture date
output_folder = "/media/josh/josh_tuf_a/data/fdl/2023/onboard/"

try:
    os.makedirs(output_folder, exist_ok=True)
except OSError as e:
    # Report the folder that actually failed; the previous handler referenced
    # the undefined name `event_folder`.
    logging.error("Error when creating Folder = {} - {}".format(output_folder, e))

In [None]:
# Download loop: walks forward from _start in one-day windows and stores each
# day's images under <output_folder>/<YYYYMMDD>/<detector>/.
_start = datetime.strptime("20140301_00:00:00", "%Y%m%d_%H:%M:%S")
start = _start
end = start + timedelta(days=1)

# NOTE(review): with the strict `<` the loop runs for 29 one-day windows (the
# last window ends at _start + 29 days) — confirm whether 30 were intended.
while end < _start + timedelta(days=30):

    logger.info(f"Searching range {start} to {end}")
    
    # Search results for all four detectors, keyed by detector name
    res = get_images(start, end)
    capture_date = start.strftime("%Y%m%d")
    event_path = os.path.join(output_folder, capture_date)
    
    # COR1 / COR2: download only when the search returned something, then
    # sort the files into stereo_<sat>/<POLAR>/ sub-folders.
    if len(res['cor1']) != 0:
        folder = os.path.join(event_path, "cor1")
        logger.info(f"Starting Cor1 Download to {folder}")
        download_batch(res["cor1"], folder)
        sort_folder_pol(folder)
        #make_previews(folder)
    
    if len(res['cor2']) != 0:
        folder = os.path.join(event_path, "cor2")
        logger.info(f"Starting Cor2 Download to {folder}")
        download_batch(res["cor2"], folder)
        sort_folder_pol(folder)
        #make_previews(folder)
    
    # LASCO: unlike COR1/COR2 these downloads are attempted even when the
    # search result is empty, and the files are not sorted afterwards.
    logger.info("Starting Lasco C2 Download")
    folder = os.path.join(event_path, "lasco_c2")
    download_batch(res["lasco_c2"], folder)
    #generate_preview_and_video(folder, "*.fts", out="event.avi")
    
    logger.info("Starting Lasco C3 Download")
    folder = os.path.join(event_path, "lasco_c3")
    download_batch(res["lasco_c3"], folder)
    #generate_preview_and_video(folder, "*.fts", out="event.avi")
    
    # NOTE(review): the upload and local clean-up are currently disabled, so
    # this message is logged without anything being pushed.
    logger.info("Pushing to bucket")
    #copy_folder_to_gcs(event_path, bucket, f'space/{capture_date}')
    #shutil.rmtree(event_path)
    
    # Advance the window by one day
    start += timedelta(days=1)
    end += timedelta(days=1)

In [None]:
#!gsutil -m rsync -r <output_folder> gs://fdl_space_weather_data/onboard