# Dataset v2

In [1]:
import pandas as pd
import numpy as np

import os
from datetime import datetime, timedelta
import re

from climso.utils import toSunpyMap

from astropy.coordinates import SkyCoord
import astropy.units as u
from sunpy.coordinates import HeliographicStonyhurst

import matplotlib.pyplot as plt


In [2]:
def convert_to_lon_lat(location):
    """Turn a location string such as 'S26E24' into signed integers.

    The string is read positionally: character 0 is the north/south letter,
    characters 1-2 are the latitude digits, character 3 is the east/west
    letter and everything after it is the longitude digits.

    Sign convention: 'N' gives a positive latitude (any other letter gives a
    negative one); 'E' gives a negative longitude (any other letter gives a
    positive one).

    Returns:
        tuple: (longitude, latitude) -- note that longitude comes first.
    """
    # North/south part
    is_north = location[0] == 'N'
    latitude = int(location[1:3])
    if not is_north:
        latitude = -latitude

    # East/west part
    is_east = location[3] == 'E'
    longitude = int(location[4:])
    if is_east:
        longitude = -longitude

    return longitude, latitude


# Extract datetime from filename using regex
def extract_datetime_from_filename(filename):
    """Parse the timestamp embedded in a filename.

    The timestamp is the first occurrence of eight digits, an underscore and
    eight more digits, delimited by underscores on both sides. It is parsed
    with the format '%Y%m%d_%H%M%S%f', so the last two digits are the
    fractional-second field.

    Returns:
        datetime | None: The parsed timestamp, or None when the filename has
        no such pattern or the digits do not form a valid date/time (in which
        case a message is printed).
    """
    found = re.search(r'_(\d{8}_\d{6}\d{2})_', filename)
    if not found:
        return None

    datetime_str = found.group(1)
    try:
        parsed = datetime.strptime(datetime_str, '%Y%m%d_%H%M%S%f')
    except ValueError as e:
        print(f"Error parsing datetime string: {datetime_str}, error: {e}")
        return None
    return parsed


# Find the closest file to a given time
def find_closest_file(target_time, files, max_diff=timedelta(minutes=5)):
    """Pick the (file, datetime) entry whose datetime is nearest to target_time.

    Args:
        target_time (datetime): The time to match.
        files: Iterable of (file, file_time) pairs.
        max_diff (timedelta): Largest accepted distance between target_time
            and the chosen file_time. Defaults to 5 minutes, the tolerance
            that used to be hard-coded here.

    Returns:
        tuple | None: The closest (file, file_time) pair, or None when
        `files` is empty or the closest entry is further away than max_diff.
        On a tie the entry that appears first in `files` wins.
    """
    # min() returns the first minimal element, which keeps the tie-breaking of
    # a strict "<" comparison loop; default=None covers an empty input.
    best = min(files, key=lambda entry: abs(entry[1] - target_time), default=None)
    if best is None:
        return None

    file, file_time = best
    if abs(file_time - target_time) > max_diff:
        return None

    return (file, file_time)

In [3]:
def getImageForEvent(event_time, data_root='Y:/data/CLIMSO/'):
    """Locate the calibrated l1 image taken closest to an event time.

    Args:
        event_time (str): Timestamp formatted as '%Y/%m/%d %H:%M'.
        data_root (str): Root folder of the image archive. Images are looked
            up in '<data_root>/<year>/data_calibrated/<YYYY-MM-DD>/'. The
            default is the location that used to be hard-coded here.

    Returns:
        str | None: Path (day directory + filename) of the closest
        'imoa_06563_l1_*' file, or None when the day directory does not exist
        or find_closest_file() finds no file close enough to event_time.

    Raises:
        ValueError: If event_time does not match '%Y/%m/%d %H:%M'.
    """
    event_datetime = datetime.strptime(event_time, '%Y/%m/%d %H:%M')

    directory = (
        f"{data_root.rstrip('/')}/{event_datetime.year}"
        f"/data_calibrated/{event_datetime:%Y-%m-%d}/"
    )

    # isdir (rather than exists) so that a stray file with the day's name
    # yields None instead of an error from os.listdir.
    if not os.path.isdir(directory):
        return None

    # Collect (filename, datetime) pairs for the l1 files, dropping names
    # whose timestamp could not be extracted.
    l1_file_datetimes = []
    for file in os.listdir(directory):
        if not file.startswith('imoa_06563_l1_'):
            continue
        file_time = extract_datetime_from_filename(file)
        if file_time is not None:
            l1_file_datetimes.append((file, file_time))

    # os.listdir order is arbitrary; sorting by time makes tie-breaking in
    # find_closest_file deterministic.
    l1_file_datetimes.sort(key=lambda x: x[1])

    # Find the closest file
    l1_closest_file_dt = find_closest_file(event_datetime, l1_file_datetimes)
    if l1_closest_file_dt is None:
        return None

    return directory + l1_closest_file_dt[0]

In [4]:
def createSample(event, flip=False):
    """Save a 250x250 pixel crop around a flare's active region as a PNG.

    The image closest to event['peak'] is looked up with getImageForEvent();
    the crop is centred on the pixel that event['AR location'] maps to and is
    written to 'dataset_v2/flare/<Event number>.png'.

    Nothing is saved (the function returns None early) when:
      * no image is found for the peak time,
      * sqrt(lat**2 + lon**2) of the location exceeds 60 degrees, or
      * the crop window would extend past the image edges.

    Args:
        event: Catalogue row; the keys 'peak', 'start', 'AR location' and
            'Event number' are read.
        flip (bool): When True the image data is flipped along axis 0 before
            cropping.
    """
    file = getImageForEvent(event['peak'])
    if file is None:
        return
    print('found file |', event['start'], '|', event['AR location'])

    lon, lat = convert_to_lon_lat(event['AR location'])
    if np.sqrt(lat**2 + lon**2) > 60:
        print('Too close to limb')
        return

    directory = 'dataset_v2/flare'
    os.makedirs(directory, exist_ok=True)

    l1_map = toSunpyMap(file, center_disk=True)
    coord = SkyCoord(lon*u.deg, lat*u.deg, frame=HeliographicStonyhurst, observer='earth', obstime=l1_map.date)
    x, y = [int(pixel_coord.value) for pixel_coord in l1_map.world_to_pixel(coord)]
    # NOTE(review): 2048 is presumably the image height in pixels; the row is
    # mirrored regardless of `flip` -- confirm against toSunpyMap's output.
    y = 2048 - y

    data = np.flip(l1_map.data, axis=0) if flip else l1_map.data

    # A window that leaves the array would silently produce an empty or
    # truncated crop (negative slice starts wrap around), so skip it instead.
    half = 125
    rows, cols = data.shape[0], data.shape[1]
    if not (half <= y <= rows - half and half <= x <= cols - half):
        print('Crop window outside image')
        return

    l1_image = data[y-half:y+half, x-half:x+half]

    # The key is looked up outside the f-string: reusing the f-string's own
    # quote character inside a replacement field is a SyntaxError before
    # Python 3.12.
    event_number = event['Event number']
    plt.imsave(f'{directory}/{event_number}.png', l1_image.astype(np.uint16), cmap='gray', vmin=0, vmax=65535, format='png')

In [5]:
def createSampleNoFlare(event, time, flip=False):
    """Save a 250x250 pixel crop of an active region at a chosen time.

    The image closest to `time` is looked up with getImageForEvent(); the
    crop is centred on the pixel that event['AR location'] maps to and is
    written to 'dataset_v2/noflare/<Event number>.png'.

    Nothing is saved (the function returns None early) when:
      * no image is found for `time`,
      * sqrt(lat**2 + lon**2) of the location exceeds 60 degrees, or
      * the crop window would extend past the image edges.

    Args:
        event: Catalogue row; the keys 'AR location' and 'Event number' are
            read.
        time (str): Timestamp formatted as '%Y/%m/%d %H:%M' at which to take
            the image.
        flip (bool): When True the image data is flipped along axis 0 before
            cropping.
    """
    file = getImageForEvent(time)
    if file is None:
        return
    print('found file |', time, '|', event['AR location'])

    lon, lat = convert_to_lon_lat(event['AR location'])
    if np.sqrt(lat**2 + lon**2) > 60:
        print('Too close to limb')
        return

    directory = 'dataset_v2/noflare'
    os.makedirs(directory, exist_ok=True)

    l1_map = toSunpyMap(file, center_disk=True)
    coord = SkyCoord(lon*u.deg, lat*u.deg, frame=HeliographicStonyhurst, observer='earth', obstime=l1_map.date)
    x, y = [int(pixel_coord.value) for pixel_coord in l1_map.world_to_pixel(coord)]
    # NOTE(review): 2048 is presumably the image height in pixels; the row is
    # mirrored regardless of `flip` -- confirm against toSunpyMap's output.
    y = 2048 - y

    data = np.flip(l1_map.data, axis=0) if flip else l1_map.data

    # A window that leaves the array would silently produce an empty or
    # truncated crop (negative slice starts wrap around), so skip it instead.
    half = 125
    rows, cols = data.shape[0], data.shape[1]
    if not (half <= y <= rows - half and half <= x <= cols - half):
        print('Crop window outside image')
        return

    l1_image = data[y-half:y+half, x-half:x+half]

    # The key is looked up outside the f-string: reusing the f-string's own
    # quote character inside a replacement field is a SyntaxError before
    # Python 3.12.
    event_number = event['Event number']
    plt.imsave(f'{directory}/{event_number}.png', l1_image.astype(np.uint16), cmap='gray', vmin=0, vmax=65535, format='png')

### main

In [6]:
# Load the flare catalogue. Later cells read the columns 'Event number',
# 'start', 'peak', 'end' and 'AR location' from it.
df = pd.read_csv('FlareCatalogue.csv')

### Loop

In [None]:
# Create one flare sample per catalogue row.
# Only rows with index below 11279 are processed; rows with index below 5782
# are saved from vertically flipped image data.
for index, event in df.iterrows():
    if index < 11279:  # 5782
        flip = index < 5782
        print(index, '|', event['Event number'])
        createSample(event, flip=flip)


# Dataset: get no-flare images

### loop

In [25]:
# Create one no-flare sample per catalogue row, taken in the quiet interval
# between the row's event and the event in the following row.
# NOTE(review): the catalogue appears to be sorted newest-first (event numbers
# and times decrease with the index in the printed output), so row index+1 is
# the chronologically previous event -- confirm.
for index, event in df.iterrows():
    if index >= 11279:  # 5782
        continue
    if index + 1 >= len(df):
        # The last row has no following row to compare against.
        continue

    flip = index < 5782

    event_last = df.iloc[index + 1]
    event_next = df.iloc[index]

    event_last_dt = datetime.strptime(event_last['end'], '%Y/%m/%d %H:%M')
    event_next_dt = datetime.strptime(event_next['start'], '%Y/%m/%d %H:%M')

    diff = event_next_dt - event_last_dt
    if diff <= timedelta(0):
        # The previous event ends at or after this one starts: there is no
        # quiet interval, and the midpoint would fall inside the flare itself.
        print(index, '|', event['Event number'], '| skipped: no gap before event')
        continue

    if diff < timedelta(hours=1):
        # Short gap: take the middle of the quiet interval.
        image_time = event_last_dt + diff / 2
    else:
        # Long gap: take the image one hour before the event starts.
        image_time = event_next_dt - timedelta(hours=1)
    image_time_str = image_time.strftime('%Y/%m/%d %H:%M')

    print(index, '|', event['Event number'])
    createSampleNoFlare(event, time=image_time_str, flip=flip)

0 | 275170
1 | 275160
2 | 275150
3 | 275140
4 | 275130
5 | 275120
6 | 275110
7 | 275100
8 | 275090
9 | 275080
10 | 275070
11 | 275060
12 | 275050
13 | 275040
14 | 275030
15 | 275020
found file | 2024/06/28 10:35 | S26E24
16 | 275010
found file | 2024/06/28 09:32 | N10E89
Too close to limb
17 | 275000
found file | 2024/06/28 08:18 | N09E89
Too close to limb
18 | 274990
19 | 274980
20 | 274970
21 | 274960
22 | 274950
23 | 274940
24 | 274930
25 | 274920
26 | 274910
27 | 274900
28 | 274890
29 | 274880
30 | 274870
31 | 274860
32 | 274850
33 | 274840
34 | 274830
35 | 274820
36 | 274810
37 | 274800
38 | 274790
39 | 274780
40 | 274770
41 | 274760
42 | 274750
43 | 274740
44 | 274730
45 | 274720
46 | 274710
47 | 274700
48 | 274690
49 | 274680
50 | 274670
51 | 274660
52 | 274650
53 | 274640
54 | 274630
55 | 274620
56 | 274610
57 | 274600
58 | 274590
59 | 274580
60 | 274570
61 | 274560
62 | 274550
63 | 274540
64 | 274530
65 | 274520
66 | 274510
67 | 274500
68 | 274490
69 | 274480
70 | 274470
71 | 