# Dataset

In [226]:
import pandas as pd

import os
from datetime import datetime, timedelta
import re

from module import toSunpyMap

from astropy.coordinates import SkyCoord
import astropy.units as u
from sunpy.coordinates import HeliographicStonyhurst

import matplotlib.pyplot as plt


In [227]:
def convert_to_lon_lat(location):
    """Turn a location code such as ``'S08W38'`` into signed degrees.

    The string is read positionally: one hemisphere letter, two latitude
    digits, one hemisphere letter, then the remaining characters as the
    longitude.

    Sign convention used here:
      * latitude is positive for ``'N'`` and negative for anything else;
      * longitude is negative for ``'E'`` and positive for anything else.

    Returns:
        A ``(longitude, latitude)`` tuple of ints (note the order).
    """
    north_south = location[0]
    lat_degrees = int(location[1:3])
    latitude = lat_degrees if north_south == 'N' else -lat_degrees

    east_west = location[3]
    lon_degrees = int(location[4:])
    longitude = -lon_degrees if east_west == 'E' else lon_degrees

    return longitude, latitude


# Extract datetime from filename using regex
def extract_datetime_from_filename(filename):
    """Parse the acquisition timestamp embedded in a file name.

    The name must contain an underscore-delimited stamp of the form
    ``_YYYYMMDD_HHMMSSff_``.  The two trailing digits are handed to
    ``%f``, which ``strptime`` reads as a fraction of a second padded on
    the right (``'12'`` becomes 120000 microseconds).

    Returns:
        The parsed ``datetime``, or ``None`` when the name holds no such
        stamp or the stamp is not a valid date/time (in which case a
        message is printed).
    """
    found = re.search(r'_(\d{8}_\d{6}\d{2})_', filename)
    if not found:
        return None

    datetime_str = found.group(1)
    try:
        parsed = datetime.strptime(datetime_str, '%Y%m%d_%H%M%S%f')
    except ValueError as e:
        # Digits were present but do not form a real date/time.
        print(f"Error parsing datetime string: {datetime_str}, error: {e}")
        return None
    return parsed


# Function to find the closest file to a given time
def find_closest_file(target_time, files, max_diff=timedelta(minutes=15)):
    """Return the entry of *files* whose timestamp is nearest *target_time*.

    Args:
        target_time: The ``datetime`` to match against.
        files: Iterable of ``(filename, datetime)`` pairs.
        max_diff: Largest acceptable gap between the best candidate and
            *target_time*.  A candidate exactly ``max_diff`` away is still
            accepted.  Defaults to 15 minutes.

    Returns:
        The winning ``(filename, datetime)`` tuple, or ``None`` when
        *files* is empty or the nearest entry is further than *max_diff*
        from *target_time*.  When several entries are equally close, the
        one that comes first in *files* is returned.
    """
    closest_file = None
    min_diff = None
    for file, file_time in files:
        diff = abs(file_time - target_time)
        # Strict '<' keeps the earliest-listed entry on ties.
        if min_diff is None or diff < min_diff:
            min_diff = diff
            closest_file = (file, file_time)

    if closest_file is None or min_diff > max_diff:
        return None

    return closest_file

In [228]:
# Load the flare catalogue from the CSV file into a DataFrame (one row per event).
df = pd.read_csv('FlareCatalogue.csv')

In [229]:
# Take the first catalogue row as a sample event and display its 'start' field.
event = df.iloc[0]
event['start']

'2023/07/31 22:52'

In [230]:
# Convert the sample event's 'AR location' string into signed (lon, lat) degrees
# and display the pair.
lon, lat = convert_to_lon_lat(event['AR location'])
lon, lat

(38, -8)

In [231]:
def getTimeSeriesForEvent(event, data_root='Y:/data/CLIMSO/', n_frames=4,
                          step=timedelta(minutes=30),
                          prefix='imoa_03933_l2_'):
    """Pick the files that precede an event at regular time steps.

    Starting from the event's start time, the function steps backwards
    ``n_frames`` times by ``step`` (30, 60, 90 and 120 minutes before the
    start with the defaults) and, for each target time, picks the file
    whose embedded timestamp is closest according to
    ``find_closest_file``.  A file is used at most once.

    Only the directory for the event's own calendar day is searched:
    ``<data_root>/<year>/data_calibrated/<YYYY-MM-DD>/``.

    Args:
        event: Mapping-like object (e.g. a DataFrame row) whose ``'start'``
            entry is a string formatted as ``'%Y/%m/%d %H:%M'``.
        data_root: Root folder of the archive.
        n_frames: Number of backward steps, i.e. number of files returned.
        step: Time between two consecutive target times.
        prefix: Only file names starting with this prefix are considered.

    Returns:
        A list of ``n_frames`` bare file names (no directory part), ordered
        from the closest to the event to the furthest, or ``None`` when
        the day directory is missing or any target time has no acceptable
        file.
    """
    event_datetime = datetime.strptime(event['start'], '%Y/%m/%d %H:%M')

    directory = os.path.join(
        data_root,
        str(event_datetime.year),
        'data_calibrated',
        event_datetime.strftime('%Y-%m-%d'),
    )

    # isdir rather than exists: a regular file at this path would otherwise
    # reach os.listdir and raise.
    if not os.path.isdir(directory):
        return None

    # Collect (filename, timestamp) pairs, dropping names whose timestamp
    # could not be extracted.
    candidates = []
    for name in os.listdir(directory):
        if not name.startswith(prefix):
            continue
        stamp = extract_datetime_from_filename(name)
        if stamp is not None:
            candidates.append((name, stamp))

    # Chronological order makes tie-breaking deterministic regardless of
    # the order in which the OS lists the directory.
    candidates.sort(key=lambda pair: pair[1])

    selected_names = []
    target_time = event_datetime
    for _ in range(n_frames):
        target_time -= step
        best = find_closest_file(target_time, candidates)
        if best is None:
            # A gap in the series makes the whole event unusable.
            return None
        selected_names.append(best[0])
        # Remove the chosen file so it cannot be selected for another step.
        candidates.remove(best)

    return selected_names

In [232]:
# Run the file selection for the sample event and display the result
# (a list of file names, or None when no complete series is available).
files = getTimeSeriesForEvent(event)
files

In [233]:
# Count the catalogue events for which a complete file series was found.
# Each hit prints "<running count> / <row label + 1>".
count = 0
for index, event in df.iterrows():
    if not getTimeSeriesForEvent(event):
        continue
    count += 1
    print(count, '/', index + 1)

1 / 3
2 / 4
3 / 10
4 / 17
5 / 19
6 / 24
7 / 35
8 / 36
9 / 37
10 / 38
11 / 54
12 / 80
13 / 81
14 / 82
15 / 85
16 / 86
17 / 89
18 / 90
19 / 101
20 / 102
21 / 103
22 / 104
23 / 179
24 / 180
25 / 181
26 / 182
27 / 183
28 / 230
29 / 868
30 / 869
31 / 888
32 / 914
33 / 1052
34 / 1155
35 / 1156
36 / 1157
37 / 1158
38 / 1167
39 / 1168
40 / 1169
41 / 1180
42 / 1183
43 / 1335
44 / 1336
45 / 1337
46 / 1344
47 / 1345
48 / 1346
49 / 1347
50 / 1357
51 / 1358
52 / 1370
53 / 1394
54 / 1395
55 / 1416
56 / 1417
57 / 1478
58 / 1484
59 / 1487
60 / 1488
61 / 1489
62 / 1490
63 / 1497
64 / 1535
65 / 1536
66 / 1559
67 / 1560
68 / 1561
69 / 1562
70 / 1563
71 / 1573
72 / 1575
73 / 1590
74 / 1591
75 / 1654
76 / 1655
77 / 1656
78 / 1663
79 / 1671
80 / 1681
81 / 1682
82 / 1683
83 / 1684
84 / 1822
85 / 1823
86 / 2067
87 / 2134
88 / 2135
89 / 2136
90 / 2165
91 / 2166
92 / 2206
93 / 2217
94 / 2329
95 / 2330
96 / 2334
97 / 2335
98 / 2439
99 / 2440
100 / 2567
101 / 2640
102 / 2641
103 / 2647
104 / 2777
105 / 2778
106 /