<a href="https://colab.research.google.com/github/JD314/Proyecto-solar/blob/main/Helios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
#Funciones para obtener los datos CME

def get_lasco(year: str, month: str, *,
              base_url: str = 'https://www.sidc.be/cactus/catalog/LASCO/2_5_0/qkl') -> "pd.DataFrame":
    """Return the CACTus LASCO CME catalog of one month as a DataFrame.

    Downloads ``{base_url}/{year}/{month}/cmecat.txt``, extracts the
    pipe-separated CME table from it and converts the table to a DataFrame.
    (Original author's note: the catalog is updated every five days.)

    Args:
        year: Year as a string, e.g. ``'2021'``.
        month: Month in zero-padded numeric format, e.g. ``'04'``.
        base_url: Root of the monthly catalog tree.  Defaults to the CACTus
            LASCO quick-look catalog; any URL scheme accepted by
            ``urllib.request.urlopen`` works (e.g. ``file://`` for a local
            copy).

    Returns:
        DataFrame indexed by the catalog's ``CME`` column.  The ``t0`` column
        is parsed to datetimes and renamed to ``time_tag``.

    Raises:
        ValueError: If the line that terminates the CME table is not found.
    """
    import io
    import urllib.request

    import pandas as pd

    # The CME table (column header first) starts at this 0-based line of the
    # text file and ends at the first line that is exactly ' \n'.
    table_start = 26

    cactus = f'{base_url}/{year}/{month}/cmecat.txt'

    # The context manager guarantees the HTTP response is closed.
    with urllib.request.urlopen(cactus) as cmecat:
        lines = [raw_line.decode('utf-8') for raw_line in cmecat]

    body = lines[table_start:]
    try:
        table_end = body.index(' \n')
    except ValueError as err:
        raise ValueError(
            f"CME table terminator (' \\n') not found in {cactus}"
        ) from err

    # Drop newline, '?' and '#' characters from every field in one pass.
    unwanted = str.maketrans('', '', '\n?#')
    rows = [[field.translate(unwanted) for field in line.split('|')]
            for line in body[:table_end]]

    # Auxiliary frame: its first row holds the column tags, which are used
    # without any spaces.
    df_cme = pd.DataFrame(rows)
    df_cme.columns = [tag.replace(' ', '') for tag in df_cme.iloc[0]]

    # The JSON round trip lets pandas infer numeric dtypes for the text
    # fields.  StringIO is used because passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    as_json = df_cme.iloc[1:].to_json()
    lasco = pd.read_json(io.StringIO(as_json)).set_index('CME')

    lasco['t0'] = pd.to_datetime(lasco['t0'])
    lasco = lasco.rename(columns={'t0': 'time_tag'})

    return lasco

def get_lasco_rt(url: str = 'https://www.sidc.be/cactus/out/cmecat.txt') -> "pd.DataFrame":
    """Return the CACTus LASCO near-real-time CME catalog as a DataFrame.

    Downloads the ``cmecat.txt`` file at ``url``, extracts the pipe-separated
    CME table from it and converts the table to a DataFrame.

    Args:
        url: Location of the catalog text file.  Defaults to the CACTus
            near-real-time output; any URL scheme accepted by
            ``urllib.request.urlopen`` works (e.g. ``file://`` for a local
            copy).

    Returns:
        DataFrame indexed by the catalog's ``CME`` column.  The ``t0`` column
        is parsed to datetimes and renamed to ``time_tag``.

    Raises:
        ValueError: If the line that terminates the CME table is not found.
    """
    import io
    import urllib.request

    import pandas as pd

    # The CME table (column header first) starts at this 0-based line of the
    # text file and ends at the first line that is exactly ' \n'.
    table_start = 26

    # The context manager guarantees the HTTP response is closed.
    with urllib.request.urlopen(url) as cmecat:
        lines = [raw_line.decode('utf-8') for raw_line in cmecat]

    body = lines[table_start:]
    try:
        table_end = body.index(' \n')
    except ValueError as err:
        raise ValueError(
            f"CME table terminator (' \\n') not found in {url}"
        ) from err

    # Drop newline, '?' and '#' characters from every field in one pass.
    unwanted = str.maketrans('', '', '\n?#')
    rows = [[field.translate(unwanted) for field in line.split('|')]
            for line in body[:table_end]]

    # Auxiliary frame: its first row holds the column tags, which are used
    # without any spaces.
    df_cme = pd.DataFrame(rows)
    df_cme.columns = [tag.replace(' ', '') for tag in df_cme.iloc[0]]

    # The JSON round trip lets pandas infer numeric dtypes for the text
    # fields.  StringIO is used because passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    as_json = df_cme.iloc[1:].to_json()
    lasco = pd.read_json(io.StringIO(as_json)).set_index('CME')

    lasco['t0'] = pd.to_datetime(lasco['t0'])
    lasco = lasco.rename(columns={'t0': 'time_tag'})

    return lasco

#Funciones para obtener Xray
def get_goes(url: str = 'https://services.swpc.noaa.gov/json/goes/primary/xrays-7-day.json') -> "pd.DataFrame":
    """Return the 7-day X-ray feed of the primary GOES satellite as a DataFrame.

    Args:
        url: Location of the JSON feed.  Defaults to the SWPC ``xrays-7-day``
            product; anything ``pandas.read_json`` can open (URL or local
            path) is accepted.

    Returns:
        DataFrame with the feed's columns, where ``time_tag`` holds
        timezone-naive datetimes (the UTC wall-clock time of each sample).
    """
    # Imported locally, like the other loaders, so the function does not
    # depend on a module-level ``pd`` having been imported beforehand.
    import pandas as pd

    xray = pd.read_json(url)

    # -- Time handling --
    # Parse the ISO timestamps as UTC, then drop the timezone so the column
    # can be compared against the naive timestamps of the CME catalog.
    xray['time_tag'] = pd.to_datetime(xray['time_tag'], utc=True).dt.tz_localize(None)

    return xray



In [23]:
from datetime import datetime
import pandas as pd

today = datetime.now()

cme = get_lasco_rt()
goes = get_goes()

if today.day < 3:
    # At the very start of a month, also pull the previous month's catalog
    # and keep only its events from the last four days.
    last_date = today - pd.Timedelta(4, 'd')
    year, month = last_date.strftime("%Y"), last_date.strftime("%m")

    last_mont = get_lasco(year, month)
    last_mont = last_mont[last_mont.time_tag > last_date]

    cme = pd.concat([cme, last_mont])

# -- Append flux --
cme.time_tag = pd.to_datetime(cme.time_tag)

# Keep only the events whose `da` value exceeds 17.
cme = cme[cme.da > 17]
CME = cme.assign(flux=0.0)

# For every CME take the largest X-ray flux observed in a time window
# centred on the CME's time_tag.  The flux values are collected in row order
# and assigned positionally: after the concat above the CME index labels may
# repeat, so label-based lookups/assignments (`CME.loc[index]`) would hit
# several rows at once.
peak_flux = []
for _, event in CME.iterrows():
    # Half-width of the window in hours; the /10 factor is a hand-tuned
    # adjustment according to the original author's note.
    half_window = pd.Timedelta(event.dt0 / 10, 'h')
    window_start = event.time_tag - half_window
    window_end = event.time_tag + half_window

    in_window = goes[(goes.time_tag > window_start) & (goes.time_tag < window_end)]

    # max() of an empty selection is NaN, i.e. "no X-ray sample in window".
    peak_flux.append(in_window.flux.max())

CME['flux'] = peak_flux

datos = CME[['flux', 'v', 'minv', 'maxv']]

last_event = datos.tail(1)

# NOTE(review): `classifier` is not defined anywhere in the visible code; it
# must be created (trained or loaded) before this line runs, otherwise this
# raises NameError.
classifier.predict(last_event.to_numpy())

NameError: ignored