# Importación de Librerías

In [None]:
import numpy as np
import pandas as pd
import seaborn

# Carga y Visualización de Datos


In [None]:
# NOTE: the three CSV files must be uploaded to the Colab environment first.
# All of them are read with ';' as the field separator.
daily_dataframe, monthly_dataframe, smooth_dataframe = (
  pd.read_csv(csv_name, sep=";")
  for csv_name in ("SN_d_tot_V2.0.csv", "SN_m_tot_V2.0.csv", "SN_ms_tot_V2.0.csv")
)

In [None]:
# Print a heading followed by the pandas summary (columns, dtypes, non-null
# counts) of each loaded frame, in daily / monthly / smoothed order.
for heading, frame in (
  ("DAILY", daily_dataframe),
  ("\n\nMONTHLY", monthly_dataframe),
  ("\n\nSMOOTH MONTHLY", smooth_dataframe),
):
  print(heading)
  frame.info()

In [None]:
# Graph data
# Stack the daily, monthly and smoothed series into one long frame with a
# "Type" column so seaborn can draw one line per series.
plot_columns = ["Year", "Month", "Fraction", "Sunspots", "Deviation", "Measures", "Indicator"]
types = ["Monthly (Raw)", "Monthly (Smooth)"]
# The monthly and smoothed series are paired row by row, so only their common
# leading rows are kept when the two frames differ in length.
paired_rows = min(len(monthly_dataframe), len(smooth_dataframe))
# Column-wise concatenation replaces a per-row Python loop; on the daily series
# that loop dominates the cell's run time. "Daily" comes first so the hue order
# (and therefore the palette assignment) is Daily, Monthly (Raw), Monthly (Smooth).
graph_dataframe = pd.concat(
  [
    daily_dataframe[plot_columns].assign(Type="Daily"),
    monthly_dataframe[plot_columns].iloc[:paired_rows].assign(Type=types[0]),
    smooth_dataframe[plot_columns].iloc[:paired_rows].assign(Type=types[1]),
  ],
  ignore_index=True,
)
# Column -> list mapping of the same data, kept for any later cell that reads it.
graph_dict:dict = graph_dataframe.to_dict(orient="list")
seaborn.lineplot(data=graph_dataframe,x="Fraction",y="Sunspots", hue="Type", palette=["#000088","#99FF00","#FF0000"])

In [None]:
# Plot only the rows whose decimal-year "Fraction" lies in (1900, 2024].
in_plot_range = (graph_dataframe.Fraction > 1900) & (graph_dataframe.Fraction <= 2024)
partial_graph_dataframe = graph_dataframe[in_plot_range]
seaborn.lineplot(
  data=partial_graph_dataframe,
  x="Fraction",
  y="Sunspots",
  hue="Type",
  palette=["#000088","#99FF00","#FF0000"],
)

# Reformateando los datos

In [None]:
# Raw NumPy view of the smoothed monthly frame (all columns, one row per record).
data_array = smooth_dataframe.to_numpy()

# Modelo Keras - Recurrente Simple

In [None]:
import keras as kr
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, LSTM, Activation

In [None]:
# Number of consecutive observations fed to the recurrent layer per sample.
time_steps = 12

# Simple recurrent baseline: one 3-unit SimpleRNN layer over windows of shape
# (time_steps, 1), followed by a single-unit dense output.
# NOTE(review): the tanh output activation bounds predictions to [-1, 1];
# confirm the targets are scaled to that range before training this model.
model = Sequential(
  [
    SimpleRNN(units=3, input_shape=(time_steps, 1), activation="tanh"),
    Dense(units=1, activation="tanh"),
  ]
)
model.compile(optimizer="adam", loss="mean_squared_error")

# Modelo Keras - LSTM

## Reformatear los datos

In [None]:
def to_LSTM_format(df:pd.DataFrame, time_steps:int = 12) -> tuple[np.ndarray, np.ndarray]:
  """Turn the "Sunspots" column of *df* into supervised sliding-window samples.

  Each sample is a run of ``time_steps`` consecutive "Sunspots" values (in row
  order) and its tag is the value that immediately follows that run.

  Args:
    df: Frame with a numeric "Sunspots" column; rows are used in their
      existing order.
    time_steps: Window length, must be at least 1.

  Returns:
    ``(data, tags)`` where ``data`` has shape ``(n, time_steps)`` and ``tags``
    has shape ``(n,)`` with ``n = max(len(df) - time_steps, 0)``. When there
    are too few rows for a single sample, ``data`` still has shape
    ``(0, time_steps)``.

  Raises:
    ValueError: If ``time_steps`` is smaller than 1.
    KeyError: If *df* has no "Sunspots" column.
  """
  from numpy.lib.stride_tricks import sliding_window_view

  if time_steps < 1:
    raise ValueError("time_steps must be a positive integer")

  values = df["Sunspots"].to_numpy(dtype=float)
  sample_count = len(values) - time_steps
  if sample_count <= 0:
    # Not enough rows for one window plus its target.
    return np.empty((0, time_steps), dtype=float), np.empty(0, dtype=float)

  # sliding_window_view yields one extra trailing window that has no target
  # after it; drop it. Copy because the view is read-only and shares memory.
  windows = sliding_window_view(values, time_steps)[:sample_count].copy()
  tags = values[time_steps:].copy()
  return windows, tags

# Build the training windows from the smoothed series, skipping its first six rows.
LSTM_train_data, LSTM_train_tags = to_LSTM_format(smooth_dataframe.iloc[6:])

# Preview the first 21 (window, target) pairs as a sanity check.
preview_pairs = zip(LSTM_train_data[:21], LSTM_train_tags[:21])
for index, (dataiter, tagiter) in enumerate(preview_pairs):
  print(index, "\t-->\t", dataiter, "\t|| ", tagiter)

In [None]:
# Construct LSTM network
# Window length is taken from the training data so the input shape cannot
# drift from whatever to_LSTM_format produced.
window_length = LSTM_train_data.shape[1]

LSTM_model = Sequential()
# Single 8-unit LSTM layer that returns only its final state.
LSTM_model.add(LSTM(8, input_shape=(window_length, 1), return_sequences=False))
# Linear single-unit head: the network predicts one unbounded real value.
LSTM_model.add(Dense(1, kernel_initializer='normal', activation='linear'))
# This is a regression, so report mean absolute error; 'accuracy' counts exact
# matches and carries no information for continuous targets.
LSTM_model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Add the explicit trailing feature axis: (samples, window_length, 1).
LSTM_model.fit(
  LSTM_train_data.reshape(-1, window_length, 1),
  LSTM_train_tags,
  epochs=50,
  batch_size=5,
)

In [None]:
# Validation window: smoothed monthly rows with 2014 <= Fraction <= 2024.
in_validation_window = (smooth_dataframe.Fraction >= 2014) & (smooth_dataframe.Fraction <= 2024)
validate_dataframe = smooth_dataframe[in_validation_window]

LSTM_validation_data, LSTM_validation_tags = to_LSTM_format(validate_dataframe)
LSTM_prediction = LSTM_model.predict(LSTM_validation_data)

# Flatten the per-sample single-value predictions into a 1-D array.
predicted_values = np.asarray(LSTM_prediction, dtype=float).reshape(-1)
expected_values = np.asarray(LSTM_validation_tags, dtype=float)

# The leading rows of the validation frame only feed the first input window and
# have no target of their own, so each target belongs to the row that comes
# *after* its window. Skip those leading rows to line the x-axis up.
window_offset = len(validate_dataframe) - len(expected_values)
target_fractions = validate_dataframe["Fraction"].to_numpy()[window_offset:]

# Long-format data for plotting: "Expected" is the value from the data set,
# "Real" is the value the model actually produced.
graph_dict:dict = {
  "Fraction": np.concatenate([target_fractions, target_fractions]).tolist(),
  "Sunspots": np.concatenate([expected_values, predicted_values]).tolist(),
  "Type": ["Expected"] * len(expected_values) + ["Real"] * len(predicted_values),
}

# Mean of |real - expected| / |expected|, i.e. relative to the data-set value.
# NOTE(review): if the validation window contains zeros or missing-value
# sentinels in "Sunspots", this ratio is not meaningful - confirm the data.
mean_rel_error:float = float(np.mean(np.abs((predicted_values - expected_values) / expected_values)))

# Show error
print("Mean Relative Error -->", mean_rel_error)
# Graph
LSTM_graph_dataframe = pd.DataFrame(graph_dict)
seaborn.lineplot(data=LSTM_graph_dataframe,x="Fraction",y="Sunspots", hue="Type", palette=["#000088","#99FF00"])

# Análisis Hiperparámetros

# Comparación Modelos