# Importación de Librerías

In [None]:
import numpy as np
import pandas as pd
import seaborn

# Carga y Visualización de Datos


In [None]:
# NOTE: the three sunspot CSV files must be uploaded to the Colab session
# before this cell is run. Comma is already read_csv's default separator.
daily_dataframe = pd.read_csv("SN_d_tot_V2.0.csv")
monthly_dataframe = pd.read_csv("SN_m_tot_V2.0.csv")
smooth_dataframe = pd.read_csv("SN_ms_tot_V2.0.csv")

In [None]:
# Print a headed structural summary (columns, dtypes, non-null counts) for
# each of the three loaded frames, in daily / monthly / smoothed order.
for _heading, _frame in (
  ("DAILY", daily_dataframe),
  ("\n\nMONTHLY", monthly_dataframe),
  ("\n\nSMOOTH MONTHLY", smooth_dataframe),
):
  print(_heading)
  _frame.info()

In [None]:
# Graph data
# Build one long-format frame holding the daily, raw-monthly and
# smoothed-monthly series, tagged by a "Type" column so seaborn draws one line
# per series. Whole-frame concatenation replaces the previous row-by-row
# iterrows() loops: it avoids a Python-level pass over every daily row, keeps
# each column's original dtype, and keeps every row of both monthly frames
# (the old zip-based loop silently dropped trailing rows if their lengths
# differed).
graph_columns = ["Year", "Month", "Fraction", "Sunspots", "Deviation", "Measures", "Indicator"]
types = ["Monthly (Raw)", "Monthly (Smooth)"]
graph_dataframe = pd.concat(
  [
    # Daily rows stay first so the "Type" levels appear in the same order as
    # before (Daily, raw monthly, smoothed monthly).
    daily_dataframe[graph_columns].assign(Type="Daily"),
    monthly_dataframe[graph_columns].assign(Type=types[0]),
    smooth_dataframe[graph_columns].assign(Type=types[1]),
  ],
  ignore_index=True,
)
# Rows are now grouped by type instead of interleaved raw/smooth; the plot
# below splits the data by "Type", so the row order inside the frame should
# not affect it.
seaborn.lineplot(data=graph_dataframe,x="Fraction",y="Sunspots", hue="Type", palette=["#000088","#99FF00","#FF0000"])

In [None]:
# Restrict the plot to the window 1900 < Fraction <= 2024 (Fraction is the
# column used as the x axis), then redraw the same three-series line chart.
in_plot_window = (graph_dataframe.Fraction > 1900) & (graph_dataframe.Fraction <= 2024)
partial_graph_dataframe = graph_dataframe[in_plot_window]
seaborn.lineplot(
  data=partial_graph_dataframe,
  x="Fraction",
  y="Sunspots",
  hue="Type",
  palette=["#000088", "#99FF00", "#FF0000"],
)

# Reformando la data

In [None]:
# Snapshot of the smoothed monthly frame as a plain NumPy array (all columns).
# NOTE(review): nothing else in the visible notebook reads data_array; the
# models below are fed from smooth_dataframe directly.
data_array = smooth_dataframe.to_numpy()

# Modelo Keras - Recurrente Simple

In [None]:
import keras as kr
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

In [None]:
# Hyperparameters shared with the windowing cell that calls split_data().
sequence_length = 9  # number of past observations fed to the network per sample
scale_factor = 0.5   # multiplier applied to sunspot values before training

# A single tanh recurrent layer (6 units) reading windows of shape
# (sequence_length, 1), followed by one linear output unit that predicts the
# next value. Trained with Adam on mean squared error.
model = Sequential([
  SimpleRNN(units=6, input_shape=(sequence_length, 1), activation="tanh"),
  Dense(units=1, activation="linear"),
])
model.compile(optimizer="adam", loss="mean_squared_error")

## Reformateando los datos

In [None]:
def split_data(df:pd.DataFrame, time_steps:int = 12, scale_factor: float = 1.0) -> tuple[np.ndarray, np.ndarray]:
  """Turn the "Sunspots" column into sliding windows and next-value labels.

  Rows whose "Sunspots" value is the -1 sentinel are dropped before windowing,
  so the sentinel never appears in a window or as a label. (Previously a
  sentinel row that arrived while the window was full was emitted as a label
  of ``-1 * scale_factor``.) Because dropped rows are simply skipped, a window
  may span a gap if sentinel rows occur in the middle of the series.

  Args:
    df: Frame with a numeric "Sunspots" column, iterated in row order.
    time_steps: Number of consecutive valid values per input window (>= 1).
    scale_factor: Multiplier applied to every value in windows and labels.

  Returns:
    ``(windows, labels)``: ``windows`` has shape ``(n_samples, time_steps)``
    and ``labels`` has shape ``(n_samples,)``, where ``labels[i]`` is the
    valid value that follows ``windows[i]``. Both are empty arrays when fewer
    than ``time_steps + 1`` valid values are available.

  Raises:
    ValueError: If ``time_steps`` is smaller than 1.
  """
  if time_steps < 1:
    raise ValueError("time_steps must be at least 1")

  windows: list[np.ndarray] = []
  labels: list[float] = []
  current: list[float] = []
  # Iterate the column directly; building a full row Series per step is unnecessary.
  for value in df["Sunspots"]:
    if value == -1:
      # Missing observation: usable neither as a window element nor as a label.
      continue
    scaled = value * scale_factor
    if len(current) == time_steps:
      # The window is full, so this value is the target for the window so far.
      labels.append(scaled)
      windows.append(np.array(current))
      current = current[1:]  # slide: drop the oldest value
    current.append(scaled)
  return np.array(windows), np.array(labels)

# Window the smoothed series with the same hyperparameters the model was
# built with, so each sample holds sequence_length scaled values.
data_X, data_labels = split_data(
  smooth_dataframe,
  time_steps=sequence_length,
  scale_factor=scale_factor,
)

from math import floor

# Chronological 70/30 split: the earliest windows train, the latest ones test.
separator = floor(0.7 * len(data_X))

train_X, test_X = data_X[:separator], data_X[separator:]
train_labels, test_labels = data_labels[:separator], data_labels[separator:]

In [None]:
# Fit the simple RNN on the training windows: 50 passes over the data with a
# batch size of 1, i.e. one window per weight update.
model.fit(train_X, train_labels, batch_size= 1, epochs= 50)

In [None]:
# Predict the next value for every held-out window. The outputs are still in
# scaled units (the labels were multiplied by scale_factor during windowing).
predict_labels = model.predict(test_X)

# Debug aid, disabled by default: print(predict_labels)

In [None]:
from matplotlib import pyplot

# Overlay predictions (red) and ground truth (blue) for the test windows,
# dividing by scale_factor to undo the scaling applied before training.
sample_axis = range(len(test_X))
for series, line_color in ((predict_labels, 'red'), (test_labels, 'blue')):
  pyplot.plot(sample_axis, series / scale_factor, color=line_color)
pyplot.show()

# Modelo Keras - LSTM

## Reformatear los datos

In [None]:
def to_train_data(df:pd.DataFrame, time_steps:int = 12) -> tuple[np.ndarray, np.ndarray]:
  """Build unscaled sliding windows and next-value labels from "Sunspots".

  Rows whose "Sunspots" value is the -1 sentinel are skipped, so the sentinel
  is never used as a window element or as a label. (Previously these rows were
  kept, so windows and labels could contain -1.) Because skipped rows are
  simply dropped, a window may span a gap if sentinel rows occur in the middle
  of the series.

  Args:
    df: Frame with a numeric "Sunspots" column, iterated in row order.
    time_steps: Number of consecutive valid values per input window (>= 1).

  Returns:
    ``(windows, labels)``: ``windows`` has shape ``(n_samples, time_steps)``
    and ``labels`` has shape ``(n_samples,)``, where ``labels[i]`` is the
    valid value that follows ``windows[i]``. Both are empty arrays when fewer
    than ``time_steps + 1`` valid values are available.

  Raises:
    ValueError: If ``time_steps`` is smaller than 1.
  """
  if time_steps < 1:
    raise ValueError("time_steps must be at least 1")

  windows: list[list[float]] = []
  labels: list[float] = []
  current: list[float] = []
  for value in df["Sunspots"]:
    if value == -1:
      # Missing observation: usable neither as a window element nor as a label.
      continue
    if len(current) == time_steps:
      # The window is full, so this value is the target for the window so far.
      labels.append(value)
      windows.append(current)
      # Slicing rebinds `current` to a new list; the stored window is untouched.
      current = current[1:]
    current.append(value)
  return np.array(windows), np.array(labels)

# Build the LSTM windows with the default window length and print the first
# 21 (window, label) pairs as a quick visual sanity check.
LSTM_train_data, LSTM_train_tags = to_train_data(smooth_dataframe)
preview_pairs = zip(LSTM_train_data[:21], LSTM_train_tags[:21])
for index, (dataiter, tagiter) in enumerate(preview_pairs):
  print(index, " --> ", dataiter, "||", tagiter)

# Análisis Hiperparámetros

# Comparación Modelos