# Importación de Librerías

In [None]:
import numpy as np
import pandas as pd
import seaborn

# Carga y Visualización de Datos


In [None]:
# NOTE: upload the three CSV files to the Colab environment before running this cell.
# Daily, monthly and smoothed-monthly tables are read with pandas' default "," separator.
daily_dataframe, monthly_dataframe, smooth_dataframe = (
  pd.read_csv(csv_name)
  for csv_name in ('SN_d_tot_V2.0.csv', 'SN_m_tot_V2.0.csv', 'SN_ms_tot_V2.0.csv')
)

In [None]:
# Print a heading followed by the DataFrame.info() summary for each loaded table.
for heading, frame in (
  ("DAILY", daily_dataframe),
  ("\n\nMONTHLY", monthly_dataframe),
  ("\n\nSMOOTH MONTHLY", smooth_dataframe),
):
  print(heading)
  frame.info()

In [None]:
# Build one long-form frame (one row per observation, tagged with its series in "Type")
# and plot the three series together. The frame is assembled with a single vectorized
# concat instead of appending row by row, which is far faster on the daily table.
graph_columns = ["Year", "Month", "Fraction", "Sunspots", "Deviation", "Measures", "Indicator"]

# The raw and smoothed monthly tables are trimmed to a common length so both
# contribute exactly the same number of rows.
paired_rows = min(len(monthly_dataframe), len(smooth_dataframe))

graph_dataframe = pd.concat(
  [
    daily_dataframe[graph_columns].assign(Type="Daily"),
    monthly_dataframe[graph_columns].iloc[:paired_rows].assign(Type="Monthly (Raw)"),
    smooth_dataframe[graph_columns].iloc[:paired_rows].assign(Type="Monthly (Smooth)"),
  ],
  ignore_index=True,
)

# One line per series; palette entries follow the order in which each "Type" first appears.
seaborn.lineplot(data=graph_dataframe,x="Fraction",y="Sunspots", hue="Type", palette=["#000088","#99FF00","#FF0000"])

In [None]:
# Plot only the observations whose "Fraction" lies after 1900 and up to (and including) 2024.
# NOTE(review): "Fraction" looks like a decimal year, so 2024 would be the start of January 2024 — confirm.
in_window = (graph_dataframe.Fraction > 1900) & (graph_dataframe.Fraction <= 2024)
partial_graph_dataframe = graph_dataframe[in_window]
seaborn.lineplot(data=partial_graph_dataframe,x="Fraction",y="Sunspots", hue="Type", palette=["#000088","#99FF00","#FF0000"])

# Modelo Keras - Recurrente Simple

In [None]:
import keras as kr
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, LSTM, Activation

In [None]:
# Number of consecutive observations fed to the network for each prediction.
sequence_length = 5

# Simple recurrent regressor: `sequence_length` scalar inputs -> 5 recurrent units -> 1 output.
# The input shape is declared with an explicit Input object (as the LSTM model further down
# does) instead of the `input_shape=` layer argument.
model = Sequential()
model.add(kr.Input(shape=(sequence_length, 1)))
model.add(SimpleRNN(units=5, activation="tanh"))
# tanh keeps the output inside (-1, 1); the targets are scaled into [-0.9, 0.9] by normalize_data.
model.add(Dense(units=1, activation="tanh"))

model.compile(loss="mean_squared_error", optimizer="adam")

## Reformateando los datos

In [None]:
import warnings

def normalize_data(df: pd.DataFrame, interval_top: float = 0.9, interval_bottom: float = -0.9, missing_value: float = -1) -> tuple[pd.DataFrame, float, float]:
  """Linearly rescale the "Sunspots" column into [interval_bottom, interval_top].

  Entries equal to ``missing_value`` are excluded when computing the minimum and
  maximum and are left unchanged in the output. The input frame is not modified.

  Args:
    df: Frame containing a numeric "Sunspots" column.
    interval_top: Value the largest valid observation is mapped to.
    interval_bottom: Value the smallest valid observation is mapped to.
    missing_value: Sentinel marking entries that must not be rescaled.

  Returns:
    A tuple ``(normalized_df, data_max, data_min)`` where ``normalized_df`` is a copy
    of ``df`` with a float "Sunspots" column, and ``data_max`` / ``data_min`` are the
    extremes of the valid observations before scaling.

  Raises:
    ValueError: If the column holds no valid (non-sentinel) observation.
  """
  values = df["Sunspots"].to_numpy(dtype=float)
  is_valid = values != missing_value
  if not is_valid.any():
    raise ValueError("normalize_data: 'Sunspots' has no valid values to normalize")

  data_max: float = float(values[is_valid].max())
  data_min: float = float(values[is_valid].min())
  value_range: float = data_max - data_min
  interval_width: float = interval_top - interval_bottom

  scaled = values.copy()
  if value_range == 0:
    # Every valid value is identical: place them at the centre of the target
    # interval rather than dividing by zero.
    scaled[is_valid] = (interval_top + interval_bottom) / 2
  else:
    scaled[is_valid] = ((values[is_valid] - data_min) / value_range) * interval_width + interval_bottom

  # Assign the whole column at once; element-wise chained assignment
  # (frame["col"][i] = x) is not guaranteed to write through to the frame.
  normalized_df = df.copy()
  normalized_df["Sunspots"] = scaled

  return normalized_df, data_max, data_min

def split_data(df:pd.DataFrame, time_steps:int = 12) -> tuple[np.ndarray, np.ndarray, list[float]]:
  """Turn the "Sunspots" series into sliding windows with next-step labels.

  Sample ``i`` uses rows ``i .. i + time_steps - 1`` as input and row
  ``i + time_steps`` as label. No rows are filtered, so any sentinel values
  present in the column end up inside the windows.

  Args:
    df: Frame with numeric "Sunspots" and "Fraction" columns, in chronological order.
    time_steps: Number of consecutive observations per input window (>= 1).

  Returns:
    A tuple ``(windows, labels, fractions)``: ``windows`` has shape
    ``(n_samples, time_steps)``, ``labels`` has shape ``(n_samples,)`` and
    ``fractions`` is the list of "Fraction" values of the label rows. When the
    frame has ``time_steps`` rows or fewer, all three are empty.

  Raises:
    ValueError: If ``time_steps`` is smaller than 1.
  """
  if time_steps < 1:
    raise ValueError("time_steps must be a positive integer")

  sunspots = df["Sunspots"].to_numpy(dtype=float)
  fractions = df["Fraction"].to_numpy(dtype=float)

  sample_count = len(sunspots) - time_steps
  if sample_count <= 0:
    return np.empty((0, time_steps)), np.empty(0), []

  # Column k of the window matrix is the series shifted by k rows.
  windows = np.stack([sunspots[shift:shift + sample_count] for shift in range(time_steps)], axis=1)
  labels = sunspots[time_steps:].copy()
  return windows, labels, fractions[time_steps:].tolist()

# Scale the daily series, remembering the original extremes for later denormalization.
# NOTE(review): normalize_data leaves -1 entries untouched and split_data does not drop
# them, so any -1 rows end up inside the windows below — confirm this is intended.
normalized_dataframe, data_max, data_min = normalize_data(daily_dataframe)

# Sliding windows of `sequence_length` observations, each labelled with the next observation.
data_X, data_labels, data_years = split_data(normalized_dataframe, sequence_length)

from math import floor

# Chronological split: the first 70% of the windows train the model, the rest test it.
separator = floor(len(data_X)*0.7)

train_X = data_X[:separator]
train_labels = data_labels[:separator]

test_X = data_X[separator:]
test_labels = data_labels[separator:]
test_years = data_years[separator:]

In [None]:
# Train the daily SimpleRNN on the training windows: 20 epochs, mini-batches of 50.
model.fit(x=train_X, y=train_labels, epochs=20, batch_size=50)

In [None]:
# Run the trained daily model on the held-out windows (outputs are still in normalized units).
predict_labels = model.predict(x=test_X)

In [None]:
def denormalize_labels(labels: np.ndarray, true_max: float, true_min: float, interval_top: float = 0.9, interval_bottom: float = -0.9) -> np.ndarray:
  """Map normalized values back to the original scale (inverse of normalize_data).

  The mapping depends only on the normalization interval and on the original
  extremes, never on the values being converted, so predictions and ground-truth
  labels are brought back to the same scale and stay comparable.

  Args:
    labels: Normalized values; any array shape is accepted and preserved.
    true_max: Maximum of the original data, as returned by normalize_data.
    true_min: Minimum of the original data, as returned by normalize_data.
    interval_top: Upper bound of the normalization interval (normalize_data's default).
    interval_bottom: Lower bound of the normalization interval (normalize_data's default).

  Returns:
    Array with the same shape as ``labels`` expressed in original units.

  Raises:
    ValueError: If ``interval_top`` equals ``interval_bottom``.
  """
  interval_width: float = interval_top - interval_bottom
  if interval_width == 0:
    raise ValueError("interval_top and interval_bottom must differ")

  scaled = np.asarray(labels, dtype=float)
  # Position inside the normalization interval (0 at the bottom, 1 at the top) ...
  unit_position = (scaled - interval_bottom) / interval_width
  # ... stretched back onto the original [true_min, true_max] range.
  return unit_position * (true_max - true_min) + true_min

from matplotlib import pyplot

# Bring predictions and ground truth back to the original scale using the daily extremes.
denormalized_predicts = denormalize_labels(predict_labels, data_max, data_min)
denormalized_labels = denormalize_labels(test_labels, data_max, data_min)

# Fixed viewport: sunspot values 0-80 on the y axis, years 2018-2020 on the x axis.
pyplot.ylim([0, 80])
pyplot.xlim([2018, 2020])
# Predictions in red, ground truth in blue.
for series, line_color in ((denormalized_predicts, 'red'), (denormalized_labels, 'blue')):
  pyplot.plot(test_years, series, color=line_color)
pyplot.show()

In [None]:
# Mean relative error (in percent) between the denormalized predictions and labels.
# Both inputs are flattened first: the model output has one column per sample, and
# without flattening the accumulated error would be a one-element array, not a float.
predicted_values = np.ravel(denormalized_predicts)
actual_values = np.ravel(denormalized_labels)

# Each absolute difference is divided by the larger of the two values, floored at 0.0001
# so the denominator can never be zero or negative.
error_denominators = np.maximum(np.maximum(predicted_values, actual_values), 0.0001)
relative_error: float = float(np.sum(np.abs(predicted_values - actual_values) / error_denominators) * 100)

print(f"Error relativo: {relative_error/len(denormalized_labels)}%")

## Monthly

In [None]:
# Number of consecutive monthly observations fed to the network for each prediction.
monthly_sequence_length = 4

# Simple recurrent regressor for the monthly series: 6 recurrent units -> 1 output.
# The input shape is declared with an explicit Input object (as the LSTM model further down
# does) instead of the `input_shape=` layer argument.
monthly_model = Sequential()
monthly_model.add(kr.Input(shape=(monthly_sequence_length, 1)))
monthly_model.add(SimpleRNN(units=6, activation="tanh"))
# tanh keeps the output inside (-1, 1); the targets are scaled into [-0.9, 0.9] by normalize_data.
monthly_model.add(Dense(units=1, activation="tanh"))

monthly_model.compile(loss="mean_squared_error", optimizer="adam")

In [None]:
# Scale the monthly series, remembering its own extremes for later denormalization.
monthly_normalized_dataframe, monthly_data_max, monthly_data_min = normalize_data(monthly_dataframe)

# Sliding windows of `monthly_sequence_length` observations, each labelled with the next one.
monthly_data_X, monthly_data_labels, monthly_data_years = split_data(monthly_normalized_dataframe, monthly_sequence_length)

# Chronological split: the first 70% of the windows train the model, the rest test it.
# `floor` comes from the `from math import floor` executed in an earlier cell.
separator = floor(len(monthly_data_X)*0.7)

monthly_train_X = monthly_data_X[:separator]
monthly_train_labels = monthly_data_labels[:separator]

monthly_test_X = monthly_data_X[separator:]
monthly_test_labels = monthly_data_labels[separator:]
monthly_test_years = monthly_data_years[separator:]

In [None]:
# Train the monthly SimpleRNN on the training windows: 20 epochs, mini-batches of 3.
monthly_model.fit(x=monthly_train_X, y=monthly_train_labels, epochs=20, batch_size=3)

In [None]:
# Run the trained monthly model on the held-out windows (outputs are still in normalized units).
# Note: this rebinds `predict_labels`, replacing the daily predictions.
predict_labels = monthly_model.predict(x=monthly_test_X)

In [None]:
# Bring the monthly predictions and ground truth back to the original scale.
# The monthly series was normalized with its own extremes, so those (not the daily
# data_max / data_min) are the ones that must be used here.
denormalized_predicts = denormalize_labels(predict_labels, monthly_data_max, monthly_data_min)
denormalized_labels = denormalize_labels(monthly_test_labels, monthly_data_max, monthly_data_min)

# Fixed viewport: sunspot values 0-80 on the y axis, years 2018-2020 on the x axis.
pyplot.ylim([0, 80])
pyplot.xlim([2018, 2020])
# Predictions in red, ground truth in blue.
pyplot.plot(monthly_test_years, denormalized_predicts, color= 'red')
pyplot.plot(monthly_test_years, denormalized_labels, color= 'blue')
pyplot.show()

In [None]:
# Mean relative error (in percent) between the denormalized monthly predictions and labels.
# Both inputs are flattened first: the model output has one column per sample, and
# without flattening the accumulated error would be a one-element array, not a float.
predicted_values = np.ravel(denormalized_predicts)
actual_values = np.ravel(denormalized_labels)

# Each absolute difference is divided by the larger of the two values, floored at 0.0001
# so the denominator can never be zero or negative.
error_denominators = np.maximum(np.maximum(predicted_values, actual_values), 0.0001)
relative_error: float = float(np.sum(np.abs(predicted_values - actual_values) / error_denominators) * 100)

print(f"Error relativo: {relative_error/len(denormalized_labels)}%")

# Modelo Keras - LSTM

## Reformatear los datos

In [None]:
def to_LSTM_format(df:pd.DataFrame, time_steps:int = 12) -> tuple[np.ndarray, np.ndarray]:
  """Turn the "Sunspots" series into sliding windows with next-step labels.

  Sample ``i`` uses rows ``i .. i + time_steps - 1`` as input and row
  ``i + time_steps`` as label.

  Args:
    df: Frame with a numeric "Sunspots" column, in chronological order.
    time_steps: Number of consecutive observations per input window (>= 1).

  Returns:
    A tuple ``(windows, labels)`` with shapes ``(n_samples, time_steps)`` and
    ``(n_samples,)``. Both are empty when the frame has ``time_steps`` rows or fewer.

  Raises:
    ValueError: If ``time_steps`` is smaller than 1.
  """
  if time_steps < 1:
    raise ValueError("time_steps must be a positive integer")

  sunspots = df["Sunspots"].to_numpy(dtype=float)
  sample_count = len(sunspots) - time_steps
  if sample_count <= 0:
    return np.empty((0, time_steps)), np.empty(0)

  # Column k of the window matrix is the series shifted by k rows.
  windows = np.stack([sunspots[shift:shift + sample_count] for shift in range(time_steps)], axis=1)
  return windows, sunspots[time_steps:].copy()

# Window length used for every LSTM sample; the network's input layer expects 14 steps.
time_steps = 14

# Train on observations before 2014, dropping rows whose Sunspots value is -1
# (which appears to be the dataset's missing-value marker).
LSTM_train_dataframe = daily_dataframe[(daily_dataframe.Sunspots != -1) & (daily_dataframe.Fraction < 2014)]

# Pass the window length explicitly; relying on to_LSTM_format's default of 12
# would produce windows that do not match the 14-step model input.
LSTM_train_data, LSTM_train_tags = to_LSTM_format(LSTM_train_dataframe, time_steps)

# Preview the first 21 (window, label) pairs as a sanity check.
for index, (dataiter, tagiter) in enumerate(zip(LSTM_train_data[:21], LSTM_train_tags[:21])):
  print(index, "\t-->\t", dataiter, "\t|| ", tagiter)

In [None]:
# Construct LSTM network: `time_steps` scalar inputs -> 8 LSTM units -> 1 linear output.
LSTM_model = Sequential()
# Tie the input length to `time_steps` so the model always matches the window size
# used when building the samples.
LSTM_model.add(kr.Input(shape=(time_steps, 1)))
LSTM_model.add(LSTM(8, return_sequences = False))
LSTM_model.add(Dense(1, kernel_initializer='normal',activation='linear'))
# This is a regression task, so track mean absolute error; classification accuracy
# carries no information for continuous targets.
LSTM_model.compile(loss='mse', optimizer='adam', metrics=['mae'])

LSTM_model.fit(LSTM_train_data, LSTM_train_tags, epochs=80, batch_size=64)

In [None]:
# Validation window: daily observations with Fraction from 2014 up to and including 2024.
# Rows whose Sunspots value is -1 are dropped, mirroring how the training frame is built.
validate_dataframe = daily_dataframe[
  (daily_dataframe.Sunspots != -1)
  & (daily_dataframe.Fraction >= 2014)
  & (daily_dataframe.Fraction <= 2024)
]

# Use the same window length as the model input instead of to_LSTM_format's default of 12.
LSTM_validation_data, LSTM_validation_tags = to_LSTM_format(validate_dataframe, time_steps)
LSTM_prediction = LSTM_model.predict(LSTM_validation_data)

In [None]:
# Compare the LSTM output with the ground truth on the validation window.
# In the plot, "Expected" is the ground-truth label and "Real" is the model output.
expected_values = np.asarray(LSTM_validation_tags, dtype=float)
predicted_values = np.ravel(LSTM_prediction)  # model output has one column per sample

# The first label belongs to the row that follows the first full window, so the leading
# rows (which only ever serve as inputs) are skipped to keep each value on its own date.
label_offset = len(validate_dataframe) - len(expected_values)
label_fractions = validate_dataframe["Fraction"].to_numpy()[label_offset:]

# Mean relative error: |expected - predicted| divided by the larger of the two values,
# floored at 0.0001 so the denominator can never be zero or negative.
error_denominators = np.maximum(np.maximum(expected_values, predicted_values), 0.0001)
mean_rel_error: float = float(np.mean(np.abs(expected_values - predicted_values) / error_denominators))

# Show error
print("Mean Relative Error -->", mean_rel_error)

# Graph: long-form frame with one row per (date, series) pair.
LSTM_graph_dataframe = pd.DataFrame({
  "Fraction": np.concatenate([label_fractions, label_fractions]),
  "Sunspots": np.concatenate([expected_values, predicted_values]),
  "Type": ["Expected"] * len(expected_values) + ["Real"] * len(predicted_values),
})
seaborn.lineplot(data=LSTM_graph_dataframe,x="Fraction",y="Sunspots", hue="Type", palette=["#000088","#99FF00"])

# Análisis Hiperparámetros

# Comparación Modelos