In [None]:
# Experiment configuration, one entry per city/site.
#
# Keys of each entry, as consumed elsewhere in this notebook:
#   "arquivo"    - CSV file passed to the loader.
#   "tipo"       - dataset family; selects the loader in carregar_dataframe
#                  ("wb", "tmy" or "sarah").
#   "targets"    - columns to predict; one model is trained per target.
#   "ignorar"    - columns dropped from the feature matrix in criar_loaders.
#   "neurons", "dropouts", "epochs", "batch_size"
#                - per-target hyperparameters; position i belongs to targets[i].
CIDADES = {
    "Touba": {
        "arquivo": "solar-measurementssenegal-toubaifcqc.csv",
        "tipo": "wb",
        "targets": ["dhi_rsi", "ghi_pyr", "ghi_sil"],
        "ignorar": ["dhi_rsi", "ghi_pyr", "ghi_sil"],
        "neurons": [[50, 50], [100, 200], [100, 50]],
        "dropouts": [[0.25, 0.25], [0.25, 0.25], [0, 0]],
        "epochs": [20, 40, 30],
        "batch_size": [128, 128, 128]
    },

    "Fatick": {
        "arquivo": "solar-measurementssenegal-fatickifcqc.csv",
        "tipo": "wb",
        "targets": ["dhi_rsi", "ghi_pyr", "ghi_sil"],
        "ignorar": ["dhi_rsi", "ghi_pyr", "ghi_sil"],
        "neurons": [[100, 50], [100, 50], [50, 50]],
        "dropouts": [[0.25, 0.25], [0.0, 0.25], [0.25, 0.25]],
        "epochs": [20, 50, 150],
        "batch_size": [128, 128, 128]
    },

    "SA Northern Cape": {
        "arquivo": "Timeseries_SA_northern_cape_2005_2016.csv",
        "tipo": "tmy",
        "targets": ["GSR"],
        "ignorar": ["GSR", "DSR"],
        "neurons": [[100, 50]],
        "dropouts": [[0, 0]],
        "epochs": [20],
        "batch_size": [512]
    },

    "CAR Vakaga": {
        "arquivo": "Timeseries_CAR_vakaga_2005_2016.csv",
        "tipo": "tmy",
        "targets": ["GSR"],
        "ignorar": ["GSR", "DSR"],
        "neurons": [[200, 200, 100]],
        "dropouts": [[0, 0, 0]],
        "epochs": [30],
        "batch_size": [512]
    },

    "Egypt Mut": {
        "arquivo": "Timeseries_egypt_mut_2005_2016.csv",
        "tipo": "tmy",
        "targets": ["GSR"],
        "ignorar": ["GSR", "DSR"],
        "neurons": [[200, 200, 100]],
        "dropouts": [[0, 0, 0]],
        "epochs": [7],
        "batch_size": [128]
    },

    "Algeria Tamanrasset": {
        # NOTE(review): the file name spells "tamaransset" while the key says
        # "Tamanrasset" - confirm this matches the file on disk.
        "arquivo": "Timeseries_tamaransset_2005_2016.csv",
        "tipo": "tmy",
        "targets": ["GSR"],
        "ignorar": ["GSR", "DSR"],
        "neurons": [[100, 100, 50]],
        "dropouts": [[0, 0, 0]],
        "epochs": [100],
        "batch_size": [512]
    },

    "Nigeria Borno": {
        "arquivo": "Timeseries_nigeria_borno_2005_2016.csv",
        "tipo": "tmy",
        # NOTE(review): the only "tmy" entry that targets DSR instead of GSR -
        # confirm this is intentional.
        "targets": ["DSR"],
        "ignorar": ["GSR", "DSR"],
        "neurons": [[100, 50]],
        "dropouts": [[0, 0]],
        "epochs": [50],
        "batch_size": [512]
    },

    "Nigeria Abuja": {
        "arquivo": "SARAH_nigeria_abuja.csv",
        "tipo": "sarah",
        "targets": ["DNI"],
        "ignorar": ["DNI"],
        "neurons": [[200, 200, 50]],
        "dropouts": [[0, 0, 0]],
        "epochs": [100],
        "batch_size": [128]
    },

    "Nigeria Akure": {
        "arquivo": "SARAH_nigeria_akure.csv",
        "tipo": "sarah",
        "targets": ["DNI"],
        "ignorar": ["DNI"],
        "neurons": [[200, 200, 100]],
        "dropouts": [[0, 0, 0]],
        "epochs": [100],
        "batch_size": [128]
    }
}

# Optimizer names tried for every city/target pair; the training cell maps
# "adam" to torch.optim.Adam and "sgd" to torch.optim.SGD.
OTIMIZADORES = ["adam", "sgd"]

# Learning rates tried for every optimizer.
LRS = [1e-3, 5e-4, 1e-4]


In [None]:
def carregar_wb(path):
    """Load a "wb"-type measurement CSV and expand its timestamp into columns.

    The file is read with ``;`` as separator. The ``time`` column is parsed as
    a datetime and split into ``year``, ``month``, ``day``, ``hour`` and
    ``minute``.

    Args:
        path: Path of the CSV file.

    Returns:
        A new DataFrame holding the five calendar columns followed by the
        meteorological and irradiance columns listed in ``colunas``.

    Raises:
        KeyError: If ``time`` or any of the returned columns is missing.
    """
    colunas = [
        'year', 'month', 'day', 'hour', 'minute', 'air_temperature',
        'relative_humidity', 'wind_speed', 'wind_from_direction',
        'wind_speed_calc', 'sensor_cleaning', 'precipitation',
        'barometric_pressure', 'dhi_rsi', 'ghi_sil', 'ghi_pyr',
    ]

    bruto = pd.read_csv(path, sep=';')
    instante = pd.to_datetime(bruto['time'])

    df = bruto.assign(
        year=instante.dt.year,
        month=instante.dt.month,
        day=instante.dt.day,
        hour=instante.dt.hour,
        minute=instante.dt.minute,
    )

    # Selecting the wanted columns already discards everything else (such as
    # 'time' or 'comments'), so no explicit drop is needed and files that lack
    # those unused columns no longer fail.
    return df[colunas]


In [None]:
import pandas as pd

In [None]:
def carregar_tmy(path):
    """Load a "tmy"-type hourly time-series CSV.

    The first 8 lines and the last 12 lines of the file are skipped. The
    ``time`` column is parsed with the format ``%Y%m%d:%H%M`` and split into
    ``year``, ``month``, ``day`` and ``hour``. ``Gb(i)`` is renamed to ``GSR``
    and ``Gd(i)`` to ``DSR``.

    Args:
        path: Path of the CSV file.

    Returns:
        A new DataFrame with the columns ``year, month, day, hour, H_sun,
        T2m, WS10m, GSR, DSR``.

    Raises:
        KeyError: If ``time`` or any of the returned columns is missing.
    """
    # engine='python' is required by pandas for skipfooter.
    bruto = pd.read_csv(
        path, sep=',', skiprows=8, skipfooter=12, engine='python'
    )

    instante = pd.to_datetime(bruto['time'], format="%Y%m%d:%H%M")

    df = bruto.assign(
        year=instante.dt.year,
        month=instante.dt.month,
        day=instante.dt.day,
        hour=instante.dt.hour,
    ).rename(columns={'Gb(i)': 'GSR', 'Gd(i)': 'DSR'})

    # The final selection discards unused columns ('time', 'Int', 'Gr(i)', ...),
    # so files that do not carry them are accepted as well.
    return df[['year', 'month', 'day', 'hour', 'H_sun', 'T2m', 'WS10m', 'GSR', 'DSR']]


In [None]:
def carregar_sarah(path):
    """Load a "sarah"-type daily CSV (``;``-separated).

    The ``time`` column is parsed with the format ``%d/%m/%Y`` and split into
    ``year``, ``month`` and ``day``.

    Args:
        path: Path of the CSV file.

    Returns:
        A DataFrame with the columns ``year, month, day, SDU, DNI``.
    """
    tabela = pd.read_csv(path, sep=';')
    datas = pd.to_datetime(tabela['time'], format="%d/%m/%Y")

    partes = {
        'year': datas.dt.year,
        'month': datas.dt.month,
        'day': datas.dt.day,
    }

    # Only the listed columns are returned; 'time' is left out by the selection.
    return tabela.assign(**partes)[['year', 'month', 'day', 'SDU', 'DNI']]


In [None]:
def carregar_dataframe(cidade, config):
    """Load the dataset of one city with the loader matching its type.

    Args:
        cidade: City name; only used in the error message.
        config: City entry with at least the keys ``"tipo"`` (one of
            ``"wb"``, ``"tmy"``, ``"sarah"``) and ``"arquivo"`` (CSV path).

    Returns:
        The DataFrame produced by the selected loader.

    Raises:
        ValueError: If ``config["tipo"]`` is not a known dataset type.
            Previously this case silently returned ``None``.
    """
    tipo = config["tipo"]

    if tipo == "wb":
        return carregar_wb(config["arquivo"])
    if tipo == "tmy":
        return carregar_tmy(config["arquivo"])
    if tipo == "sarah":
        return carregar_sarah(config["arquivo"])

    raise ValueError(f"Tipo de dataset desconhecido para {cidade!r}: {tipo!r}")

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

In [None]:
def criar_loaders(df, target, ignorar, batch_size):
    """Build train/test DataLoaders from a DataFrame with a 90/10 ordered split.

    Features are every column of ``df`` except those in ``ignorar``; the label
    is the single column ``target``. The first 90% of the rows form the
    training set and the remaining rows the test set. Neither loader shuffles,
    and no scaling is applied to features or labels.

    Args:
        df: Source DataFrame.
        target: Name of the label column.
        ignorar: Column names excluded from the features.
        batch_size: Batch size used by both loaders.

    Returns:
        Tuple ``(train_loader, test_loader, n_features)``.
    """
    features = df.drop(columns=ignorar).values
    alvo = df[target].values.reshape(-1, 1)

    # Row index where the training part ends.
    corte = int(0.9 * len(df))

    def _montar(x_parte, y_parte):
        """Wrap one split as float32 tensors in an unshuffled DataLoader."""
        conjunto = TensorDataset(
            torch.tensor(x_parte, dtype=torch.float32),
            torch.tensor(y_parte, dtype=torch.float32),
        )
        return DataLoader(conjunto, batch_size=batch_size, shuffle=False)

    train_loader = _montar(features[:corte], alvo[:corte])
    test_loader = _montar(features[corte:], alvo[corte:])

    return train_loader, test_loader, features.shape[1]


In [None]:
import torch.nn as nn

class Model(nn.Module):
    """Fully connected feed-forward network.

    Every hidden layer is ``Linear -> ReLU -> Dropout``; a final ``Linear``
    maps to ``num_outputs``. The layers live in ``self.rede`` (an
    ``nn.Sequential``), so checkpoint keys look like ``rede.<index>.weight``.

    Args:
        num_features: Size of the input vector.
        num_outputs: Size of the output vector.
        neurons: Width of each hidden layer, in order.
        dropouts: Dropout probability applied after each hidden layer; must
            have the same length as ``neurons``.

    Raises:
        ValueError: If ``neurons`` and ``dropouts`` differ in length.
    """

    def __init__(self, num_features, num_outputs, neurons, dropouts):
        super().__init__()

        neurons = list(neurons)
        dropouts = list(dropouts)

        # zip() would silently stop at the shorter list and build a smaller
        # network than configured, so reject mismatched settings up front.
        if len(neurons) != len(dropouts):
            raise ValueError(
                f"neurons ({len(neurons)}) e dropouts ({len(dropouts)}) "
                "devem ter o mesmo tamanho"
            )

        layers = []
        in_f = num_features

        for out_f, drop in zip(neurons, dropouts):
            layers.append(nn.Linear(in_f, out_f))
            layers.append(nn.ReLU())
            # A Dropout layer is added even for p == 0 so that layer indices
            # (and therefore checkpoint keys) do not depend on the value.
            layers.append(nn.Dropout(drop))
            in_f = out_f

        layers.append(nn.Linear(in_f, num_outputs))

        self.rede = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw output."""
        return self.rede(x)


In [None]:
# One record per (city, target) pair: the data loaders built from the city's
# dataset plus the hyperparameters configured for that target.
models_data = []

for cidade, cfg in CIDADES.items():

    # Rows containing any missing value are discarded before the split.
    df = carregar_dataframe(cidade, cfg).dropna()

    for idx, target in enumerate(cfg["targets"]):

        # Per-target settings are stored as parallel lists indexed by `idx`.
        train_loader, test_loader, n_features = criar_loaders(
            df, target, cfg["ignorar"], cfg["batch_size"][idx]
        )

        models_data.append({
            "tipo": cfg['tipo'],
            "cidade": cidade,
            "target": target,
            "qnt_features": n_features,
            "train_loader": train_loader,
            "test_loader": test_loader,
            "params": {
                chave: cfg[chave][idx]
                for chave in ("neurons", "dropouts", "epochs")
            },
        })


In [None]:
import torch
import numpy as np
import os

In [None]:
os.makedirs("output", exist_ok=True)

# Grid search: for every (city, target) record, train one fresh model per
# optimizer / learning-rate combination and save the weights of the epoch
# with the lowest mean training loss to output/<cidade>_<target>_<opt>_<lr>.pth.
for md in models_data:
    for opt_name in OTIMIZADORES:
        for lr in LRS:

            model = Model(
                num_features=md["qnt_features"],
                num_outputs=1,
                neurons=md["params"]["neurons"],
                dropouts=md["params"]["dropouts"]
            )

            criterio = nn.MSELoss()

            if opt_name == "adam":
                opt = torch.optim.Adam(
                    model.parameters(),
                    lr=lr
                )
            elif opt_name == "sgd":
                opt = torch.optim.SGD(
                    model.parameters(),
                    lr=lr,
                    momentum=0.9,
                    weight_decay=1e-4
                )
            else:
                # Without this branch an unknown name would silently reuse the
                # optimizer (and parameters) of the previous iteration.
                raise ValueError(f"Otimizador desconhecido: {opt_name!r}")

            best_loss = np.inf
            best_state = None

            print(f"\nTreinando {md['cidade']} - {md['target']} | opt={opt_name} | lr={lr}")

            epochs = md["params"]["epochs"]
            for epoch in range(epochs):

                epoch_loss = 0
                for Xb, yb in md["train_loader"]:

                    pred = model(Xb)
                    loss = criterio(pred, yb)

                    opt.zero_grad()
                    loss.backward()
                    # NOTE(review): the original comment said "for SGD only",
                    # but clipping is applied for every optimizer - confirm intent.
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    opt.step()

                    epoch_loss += loss.item()

                # Mean batch loss over the epoch.
                epoch_loss /= len(md["train_loader"])
                print(f"Epoch {epoch+1}/{epochs} | Loss: {epoch_loss:.4f}")

                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    # state_dict() returns references to the live parameter
                    # tensors; clone them so later optimizer steps do not
                    # overwrite the snapshot of the best epoch.
                    best_state = {
                        nome: tensor.detach().clone()
                        for nome, tensor in model.state_dict().items()
                    }

            torch.save(best_state, f"output/{md['cidade']}_{md['target']}_{opt_name}_{lr}.pth")



Treinando Nigeria Abuja - DNI | opt=adam | lr=0.001
Epoch 1/100 | Loss: 8910.4555
Epoch 2/100 | Loss: 8075.5114
Epoch 3/100 | Loss: 7986.0753
Epoch 4/100 | Loss: 7880.9653
Epoch 5/100 | Loss: 8084.1685
Epoch 6/100 | Loss: 7865.5479
Epoch 7/100 | Loss: 7781.1363
Epoch 8/100 | Loss: 7756.0543
Epoch 9/100 | Loss: 7650.7710
Epoch 10/100 | Loss: 7302.8310
Epoch 11/100 | Loss: 7385.4054
Epoch 12/100 | Loss: 6397.4898
Epoch 13/100 | Loss: 5754.5896
Epoch 14/100 | Loss: 6435.6766
Epoch 15/100 | Loss: 4749.9973
Epoch 16/100 | Loss: 3400.4294
Epoch 17/100 | Loss: 4284.8230
Epoch 18/100 | Loss: 4472.4180
Epoch 19/100 | Loss: 3138.0044
Epoch 20/100 | Loss: 2985.9199
Epoch 21/100 | Loss: 5859.4262
Epoch 22/100 | Loss: 2423.2697
Epoch 23/100 | Loss: 2199.2530
Epoch 24/100 | Loss: 2491.1724
Epoch 25/100 | Loss: 2335.2871
Epoch 26/100 | Loss: 2066.7455
Epoch 27/100 | Loss: 3234.1003
Epoch 28/100 | Loss: 2466.8543
Epoch 29/100 | Loss: 2350.6411
Epoch 30/100 | Loss: 2193.1961
Epoch 31/100 | Loss: 3293.

In [None]:
import matplotlib.pyplot as plt

def criar_plot(reals, preds, intervalo, xlabel, ylabel, marker, mdl, opt_name, lr):
    """Plot measured vs. predicted values, save the figure and display it.

    The figure is written to
    ``output_plots/<cidade>_<target>_<opt_name>_<lr>.png`` (300 dpi); the
    folder is created when missing.

    Args:
        reals: Measured values (drawn in black).
        preds: Predicted values (drawn in red).
        intervalo: X coordinates shared by both series.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        marker: When truthy, points are marked ('o' for real, '+' for predicted).
        mdl: Record providing the ``'cidade'`` and ``'target'`` keys used in
            the file name.
        opt_name: Optimizer name used in the file name.
        lr: Learning rate used in the file name.
    """
    fig, ax = plt.subplots(figsize=(12, 5))

    series = (
        (reals, "Real Data", 'black', 'o'),
        (preds, "Predição", 'red', '+'),
    )
    for valores, rotulo, cor, simbolo in series:
        ax.plot(
            intervalo,
            valores,
            label=rotulo,
            linewidth=0.9,
            color=cor,
            # marker=None falls back to the default (no marker).
            marker=simbolo if marker else None,
        )

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title("Real vs Predicted")
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()

    os.makedirs('output_plots', exist_ok=True)
    nome_arquivo = f"{mdl['cidade']}_{mdl['target']}_{opt_name}_{lr}.png"
    fig.savefig(os.path.join('output_plots', nome_arquivo), dpi=300, bbox_inches='tight')

    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate memory.
    plt.close(fig)

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Evaluate every saved checkpoint on its test split, collect MAE / RMSE / r²
# in `metricas`, and plot a fixed window of the test series per run.
metricas = []

for md in models_data:
    for opt_name in OTIMIZADORES:
        for lr in LRS:

            caminho = f"output/{md['cidade']}_{md['target']}_{opt_name}_{lr}.pth"

            if not os.path.exists(caminho):
                print(f"Modelo não encontrado: {caminho}")
                continue

            model = Model(
                num_features=md["qnt_features"],
                num_outputs=1,
                neurons=md["params"]["neurons"],
                dropouts=md["params"]["dropouts"]
            )

            model.load_state_dict(torch.load(caminho))
            model.eval()

            preds, reals = [], []

            with torch.no_grad():
                for Xb, yb in md["test_loader"]:
                    preds.append(model(Xb).cpu().numpy().reshape(-1))
                    reals.append(yb.cpu().numpy().reshape(-1))

            preds = np.concatenate(preds).reshape(-1, 1)
            reals = np.concatenate(reals).reshape(-1, 1)

            mae = mean_absolute_error(reals, preds)
            rmse = np.sqrt(mean_squared_error(reals, preds))
            # Squared Pearson correlation between measured and predicted values.
            r = np.corrcoef(reals.reshape(-1), preds.reshape(-1))[0, 1] ** 2

            print(md["cidade"], md["target"], opt_name, lr, ": ", mae, rmse, r)

            metricas.append({
                "cidade": md["cidade"],
                "target": md["target"],
                "optimizer": opt_name,
                "lr": lr,
                "MAE": mae,
                "RMSE": rmse,
                # Note: despite the key name this holds the squared correlation.
                "R": r
            })

            dataset_type = md['tipo']
            target = md['target']

            # Fallback label so an unmapped target never reuses the label of a
            # previous iteration.
            ylabel = target

            if dataset_type == 'wb':
                s, f = 9_500, 10_500

                # Touba uses a window shifted by 720 samples.
                if md['cidade'] == 'Touba':
                    s += 720
                    f += 720

                xlabel = 'Time [min]'
                marker = False

                if target == 'dhi_rsi':
                    ylabel = 'Diffused Horizontal Irradiance in W/m²'
                elif target == 'ghi_pyr':
                    ylabel = 'Global Horizontal Irradiance from thermopile pyranometer in W/m²'
                elif target == 'ghi_sil':
                    ylabel = 'Global Horizontal Irradiance from silicon pyranometer in W/m²'

            elif dataset_type == 'tmy':
                s, f = 5000, 5070
                xlabel = 'Time [h]'
                marker = True

                if target == 'GSR':
                    ylabel = 'Global beam direct solar irradiance in W/m²'
                elif target == 'DSR':
                    ylabel = 'Diffused solar irradiance in W/m2'

            elif dataset_type == 'sarah':
                s, f = 0, 1_140
                xlabel = 'Timestep [Days]'
                marker = False

                ylabel = 'Direct Normal Irradiance [W/m²]'

            else:
                # No plot window is defined for other dataset types.
                continue

            reals = reals[s:f]
            preds = preds[s:f]
            # Derive the x axis from the actual slice length: when the test
            # split is shorter than `f`, range(s, f) would not match the data
            # and plotting would fail with a shape mismatch.
            intervalo = list(range(s, s + len(reals)))

            criar_plot(reals, preds, intervalo, xlabel, ylabel, marker, md, opt_name, lr)


In [None]:
# Turn the list of per-run metric dicts collected in `metricas` into a table
# (one row per evaluated checkpoint) and persist it as CSV without the
# pandas index column.
df_metricas = pd.DataFrame(metricas)
df_metricas.to_csv("metricas_modelos.csv", index=False)

In [None]:
import shutil
from google.colab import files

# Zip each artifact folder next to itself and hand the archive to the
# Colab browser download helper, plots first, then model checkpoints.
for pasta, arquivo_zip in (
    ("output_plots", "output_plots.zip"),
    ("output", "output.zip"),
):
    shutil.make_archive(pasta, "zip", pasta)
    files.download(arquivo_zip)

# The metrics table is small enough to be downloaded as-is.
files.download('metricas_modelos.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter


# Export the metrics table to Excel, then merge the "cidade" cells of
# consecutive rows that belong to the same city.
df_metricas.to_excel("resultado_formatado.xlsx", index=False)

wb = load_workbook("resultado_formatado.xlsx")
ws = wb.active

# The merge ranges are computed from the data instead of being hard-coded:
# the number of rows per city depends on how many targets, optimizers and
# learning rates were evaluated, so fixed ranges such as A2:A4 / A5:A7 do not
# line up with the city boundaries.
if "cidade" in df_metricas.columns:
    coluna = df_metricas.columns.get_loc("cidade") + 1  # Excel columns are 1-based
    cidades = df_metricas["cidade"].tolist()

    inicio = 0
    for pos in range(1, len(cidades) + 1):
        fim_do_grupo = pos == len(cidades) or cidades[pos] != cidades[inicio]
        if fim_do_grupo:
            if pos - inicio > 1:
                # +2 / +1: sheet row 1 is the header and Excel rows are 1-based.
                ws.merge_cells(
                    start_row=inicio + 2,
                    start_column=coluna,
                    end_row=pos + 1,
                    end_column=coluna,
                )
            inicio = pos

wb.save("resultado_formatado.xlsx")
files.download('resultado_formatado.xlsx')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>