# Introdução:

Pré-processamento:

## Importações e definições:

In [17]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
import torch.optim as optim


In [18]:
df_energia = pd.read_csv('energy_dataset_.csv')
df_energia = df_energia.dropna()
df_energia

Unnamed: 0,Type_of_Renewable_Energy,Installed_Capacity_MW,Energy_Production_MWh,Energy_Consumption_MWh,Energy_Storage_Capacity_MWh,Storage_Efficiency_Percentage,Grid_Integration_Level,Initial_Investment_USD,Funding_Sources,Financial_Incentives_USD,GHG_Emission_Reduction_tCO2e,Air_Pollution_Reduction_Index,Jobs_Created
0,4,93.423205,103853.2206,248708.4892,2953.248771,89.887562,4,4.732248e+08,1,9.207772e+06,6663.816572,81.742461,1366
1,4,590.468942,190223.0649,166104.1642,5305.174042,84.403343,4,1.670697e+08,2,1.685101e+06,30656.049820,78.139042,1743
2,1,625.951142,266023.4824,424114.6308,2620.192622,60.498249,2,8.463610e+07,2,5.111813e+06,1749.613759,8.461296,363
3,1,779.998728,487039.5296,308337.7316,1925.250307,86.897861,3,3.967690e+08,2,4.805902e+06,43233.237820,8.402441,2821
4,3,242.106837,482815.0856,360437.7705,3948.945383,70.949351,2,3.574413e+07,1,1.668601e+07,14858.662760,28.822867,2583
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,3,745.032555,280007.5738,230544.8268,4351.687893,90.791405,4,3.484136e+08,2,1.558508e+07,25234.911810,78.923200,1452
14996,1,15.187023,377340.5803,358547.3589,6792.194696,78.252040,4,2.560179e+08,3,6.866618e+06,15762.519790,54.982974,2598
14997,3,877.539059,480497.3920,214441.6719,4588.725297,58.282928,1,1.300112e+08,2,3.837764e+06,44597.809410,43.915897,2713
14998,7,551.264716,436383.1694,137043.8713,7251.144215,73.573666,2,3.334831e+08,2,5.347706e+06,34363.858000,4.877145,2128


In [19]:
df_energia_normalizado = df_energia.copy()
colunas_fixas = ['Type_of_Renewable_Energy', 'Grid_Integration_Level','Funding_Sources']
colunas_variaveis = df_energia_normalizado.columns.difference(colunas_fixas)

normalizador = StandardScaler()
df_energia_normalizado[colunas_variaveis] = normalizador.fit_transform(df_energia_normalizado[colunas_variaveis])
df_energia_normalizado

Unnamed: 0,Type_of_Renewable_Energy,Installed_Capacity_MW,Energy_Production_MWh,Energy_Consumption_MWh,Energy_Storage_Capacity_MWh,Storage_Efficiency_Percentage,Grid_Integration_Level,Initial_Investment_USD,Funding_Sources,Financial_Incentives_USD,GHG_Emission_Reduction_tCO2e,Air_Pollution_Reduction_Index,Jobs_Created
0,4,-1.396350,-1.030818,0.175876,-0.717674,1.012663,4,1.547603,1,-0.142039,-1.291580,1.086241,-0.783280
1,4,0.328286,-0.431270,-0.463380,0.095016,0.634044,4,-0.589165,2,-1.441940,0.377046,0.960051,-0.523489
2,1,0.451402,0.094910,1.533306,-0.832759,-1.016312,2,-1.164499,2,-0.849812,-1.633356,-1.480019,-1.474450
3,1,0.985912,1.629124,0.637334,-1.072891,0.806260,3,1.013991,2,-0.902672,1.251771,-1.482081,0.219363
4,3,-0.880451,1.599800,1.040524,-0.373618,-0.294790,2,-1.505734,1,1.150185,-0.721640,-0.766970,0.055357
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,3,0.864587,0.191982,0.035312,-0.234454,1.075062,4,0.676501,2,0.959946,0.000013,0.987512,-0.724018
14996,1,-1.667812,0.867633,1.025895,0.608845,0.209372,4,0.031637,3,-0.546585,-0.658778,0.149140,0.065693
14997,3,1.324355,1.583711,-0.089307,-0.152547,-1.169253,1,-0.847809,2,-1.069965,1.346675,-0.238422,0.144940
14998,7,0.192257,1.277486,-0.688271,0.767432,-0.113613,2,0.572295,2,-0.809050,0.634919,-1.605534,-0.258185


In [20]:
TARGET = "Storage_Efficiency_Percentage"
FEATURES = [
    "Type_of_Renewable_Energy",
    "Grid_Integration_Level",
    "Funding_Sources",
    "GHG_Emission_Reduction_tCO2e",
    "Installed_Capacity_MW",
    "Energy_Production_MWh",
    "Energy_Consumption_MWh",
    "Initial_Investment_USD",
    "Air_Pollution_Reduction_Index",

]

In [21]:
X = df_energia[FEATURES]  
y = df_energia[TARGET] 

In [22]:
X_valores = X.values
y_valores = y.values

In [23]:
X_treino, X_teste, y_treino, y_teste = train_test_split(X_valores, y_valores, test_size=0.1, random_state=1249)


In [28]:
# Função que o Optuna vai otimizar
def objective(trial):
    # Hiperparâmetros a serem testados
    n_camadas = trial.suggest_int("n_layers", 1, 4)
    tamanho_camada_oculta = trial.suggest_int("hidden_size", 16, 128)
    funcao_ativacao = trial.suggest_categorical("activation", ["ReLU", "Tanh"])
    taxa_aprendizado = trial.suggest_float("lr", 1e-4, 1e-2)

    # Montando a rede com base nos hiperparâmetros
    layers = []
    input_size = X.shape[1]
    for i in range(n_camadas):
        layers.append(nn.Linear(input_size if i == 0 else tamanho_camada_oculta, tamanho_camada_oculta))
        layers.append(getattr(nn, funcao_ativacao)())
    layers.append(nn.Linear(tamanho_camada_oculta, 1))
    model = nn.Sequential(*layers)

    # Treinamento
    otimizador = optim.Adam(model.parameters(), lr=taxa_aprendizado)
    fun_perda = nn.MSELoss()
    
    for epoch in range(50):  
        model.train()
        y_pred = model(torch.tensor(X_treino, dtype=torch.float32))
        loss = fun_perda(y_pred, torch.tensor(y_teste, dtype=torch.float32))
        otimizador.zero_grad()
        loss.backward()
        otimizador.step()
    
    return loss.item()

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)  # Faz 100 testes de arquitetura

print("Melhores hiperparâmetros:", study.best_params)


[I 2025-05-19 21:29:28,839] A new study created in memory with name: no-name-95ae3bd3-a2e5-45f4-b871-89c6f52334c2
  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-05-19 21:29:30,904] Trial 0 finished with value: 5017.88916015625 and parameters: {'n_layers': 4, 'hidden_size': 34, 'activation': 'Tanh', 'lr': 0.0018606138391135188}. Best is trial 0 with value: 5017.88916015625.
[I 2025-05-19 21:29:32,523] Trial 1 finished with value: 4762.7392578125 and parameters: {'n_layers': 2, 'hidden_size': 17, 'activation': 'Tanh', 'lr': 0.007627763520525465}. Best is trial 1 with value: 4762.7392578125.
[I 2025-05-19 21:29:34,404] Trial 2 finished with value: 826957824.0 and parameters: {'n_layers': 3, 'hidden_size': 59, 'activation': 'ReLU', 'lr': 0.0034170415479228542}. Best is trial 1 with value: 4762.7392578125.
[I 2025-05-19 21:29:36,302] Trial 3 finished with value: 17477386240.0 and parameters: {'n_layers': 3, 'hidden_size': 62, 'activation': 'ReLU', 'lr': 0.0044481452186

Melhores hiperparâmetros: {'n_layers': 4, 'hidden_size': 128, 'activation': 'Tanh', 'lr': 0.009991506030369737}
