In [None]:
# notebook: notebooks/carbon_price_prediction.ipynb

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

# Initial setup: apply the seaborn "whitegrid" theme and make sure the
# directory that receives saved figures exists (no error if it already does).
FIGURES_DIR = "results/figures"
sns.set(style='whitegrid')
os.makedirs(FIGURES_DIR, exist_ok=True)

# Load the four input tables from CSV (paths are relative to the current
# working directory).
csv_paths = (
    'data/carbon_price.csv',
    'data/emissions.csv',
    'data/gdp.csv',
    'data/deforestation.csv',
)
carbon_price, emissions, gdp, deforestation = map(pd.read_csv, csv_paths)

# Show the first rows of each table as a quick visual check.
for frame in (carbon_price, emissions, gdp, deforestation):
    display(frame.head())

# Build the main dataset: harmonise column names, then join the four tables.
# NOTE: adjust the source column names below to match the real CSV headers.
# The identity entries (e.g. "GDP": "GDP") are no-ops as written; presumably
# they are placeholders for the actual header names.
key_names = {"Year": "year", "Country": "country"}

carbon_price = carbon_price.rename(columns={**key_names, "Price": "CarbonPrice"})
emissions = emissions.rename(columns={**key_names, "Emissions": "Emissions"})
gdp = gdp.rename(columns={**key_names, "GDP": "GDP"})
deforestation = deforestation.rename(columns={**key_names, "Deforestation": "Deforestation"})

# Inner joins: only (country, year) pairs present in every table survive.
df = carbon_price
for other in (emissions, gdp, deforestation):
    df = df.merge(other, on=["country", "year"], how="inner")

# First look at the data: pairwise relationships between the carbon price
# and the three explanatory columns.
pairplot_columns = ["CarbonPrice", "Emissions", "GDP", "Deforestation"]
sns.pairplot(df[pairplot_columns])
plt.show()

# Separate the input features (X) from the regression target (y).
feature_columns = ["Emissions", "GDP", "Deforestation"]
target_column = "CarbonPrice"
X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors to compare, keyed by a display label.
#
# The MLP (gradient-based) and KNN (distance-based) models are sensitive to
# feature scale, and the raw inputs are presumably on very different scales
# (emissions vs. GDP vs. deforestation). Both are therefore wrapped in a
# pipeline that standardises the features first. The scaler is fitted as part
# of the pipeline's own fit(), so it only ever sees the data passed to fit().
# The baseline, linear and tree-based models are left as they were.
modelos = {
    'Baseline': DummyRegressor(strategy='mean'),
    'Regressão Linear': LinearRegression(),
    'MLP': make_pipeline(
        StandardScaler(),
        MLPRegressor(max_iter=1000, random_state=42),
    ),
    'KNN': make_pipeline(StandardScaler(), KNeighborsRegressor()),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
}

# Training and evaluation: fit every candidate on the training split and
# store its mean squared error on the test split under the model's label.
resultados = {}
for nome, modelo in modelos.items():
    # fit() returns the fitted estimator, so the prediction can be chained.
    pred = modelo.fit(X_train, y_train).predict(X_test)
    resultados[nome] = mean_squared_error(y_test, pred)

# Bar chart of the mean squared error (EQM) per model; the figure is written
# to disk and then displayed.
nomes_modelos = list(resultados)
valores_eqm = list(resultados.values())

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=nomes_modelos, y=valores_eqm, ax=ax)
ax.set_ylabel('Erro Quadrático Médio (EQM)')
plt.xticks(rotation=45)  # slanted labels so the model names do not overlap
fig.tight_layout()
fig.savefig("results/figures/figura_eqm_modelos.png")
plt.show()