# MLP (Multi-Layer Perceptron) - Produção Solar França

## Objetivo
Modelar e prever a produção de energia solar na França usando redes neurais MLP

## Dataset
- **Arquivo**: solar_france.xlsx
- **Variável Target**: Production (produção solar)
- **Frequência**: Dados horários
- **Abordagem**: Transformar série temporal em problema supervisionado

In [None]:
# Importações
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os, json, warnings
warnings.filterwarnings('ignore')

# Plot appearance. Order kept as in the original: matplotlib style sheet
# first, then the default figure size, then the seaborn palette.
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = (12, 6)
sns.set_palette('husl')

# Seed NumPy's global random number generator.
np.random.seed(42)

# Directory that receives every figure / CSV / JSON written by this notebook.
output_dir = '../../out/solar_france/MLP'
os.makedirs(output_dir, exist_ok=True)

## 1. Carregamento dos Dados

In [None]:
# Load the spreadsheet and index it by timestamp.
data_path = '../../data/solar_france.xlsx'
df = pd.read_excel(data_path)

# Parse the timestamp column, use it as the index, sort chronologically and
# discard every row that has a missing value.
df['Date and Hour'] = pd.to_datetime(df['Date and Hour'])
df = df.set_index('Date and Hour')
df = df.sort_index()
df = df.dropna()

# Quick summary of what was loaded.
first_timestamp = df.index.min()
last_timestamp = df.index.max()
print('Shape:', df.shape)
print('Período:', first_timestamp, '->', last_timestamp)
print('\nEstatísticas:')
print(df['Production'].describe())
df.head()

## 2. Engenharia de Features

In [None]:
def create_features(data, n_lags=24, windows=(3, 6, 12, 24, 48)):
    """
    Build the supervised-learning feature matrix for the MLP.

    Every feature derived from ``Production`` uses only values strictly
    before the row's own timestamp, so no feature contains the target it is
    paired with.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Production`` column and a datetime index (the index
        must expose ``hour``, ``dayofweek``, ``month`` and ``day``).
        Lags are positional shifts, so rows are assumed to be evenly spaced
        in time -- TODO confirm for the input file.
    n_lags : int, default 24
        Number of lag columns ``lag_1`` .. ``lag_{n_lags}``.
    windows : iterable of int, default (3, 6, 12, 24, 48)
        Window lengths (in rows) for the rolling mean/std/max/min columns.

    Returns
    -------
    X : pandas.DataFrame
        Feature matrix on the same index as ``data``. The leading rows
        contain NaN (not enough history); the caller is expected to drop them.
    y : pandas.Series
        The ``Production`` column, unchanged.
    """
    X = pd.DataFrame(index=data.index)
    y = data['Production']

    # Lag features: value observed i rows earlier.
    for i in range(1, n_lags + 1):
        X[f'lag_{i}'] = y.shift(i)

    # Rolling statistics over the *previous* w rows. Shifting by one row
    # first keeps the current target out of its own window; rolling directly
    # on y would let the model recover y[t] from roll_mean plus the lags.
    past = y.shift(1)
    for w in windows:
        window = past.rolling(w)
        X[f'roll_mean_{w}'] = window.mean()
        X[f'roll_std_{w}'] = window.std()
        X[f'roll_max_{w}'] = window.max()
        X[f'roll_min_{w}'] = window.min()

    # Raw calendar features.
    X['hour'] = data.index.hour
    X['dow'] = data.index.dayofweek
    X['month'] = data.index.month
    X['day'] = data.index.day

    # Cyclical (sin/cos) encoding of the periodic calendar features.
    X['hour_sin'] = np.sin(2 * np.pi * X['hour'] / 24)
    X['hour_cos'] = np.cos(2 * np.pi * X['hour'] / 24)
    X['dow_sin'] = np.sin(2 * np.pi * X['dow'] / 7)
    X['dow_cos'] = np.cos(2 * np.pi * X['dow'] / 7)
    X['month_sin'] = np.sin(2 * np.pi * X['month'] / 12)
    X['month_cos'] = np.cos(2 * np.pi * X['month'] / 12)

    return X, y

# Build the features, then drop the leading rows that lack enough history.
X, y = create_features(df, n_lags=48)

# Features and target are concatenated so that dropna() removes the same
# rows from both.
data = pd.concat([X, y.rename('target')], axis=1)
data = data.dropna()
X = data.drop('target', axis=1)
y = data['target']

print(f'Features criadas: {X.shape[1]}')
print(f'Amostras: {len(X)}')
print(f'\nPrimeiras features:')
print(X.columns[:10].tolist())

## 3. Divisão e Normalização

In [None]:
# Chronological 80/20 split: the first 80% of rows train, the rest test.
train_size = int(len(X) * 0.8)
X_train = X.iloc[:train_size]
X_test = X.iloc[train_size:]
y_train = y.iloc[:train_size]
y_test = y.iloc[train_size:]

print(f'Treino: {len(X_train)} | Teste: {len(X_test)}')

# Standardize the features. The scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print('\nDados normalizados!')

## 4. Grid Search e Treinamento

In [None]:
# Hyperparameter search space.
param_grid = {
    'hidden_layer_sizes': [(64,), (128,), (128, 64), (128, 64, 32)],
    'alpha': [0.0001, 0.001, 0.01],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.001, 0.01],
}

# Cross-validation splitter for time series (5 splits).
tscv = TimeSeriesSplit(n_splits=5)

# Base estimator; the grid above overrides the remaining hyperparameters.
mlp = MLPRegressor(
    max_iter=500,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42,
    verbose=False,
)

print('Iniciando Grid Search...')
grid_search = GridSearchCV(
    mlp,
    param_grid,
    cv=tscv,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1,
)

# To keep the search fast, only the first `sample_size` training rows
# (at most 10000) are used.
sample_size = min(10000, len(X_train_scaled))
X_search = X_train_scaled[:sample_size]
y_search = y_train.iloc[:sample_size]
grid_search.fit(X_search, y_search)

# The scorer is the negated MSE, so the sign is flipped for display.
print('\nMelhores parâmetros:', grid_search.best_params_)
print('Melhor CV MSE:', -grid_search.best_score_)

## 5. Modelo Final e Previsões

In [None]:
# Refit the best estimator found by the search on the complete training set
# (fit() returns the estimator itself).
best_mlp = grid_search.best_estimator_.fit(X_train_scaled, y_train)

# Predictions for both partitions.
y_pred_test = best_mlp.predict(X_test_scaled)
y_pred_train = best_mlp.predict(X_train_scaled)

print('Previsões concluídas!')

## 6. Avaliação

In [None]:
# Error metrics on the held-out test set.
mse = mean_squared_error(y_test, y_pred_test)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

# MAPE is undefined where the actual value is zero. Dividing by
# (actual + 1e-10) turns each such sample into an enormous term that swamps
# the average, so the percentage error is averaged over non-zero actuals
# only. NOTE(review): hourly solar production presumably contains zeros
# (night hours) -- confirm against the data.
actual = np.asarray(y_test, dtype=float)
predicted = np.asarray(y_pred_test, dtype=float)
nonzero = actual != 0
if nonzero.any():
    mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
else:
    # No non-zero actual value: MAPE cannot be computed.
    mape = float('nan')

print('='*50)
print('MÉTRICAS - MLP')
print('='*50)
print(f'RMSE: {rmse:.2f}')
print(f'MAE: {mae:.2f}')
print(f'MAPE: {mape:.2f}%')
print(f'R²: {r2:.4f}')

# Two stacked panels: test-period series on top, actual-vs-predicted scatter
# underneath.
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
ax_series, ax_scatter = axes

# Top panel: actual and predicted values over the test period.
ax_series.plot(y_test.index, y_test.values, label='Real', alpha=0.7)
ax_series.plot(y_test.index, y_pred_test, label='Previsto', alpha=0.7)
ax_series.set_title('MLP - Previsões vs Real')
ax_series.set_ylabel('Produção (MW)')
ax_series.legend()
ax_series.grid(True, alpha=0.3)

# Bottom panel: scatter of actual vs predicted, with the identity line
# (dashed red) as the perfect-prediction reference.
lowest, highest = y_test.min(), y_test.max()
ax_scatter.scatter(y_test, y_pred_test, alpha=0.5)
ax_scatter.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
ax_scatter.set_xlabel('Real')
ax_scatter.set_ylabel('Previsto')
ax_scatter.set_title('Real vs Previsto')
ax_scatter.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f'{output_dir}/mlp_predictions.png', dpi=300, bbox_inches='tight')
plt.show()

## 7. Análise de Resíduos

In [None]:
from scipy import stats

# Residual = actual minus predicted, on the test set.
residuals = y_test - y_pred_test

# 2x2 grid of residual diagnostics.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax_time, ax_hist), (ax_qq, ax_vs_pred) = axes

# Residuals over time, with a zero reference line.
ax_time.plot(y_test.index, residuals)
ax_time.axhline(y=0, color='r', linestyle='--')
ax_time.set_title('Resíduos ao Longo do Tempo')
ax_time.set_ylabel('Resíduo')
ax_time.grid(True, alpha=0.3)

# Histogram of the residuals.
ax_hist.hist(residuals, bins=50, edgecolor='black', alpha=0.7)
ax_hist.set_title('Distribuição dos Resíduos')
ax_hist.set_xlabel('Resíduo')
ax_hist.set_ylabel('Frequência')

# Q-Q plot against a normal distribution. The title is set after probplot()
# draws on the axes, as in the original ordering.
stats.probplot(residuals, dist="norm", plot=ax_qq)
ax_qq.set_title('Q-Q Plot')

# Residuals against predicted values, with a zero reference line.
ax_vs_pred.scatter(y_pred_test, residuals, alpha=0.5)
ax_vs_pred.axhline(y=0, color='r', linestyle='--')
ax_vs_pred.set_xlabel('Valores Previstos')
ax_vs_pred.set_ylabel('Resíduos')
ax_vs_pred.set_title('Resíduos vs Previstos')
ax_vs_pred.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f'{output_dir}/mlp_residuals.png', dpi=300, bbox_inches='tight')
plt.show()

## 8. Salvar Resultados

In [None]:
# Write the test-set predictions next to the actual values.
pred_df = pd.DataFrame(
    {'real': y_test, 'previsto': y_pred_test},
    index=y_test.index,
)
pred_df.to_csv(f'{output_dir}/mlp_predictions.csv')

# Collect the run summary. Metric values are cast to plain Python floats
# before being serialized to JSON.
metric_values = (
    ('mse', mse),
    ('rmse', rmse),
    ('mae', mae),
    ('mape', mape),
    ('r2', r2),
)
results = {
    'model': 'MLP',
    'dataset': 'solar_france',
    'best_params': grid_search.best_params_,
    'metrics': {name: float(value) for name, value in metric_values},
    'n_features': X.shape[1],
    'train_size': len(X_train),
    'test_size': len(X_test),
}

results_path = f'{output_dir}/mlp_results.json'
with open(results_path, 'w') as fh:
    json.dump(results, fh, indent=2)

print(f'\n✓ Resultados salvos em {output_dir}/')