In [9]:
# ==============================================================================
# @title 0. Instalasi Library
# ==============================================================================
!pip install holidays -q
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import os
import json
import joblib
import warnings
import holidays

warnings.filterwarnings('ignore')

# ==============================================================================
# @title 1. KONFIGURASI UTAMA
# ==============================================================================
# --- Path Folder ---
SOURCE_DATA_DIR = 'sumber_data' # Folder data training (historis 2024)
RESULTS_DIR = 'hasil_analisis_rekomendasi' # Folder output untuk semua hasil
EXPORT_DIR = 'exported_models_per_lantai' # Folder untuk menyimpan model final

# --- Konfigurasi Model & Data ---
TARGET_VARIABLE = 'Konsumsi Energi'
RELEVANT_COLUMNS = [
    'Konsumsi Energi', 'Temperature', 'Showers', 'Cloud Cover', 'Weather Code',
    'Relative Humidity', 'Dew Point', 'Precipitation',
    'Pressure MSL', 'Surface Pressure', 'Evapotranspiration',
    'Vapour Pressure Deficit', 'Wind Speed', 'Wind Direction', 'Wind Gusts',
    'Soil Temperature', 'Sunshine Duration', 'UV Index', 'Direct Radiation',
    'Current', 'Power Factor'
]
MINIMUM_ROWS = 500

# --- Konfigurasi Rekomendasi ---
DEVICE_OPERATING_HOURS = {
    'ahu': (8, 16), 'sdp': (0, 23), 'lift': (7, 20), 'chiller': (8, 17)
}
CORE_BUSINESS_HOURS = (9, 17) # Jam 9 pagi sampai 5 sore
DEFAULT_OPERATING_HOURS = (8, 17)

# --- Membuat Folder jika belum ada ---
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(EXPORT_DIR, exist_ok=True)
os.makedirs(SOURCE_DATA_DIR, exist_ok=True)

print("✅ Konfigurasi selesai. Skrip akan berjalan menggunakan data dari 'sumber_data'.")

# ==============================================================================
# @title 2. Definisi Fungsi-Fungsi Pembantu
# ==============================================================================

def train_and_evaluate_models(X_train, y_train, X_val, y_val, X_test, y_test):
    """Melatih semua model dan mengembalikan hasilnya dengan metrik lengkap."""
    results = {}
    
    # --- Model 1: Random Forest Regressor ---
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    y_pred_rf = rf_model.predict(X_test)
    results['RandomForest'] = {
        'model': rf_model, 
        'metrics': {
            'mae': mean_absolute_error(y_test, y_pred_rf),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
            'r2': r2_score(y_test, y_pred_rf)
        }
    }
    
    # --- Model 2: Gradient Boosting Regressor ---
    gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
    gb_model.fit(X_train, y_train)
    y_pred_gb = gb_model.predict(X_test)
    results['GradientBoosting'] = {
        'model': gb_model, 
        'metrics': {
            'mae': mean_absolute_error(y_test, y_pred_gb),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred_gb)),
            'r2': r2_score(y_test, y_pred_gb)
        }
    }

    # --- Model 3: LSTM ---
    scaler_X = MinMaxScaler(); scaler_y = MinMaxScaler()
    X_train_s = scaler_X.fit_transform(X_train); y_train_s = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
    X_val_s = scaler_X.transform(X_val); y_val_s = scaler_y.transform(y_val.values.reshape(-1, 1))
    X_test_s = scaler_X.transform(X_test)
    X_train_r = X_train_s.reshape((X_train_s.shape[0], 1, X_train_s.shape[1]))
    X_val_r = X_val_s.reshape((X_val_s.shape[0], 1, X_val_s.shape[1]))
    X_test_r = X_test_s.reshape((X_test_s.shape[0], 1, X_test_s.shape[1]))
    lstm = Sequential([LSTM(50, activation='relu', input_shape=(1, X_train_r.shape[2])), Dense(1)])
    lstm.compile(optimizer='adam', loss='mse')
    lstm.fit(X_train_r, y_train_s, epochs=50, batch_size=32, validation_data=(X_val_r, y_val_s), verbose=0, shuffle=False)
    y_pred = scaler_y.inverse_transform(lstm.predict(X_test_r, verbose=0))
    results['LSTM'] = {
        'model': lstm, 
        'metrics': {
            'mae': mean_absolute_error(y_test, y_pred),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
            'r2': r2_score(y_test, y_pred)
        },
        'scaler_X': scaler_X, 
        'scaler_y': scaler_y
    }
    return results

def analyze_feature_importance(model, features, device_identifier, output_dir):
    """Menganalisis dan memvisualisasikan fitur yang paling berpengaruh."""
    print("   - Menganalisis Fitur Paling Berpengaruh...")
    if not hasattr(model, 'feature_importances_'):
        print("     - Analisis feature importance tidak didukung untuk model LSTM.")
        return

    importance_df = pd.DataFrame({'Fitur': features, 'Tingkat Pengaruh': model.feature_importances_})
    importance_df = importance_df.sort_values(by='Tingkat Pengaruh', ascending=False).head(10)

    plt.figure(figsize=(12, 8))
    sns.barplot(x='Tingkat Pengaruh', y='Fitur', data=importance_df, palette='viridis')
    plt.title(f'Fitur Paling Berpengaruh Terhadap Konsumsi Energi\n({device_identifier})', fontsize=16)
    plt.xlabel('Tingkat Pengaruh'); plt.ylabel('Fitur')
    plt.tight_layout()
    plot_path = os.path.join(output_dir, f'feature_importance_{device_identifier}.png')
    plt.savefig(plot_path); plt.close()
    print(f"     - ✓ Visualisasi pengaruh fitur disimpan di: {plot_path}")

def predict_on_test_set(model, X_test, y_test, scalers=None):
    """Melakukan prediksi pada test set dan mengembalikan DataFrame perbandingan."""
    if isinstance(model, tf.keras.Model):
        X_test_s = scalers['X'].transform(X_test)
        X_test_r = X_test_s.reshape((X_test_s.shape[0], 1, X_test_s.shape[1]))
        predictions_s = model.predict(X_test_r, verbose=0)
        predictions = scalers['y'].inverse_transform(predictions_s).flatten()
    else:
        predictions = model.predict(X_test)
    
    predictions = np.maximum(0, predictions)
    
    df_results = pd.DataFrame({
        'timestamp': y_test.index,
        'Aktual_Konsumsi_kWh': y_test.values / 1000,
        'Prediksi_Konsumsi_kWh': predictions / 1000
    })
    return df_results.sort_values('timestamp')

def generate_recommendations(df_results, device_identifier, hours_config, default_hours, core_hours):
    """
    Menganalisis hasil prediksi untuk memberikan rekomendasi yang masuk akal.
    """
    device_type = device_identifier.split('-')[1].lower()
    operating_hours = hours_config.get(device_type, default_hours)
    
    print(f"\n   --- Sistem Rekomendasi Penjadwalan untuk {device_identifier.upper()} ---")
    
    df_operating = df_results[(df_results['timestamp'].dt.hour >= operating_hours[0]) & 
                              (df_results['timestamp'].dt.hour <= operating_hours[1]) &
                              (df_results['timestamp'].dt.dayofweek < 5)].copy()

    if df_operating.empty:
        print(f"     - Tidak ada data pada jam operasional ({operating_hours[0]}:00 - {operating_hours[1]}:00) yang ditemukan.")
        return None

    df_business = df_operating[(df_operating['timestamp'].dt.hour >= core_hours[0]) &
                               (df_operating['timestamp'].dt.hour <= core_hours[1])]

    if df_business.empty:
        print(f"     - Tidak ada data pada jam kerja inti ({core_hours[0]}:00 - {core_hours[1]}:00) yang ditemukan.")
        return None

    duration_days = (df_operating['timestamp'].max() - df_operating['timestamp'].min()).days + 1

    hourly_avg_operating = df_operating.groupby(df_operating['timestamp'].dt.hour)['Prediksi_Konsumsi_kWh'].mean()
    hourly_avg_business = df_business.groupby(df_business['timestamp'].dt.hour)['Prediksi_Konsumsi_kWh'].mean()

    peak_hour = hourly_avg_operating.idxmax()
    peak_consumption = hourly_avg_operating.max()
    off_peak_hour = hourly_avg_business.idxmin()
    off_peak_consumption = hourly_avg_business.min()
    total_predicted_consumption = df_operating['Prediksi_Konsumsi_kWh'].sum()
    
    num_peak_hours = len(df_operating[df_operating['timestamp'].dt.hour == peak_hour])
    potential_savings_kwh = (peak_consumption - off_peak_consumption) * num_peak_hours
    potential_savings_percent = (potential_savings_kwh / total_predicted_consumption) * 100 if total_predicted_consumption > 0 else 0

    print(f"     - Analisis untuk jam operasional ({operating_hours[0]}:00 - {operating_hours[1]}:00), Senin-Jumat (berdasarkan data uji selama ~{duration_days} hari):")
    print(f"       - Total Perkiraan Konsumsi: {total_predicted_consumption:,.2f} kWh")
    print(f"       - Jam Puncak Konsumsi     : Pukul {peak_hour}:00 (rata-rata {peak_consumption:.2f} kWh)")
    print(f"       - Jam Lembah (di jam kerja): Pukul {off_peak_hour}:00 (rata-rata {off_peak_consumption:.2f} kWh)")
    print("\n     - REKOMENDASI:")
    print(f"       - Pertimbangkan mengurangi beban pada jam puncak sekitar pukul {peak_hour}:00.")
    print(f"       - Jika memungkinkan, geser sebagian beban kerja ke jam lembah (saat jam kerja) sekitar pukul {off_peak_hour}:00.")
    if potential_savings_kwh > 0 and peak_hour != off_peak_hour:
        print(f"       - POTENSI PENGHEMATAN: Dengan menggeser beban dari jam puncak ke jam lembah,")
        print(f"         Anda berpotensi menghemat ~{potential_savings_kwh:.2f} kWh ({potential_savings_percent:.2f}%) selama periode ~{duration_days} hari.")
        
    return {
        'total_consumption': total_predicted_consumption,
        'potential_savings': potential_savings_kwh,
        'duration_days': duration_days
    }

def create_savings_heatmap(savings_df, output_dir):
    """Membuat dan menyimpan heatmap dari data potensi penghematan."""
    if savings_df.empty:
        print("\nTidak ada data penghematan untuk membuat heatmap.")
        return

    try:
        heatmap_pivot = savings_df.pivot_table(
            index='Gedung', 
            columns='Kelompok Perangkat', 
            values='Potensi Penghematan (kWh)',
            aggfunc='sum'
        )

        plt.figure(figsize=(max(12, len(heatmap_pivot.columns) * 2), max(6, len(heatmap_pivot) * 1)))
        sns.heatmap(heatmap_pivot, annot=True, fmt=".2f", cmap="Greens", linewidths=.5)
        plt.title('Heatmap Potensi Penghematan Energi per Kelompok Perangkat (kWh)', fontsize=16)
        plt.xlabel('Kelompok Perangkat'); plt.ylabel('Gedung')
        plt.xticks(rotation=45, ha='right'); plt.yticks(rotation=0)
        plt.tight_layout()
        heatmap_path = os.path.join(output_dir, 'heatmap_potensi_penghematan.png')
        plt.savefig(heatmap_path); plt.close()
        print(f"\n✅ Heatmap potensi penghematan disimpan di: {heatmap_path}")

    except Exception as e:
        print(f"\n❌ Gagal membuat heatmap: {e}")

def analyze_and_visualize_overall_consumption(consumption_data, unit_counts, output_dir):
    """Menganalisis dan memvisualisasikan konsumsi rata-rata per unit perangkat."""
    if not consumption_data:
        print("Tidak ada data konsumsi untuk dianalisis.")
        return
        
    df = pd.DataFrame(list(consumption_data.items()), columns=['Tipe Perangkat', 'Total Konsumsi (Wh)'])
    df['Jumlah Unit'] = df['Tipe Perangkat'].map(unit_counts)
    df['Total Konsumsi (kWh)'] = df['Total Konsumsi (Wh)'] / 1000
    
    # Hitung rata-rata per unit
    df['Rata-rata per Unit (kWh)'] = df['Total Konsumsi (kWh)'] / df['Jumlah Unit']
    
    # Hitung persentase berdasarkan rata-rata
    df['Persentase (%)'] = (df['Rata-rata per Unit (kWh)'] / df['Rata-rata per Unit (kWh)'].sum()) * 100
    df = df.sort_values(by='Rata-rata per Unit (kWh)', ascending=False)
    
    print(f"\n{'='*80}\nANALISIS KONSUMSI ENERGI RATA-RATA PER UNIT (BERDASARKAN DATA HISTORIS)\n{'='*80}")
    print(df[['Tipe Perangkat', 'Jumlah Unit', 'Rata-rata per Unit (kWh)', 'Persentase (%)']].to_string(index=False))
    
    # Visualisasi Pie Chart
    plt.figure(figsize=(12, 10))
    plt.pie(df['Rata-rata per Unit (kWh)'], labels=df['Tipe Perangkat'], autopct='%1.1f%%',
            startangle=140, pctdistance=0.85, colors=sns.color_palette('viridis', len(df)))
    centre_circle = plt.Circle((0,0),0.70,fc='white')
    fig = plt.gcf()
    fig.gca().add_artist(centre_circle)
    plt.title('Distribusi Konsumsi Energi Rata-Rata per Unit Perangkat', fontsize=16)
    plt.axis('equal')
    pie_chart_path = os.path.join(output_dir, 'pie_chart_distribusi_konsumsi_rata_rata.png')
    plt.savefig(pie_chart_path)
    plt.close()
    print(f"\n✅ Pie chart distribusi konsumsi rata-rata disimpan di: {pie_chart_path}")

# ==============================================================================
# @title 3. PROSES UTAMA END-TO-END
# ==============================================================================

savings_aggregator = {}
device_type_consumption_aggregator = {}
# 🚀 BARU: Dictionary untuk menghitung jumlah unit per tipe perangkat
device_type_counter = {}

# Dapatkan tahun unik dari semua data untuk holidays
all_years = []
for root, dirs, files in os.walk(SOURCE_DATA_DIR):
    for file in files:
        if file.endswith('.csv'):
            try:
                df_time = pd.read_csv(os.path.join(root, file), usecols=['id_time'], parse_dates=['id_time'])
                all_years.extend(df_time['id_time'].dt.year.unique())
            except Exception: continue
unique_years = sorted(list(set(np.unique(all_years))))
id_holidays = holidays.Indonesia(years=unique_years)
print(f"\n📅 Mengambil data hari libur nasional Indonesia untuk tahun: {unique_years}")

# Loop utama untuk setiap file CSV di folder sumber
for root, dirs, files in os.walk(SOURCE_DATA_DIR):
    for file in files:
        if not file.endswith('.csv'):
            continue

        # --- 1. IDENTIFIKASI & PERSIAPAN DATA ---
        file_path = os.path.join(root, file)
        
        try:
            path_parts = os.path.relpath(root, SOURCE_DATA_DIR).split(os.sep)
            building_name = path_parts[0]
            if len(path_parts) > 2:
                floor_name = path_parts[1]
                device_type = path_parts[2]
                device_identifier = f"{building_name}-{device_type}-{floor_name}"
            else:
                device_type = path_parts[1]
                device_identifier = f"{building_name}-{device_type}"
        except IndexError:
            print(f"  - ⚠️ Peringatan: Struktur folder untuk file {file} tidak dikenali. Melewati.")
            continue
        
        print(f"\n{'='*80}\nMemproses Perangkat: {device_identifier.upper()}\n{'='*80}")
        
        try:
            df_full = pd.read_csv(file_path, index_col='id_time', parse_dates=True)
            
            print("   - Membuat fitur tambahan...")
            current_cols = ['id_i1', 'id_i2', 'id_i3']
            if all(col in df_full.columns for col in current_cols):
                for col in current_cols:
                    df_full[col] = pd.to_numeric(df_full[col], errors='coerce')
                df_full.dropna(subset=current_cols, inplace=True)
                df_full['Current'] = df_full[current_cols].sum(axis=1)
                print("     - ✓ Fitur 'Current' berhasil dibuat.")

            if 'Power Factor' in df_full.columns:
                df_full['Power Factor'] = pd.to_numeric(df_full['Power Factor'], errors='coerce')
                print("     - ✓ Fitur 'Power Factor' dipastikan numerik.")

            existing_cols = [col for col in RELEVANT_COLUMNS if col in df_full.columns]
            df_full = df_full[existing_cols].copy()
            
            for col in existing_cols:
                if col != TARGET_VARIABLE:
                     df_full[col] = pd.to_numeric(df_full[col], errors='coerce')

            df_full.dropna(subset=[TARGET_VARIABLE], inplace=True)
            df_full['Konsumsi_Energi_Lag_1'] = df_full[TARGET_VARIABLE].shift(1)
            df_full['is_weekend'] = (df_full.index.dayofweek >= 5).astype(int)
            df_full['isHoliday'] = df_full.index.isin(id_holidays).astype(int)
            df_full.dropna(inplace=True)
            df_final = df_full[df_full[TARGET_VARIABLE] > 0].copy()

            device_type_key = device_type.upper()
            total_consumption_wh = df_final[TARGET_VARIABLE].sum()
            device_type_consumption_aggregator[device_type_key] = device_type_consumption_aggregator.get(device_type_key, 0) + total_consumption_wh
            # 🚀 BARU: Hitung jumlah unit per tipe
            device_type_counter[device_type_key] = device_type_counter.get(device_type_key, 0) + 1

            if len(df_final) < MINIMUM_ROWS:
                print(f"  - ⚠️ Data tidak cukup ({len(df_final)} baris). Melewati perangkat ini.")
                continue
            
            features_for_model = [col for col in df_final.columns if col != TARGET_VARIABLE]
            X = df_final[features_for_model]; y = df_final[TARGET_VARIABLE]
            X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
            X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

            # --- 2. TAHAP PELATIHAN & EVALUASI ---
            print("\n--- [TAHAP 1: PELATIHAN MODEL] ---")
            model_evals = train_and_evaluate_models(X_train, y_train, X_val, y_val, X_test, y_test)
            best_model_name = min(model_evals, key=lambda k: model_evals[k]['metrics']['mae'])
            best_model_obj = model_evals[best_model_name]['model']
            best_metrics = model_evals[best_model_name]['metrics']
            
            print(f"   ==> 🏆 Model terbaik: {best_model_name} (MAE: {best_metrics['mae']:.2f} Wh | RMSE: {best_metrics['rmse']:.2f} Wh | R²: {best_metrics['r2']:.2f})")

            # --- 3. TAHAP ANALISIS FITUR ---
            print("\n--- [TAHAP 2: ANALISIS PENGARUH FITUR] ---")
            analyze_feature_importance(best_model_obj, features_for_model, device_identifier, RESULTS_DIR)

            # --- 4. TAHAP EVALUASI & REKOMENDASI (PADA TEST SET 2024) ---
            print("\n--- [TAHAP 3: EVALUASI & REKOMENDASI PADA DATA UJI] ---")
            scalers = model_evals.get('LSTM', {})
            df_comparison = predict_on_test_set(best_model_obj, X_test, y_test, scalers)
            
            savings_data = generate_recommendations(df_comparison, device_identifier, DEVICE_OPERATING_HOURS, DEFAULT_OPERATING_HOURS, CORE_BUSINESS_HOURS)

            # Simpan hasil untuk agregasi
            if savings_data:
                group_key = f"{building_name}-{device_type}"
                if building_name not in savings_aggregator:
                    savings_aggregator[building_name] = {}
                if group_key not in savings_aggregator[building_name]:
                    savings_aggregator[building_name][group_key] = {}
                savings_aggregator[building_name][group_key][device_identifier] = savings_data

        except Exception as e:
            print(f"  - ❌ Gagal memproses perangkat {device_identifier}: {e}")

# ==============================================================================
# @title 4. LAPORAN REKOMENDASI PENGHEMATAN AGREGAT
# ==============================================================================
print(f"\n{'='*80}\nLAPORAN REKOMENDASI PENGHEMATAN ENERGI AGREGAT\n{'='*80}")

all_savings_data = []
grand_total_consumption = 0
grand_total_savings = 0

for building, groups in savings_aggregator.items():
    building_total_consumption = 0
    building_total_savings = 0
    building_durations = []
    
    for group_name, devices in groups.items():
        for device_name, data in devices.items():
            building_total_consumption += data['total_consumption']
            building_total_savings += data['potential_savings']
            building_durations.append(data['duration_days'])
            
    grand_total_consumption += building_total_consumption
    grand_total_savings += building_total_savings
    
grand_savings_percent = (grand_total_savings / grand_total_consumption) * 100 if grand_total_consumption > 0 else 0
print(f"SUMMARY KESELURUHAN (SEMUA GEDUNG)")
print(f"--------------------------------------------------")
print(f"Total Perkiraan Konsumsi: {grand_total_consumption:,.2f} kWh")
print(f"Total Potensi Penghematan: {grand_total_savings:,.2f} kWh ({grand_savings_percent:.2f}%)")
print(f"--------------------------------------------------")

for building, groups in savings_aggregator.items():
    building_total_consumption = 0
    building_total_savings = 0
    building_durations = []
    
    for group_name, devices in groups.items():
        for device_name, data in devices.items():
            building_total_consumption += data['total_consumption']
            building_total_savings += data['potential_savings']
            building_durations.append(data['duration_days'])
            
    avg_duration = int(np.mean(building_durations)) if building_durations else 0
    building_savings_percent = (building_total_savings / building_total_consumption) * 100 if building_total_consumption > 0 else 0

    print(f"\n🏢 GEDUNG: {building.upper()} (Analisis berdasarkan data uji selama ~{avg_duration} hari)")
    print(f"   --------------------------------------------------")
    print(f"   Total Perkiraan Konsumsi: {building_total_consumption:,.2f} kWh")
    print(f"   Total Potensi Penghematan: {building_total_savings:,.2f} kWh ({building_savings_percent:.2f}%)")
    print(f"   --------------------------------------------------\n")
    print(f"   RINCIAN DISTRIBUSI PENGHEMATAN:")

    for group_name, devices in groups.items():
        group_total_consumption = sum(d['total_consumption'] for d in devices.values())
        group_total_savings = sum(d['potential_savings'] for d in devices.values())
        group_savings_percent = (group_total_savings / group_total_consumption) * 100 if group_total_consumption > 0 else 0
        
        all_savings_data.append({
            'Gedung': building.upper(),
            'Kelompok Perangkat': group_name.upper(),
            'Lantai/Perangkat': 'AGREGAT KELOMPOK',
            'Potensi Penghematan (kWh)': group_total_savings,
            'Total Konsumsi (kWh)': group_total_consumption
        })
        
        print(f"\n   - Kelompok Perangkat: {group_name.upper()}")
        print(f"     Potensi Penghematan: {group_total_savings:,.2f} kWh ({group_savings_percent:.2f}%) dari total konsumsi kelompok ({group_total_consumption:,.2f} kWh)")
        
        for device_name, data in devices.items():
            floor_savings_percent = (data['potential_savings'] / data['total_consumption']) * 100 if data['total_consumption'] > 0 else 0
            
            all_savings_data.append({
                'Gedung': building.upper(),
                'Kelompok Perangkat': group_name.upper(),
                'Lantai/Perangkat': device_name.upper(),
                'Potensi Penghematan (kWh)': data['potential_savings'],
                'Total Konsumsi (kWh)': data['total_consumption']
            })
            
            print(f"       - Lantai/Perangkat: {device_name.upper()}")
            print(f"         Penghematan: {data['potential_savings']:,.2f} kWh ({floor_savings_percent:.2f}%) dari total konsumsi {data['total_consumption']:,.2f} kWh")

# ==============================================================================
# @title 5. PENYIMPANAN LAPORAN & VISUALISASI PENGHEMATAN
# ==============================================================================
if all_savings_data:
    savings_df = pd.DataFrame(all_savings_data)
    
    csv_path = os.path.join(RESULTS_DIR, 'laporan_potensi_penghematan.csv')
    savings_df.to_csv(csv_path, index=False, float_format='%.2f')
    print(f"\n✅ Laporan potensi penghematan lengkap disimpan di: {csv_path}")

    create_savings_heatmap(savings_df, RESULTS_DIR)
else:
    print("\nTidak ada data penghematan yang dihasilkan untuk disimpan atau divisualisasikan.")

# ==============================================================================
# @title 6. ANALISIS KONSUMSI ENERGI KESELURUHAN
# ==============================================================================
analyze_and_visualize_overall_consumption(device_type_consumption_aggregator, device_type_counter, RESULTS_DIR)

print(f"\n\n🏁🏁🏁 PROSES KESELURUHAN SELESAI 🏁🏁🏁")


✅ Konfigurasi selesai. Skrip akan berjalan menggunakan data dari 'sumber_data'.

📅 Mengambil data hari libur nasional Indonesia untuk tahun: [np.int32(2024)]

Memproses Perangkat: OPMC-SDP-LANTAI3
   - Membuat fitur tambahan...
     - ✓ Fitur 'Current' berhasil dibuat.
     - ✓ Fitur 'Power Factor' dipastikan numerik.

--- [TAHAP 1: PELATIHAN MODEL] ---
   ==> 🏆 Model terbaik: RandomForest (MAE: 1155.62 Wh | RMSE: 2012.35 Wh | R²: 0.85)

--- [TAHAP 2: ANALISIS PENGARUH FITUR] ---
   - Menganalisis Fitur Paling Berpengaruh...
     - ✓ Visualisasi pengaruh fitur disimpan di: hasil_analisis_rekomendasi/feature_importance_opmc-SDP-Lantai3.png

--- [TAHAP 3: EVALUASI & REKOMENDASI PADA DATA UJI] ---

   --- Sistem Rekomendasi Penjadwalan untuk OPMC-SDP-LANTAI3 ---
     - Analisis untuk jam operasional (0:00 - 23:00), Senin-Jumat (berdasarkan data uji selama ~365 hari):
       - Total Perkiraan Konsumsi: 4,304.14 kWh
       - Jam Puncak Konsumsi     : Pukul 19:00 (rata-rata 15.78 kWh)
      

In [1]:
# ==============================================================================
# @title 0. Instalasi Library
# ==============================================================================
# !pip install holidays -q
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import os
import json
import joblib
import warnings
import holidays

warnings.filterwarnings('ignore')

# ==============================================================================
# @title 1. KONFIGURASI UTAMA
# ==============================================================================
# --- Path Folder ---
SOURCE_DATA_DIR = 'sumber_data' # Folder data training (historis 2024)
RESULTS_DIR = 'hasil_analisis_rekomendasi_revisi_feature_selection' # Folder output untuk semua hasil
EXPORT_DIR = 'exported_models_per_lantai' # Folder untuk menyimpan model final

# --- Konfigurasi Model & Data ---
TARGET_VARIABLE = 'Konsumsi Energi'
RELEVANT_COLUMNS = [
    'Konsumsi Energi', 'Temperature', 'Showers', 'Cloud Cover', 'Weather Code',
    'Relative Humidity', 'Dew Point', 'Precipitation',
    'Pressure MSL', 'Surface Pressure', 'Evapotranspiration',
    'Vapour Pressure Deficit', 'Wind Speed', 'Wind Direction', 'Wind Gusts',
    'Soil Temperature', 'Sunshine Duration', 'UV Index', 'Direct Radiation',
    'Current', 'Power Factor'
]
MINIMUM_ROWS = 500

# --- Konfigurasi Rekomendasi ---
DEVICE_OPERATING_HOURS = {
    'ahu': (8, 16), 'sdp': (0, 23), 'lift': (7, 20), 'chiller': (8, 17)
}
CORE_BUSINESS_HOURS = (9, 17) # Jam 9 pagi sampai 5 sore
DEFAULT_OPERATING_HOURS = (8, 17)

# --- Membuat Folder jika belum ada ---
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(EXPORT_DIR, exist_ok=True)
os.makedirs(SOURCE_DATA_DIR, exist_ok=True)

print("✅ Konfigurasi selesai. Skrip akan berjalan menggunakan data dari 'sumber_data'.")

# ==============================================================================
# @title 2. Definisi Fungsi-Fungsi Pembantu
# ==============================================================================

def train_and_evaluate_models(X_train, y_train, X_val, y_val, X_test, y_test):
    """Melatih semua model dan mengembalikan hasilnya dengan metrik lengkap."""
    results = {}
    
    # --- Model 1: Random Forest Regressor ---
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)
    y_pred_rf = rf_model.predict(X_test)
    results['RandomForest'] = {
        'model': rf_model, 
        'metrics': {
            'mae': mean_absolute_error(y_test, y_pred_rf),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
            'r2': r2_score(y_test, y_pred_rf)
        }
    }
    
    # --- Model 2: Gradient Boosting Regressor ---
    gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
    gb_model.fit(X_train, y_train)
    y_pred_gb = gb_model.predict(X_test)
    results['GradientBoosting'] = {
        'model': gb_model, 
        'metrics': {
            'mae': mean_absolute_error(y_test, y_pred_gb),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred_gb)),
            'r2': r2_score(y_test, y_pred_gb)
        }
    }

    # --- Model 3: LSTM ---
    scaler_X = MinMaxScaler(); scaler_y = MinMaxScaler()
    X_train_s = scaler_X.fit_transform(X_train); y_train_s = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
    X_val_s = scaler_X.transform(X_val); y_val_s = scaler_y.transform(y_val.values.reshape(-1, 1))
    X_test_s = scaler_X.transform(X_test)
    X_train_r = X_train_s.reshape((X_train_s.shape[0], 1, X_train_s.shape[1]))
    X_val_r = X_val_s.reshape((X_val_s.shape[0], 1, X_val_s.shape[1]))
    X_test_r = X_test_s.reshape((X_test_s.shape[0], 1, X_test_s.shape[1]))
    lstm = Sequential([LSTM(50, activation='relu', input_shape=(1, X_train_r.shape[2])), Dense(1)])
    lstm.compile(optimizer='adam', loss='mse')
    lstm.fit(X_train_r, y_train_s, epochs=50, batch_size=32, validation_data=(X_val_r, y_val_s), verbose=0, shuffle=False)
    y_pred = scaler_y.inverse_transform(lstm.predict(X_test_r, verbose=0))
    results['LSTM'] = {
        'model': lstm, 
        'metrics': {
            'mae': mean_absolute_error(y_test, y_pred),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
            'r2': r2_score(y_test, y_pred)
        },
        'scaler_X': scaler_X, 
        'scaler_y': scaler_y
    }
    return results

def analyze_feature_importance(model, features, device_identifier, output_dir):
    """Menganalisis dan memvisualisasikan fitur yang paling berpengaruh."""
    print("   - Menganalisis Fitur Paling Berpengaruh...")
    if not hasattr(model, 'feature_importances_'):
        print("     - Analisis feature importance tidak didukung untuk model LSTM.")
        return

    importance_df = pd.DataFrame({'Fitur': features, 'Tingkat Pengaruh': model.feature_importances_})
    importance_df = importance_df.sort_values(by='Tingkat Pengaruh', ascending=False).head(10)

    plt.figure(figsize=(12, 8))
    sns.barplot(x='Tingkat Pengaruh', y='Fitur', data=importance_df, palette='viridis')
    plt.title(f'Fitur Paling Berpengaruh Terhadap Konsumsi Energi\n({device_identifier})', fontsize=16)
    plt.xlabel('Tingkat Pengaruh'); plt.ylabel('Fitur')
    plt.tight_layout()
    plot_path = os.path.join(output_dir, f'feature_importance_{device_identifier}.png')
    plt.savefig(plot_path); plt.close()
    print(f"     - ✓ Visualisasi pengaruh fitur disimpan di: {plot_path}")

def predict_on_test_set(model, X_test, y_test, scalers=None):
    """Melakukan prediksi pada test set dan mengembalikan DataFrame perbandingan."""
    if isinstance(model, tf.keras.Model):
        X_test_s = scalers['X'].transform(X_test)
        X_test_r = X_test_s.reshape((X_test_s.shape[0], 1, X_test_s.shape[1]))
        predictions_s = model.predict(X_test_r, verbose=0)
        predictions = scalers['y'].inverse_transform(predictions_s).flatten()
    else:
        predictions = model.predict(X_test)
    
    predictions = np.maximum(0, predictions)
    
    df_results = pd.DataFrame({
        'timestamp': y_test.index,
        'Aktual_Konsumsi_kWh': y_test.values / 1000,
        'Prediksi_Konsumsi_kWh': predictions / 1000
    })
    return df_results.sort_values('timestamp')

def generate_recommendations(df_results, device_identifier, hours_config, default_hours, core_hours):
    """
    Menganalisis hasil prediksi untuk memberikan rekomendasi yang masuk akal.
    """
    device_type = device_identifier.split('-')[1].lower()
    operating_hours = hours_config.get(device_type, default_hours)
    
    print(f"\n   --- Sistem Rekomendasi Penjadwalan untuk {device_identifier.upper()} ---")
    
    df_operating = df_results[(df_results['timestamp'].dt.hour >= operating_hours[0]) & 
                              (df_results['timestamp'].dt.hour <= operating_hours[1]) &
                              (df_results['timestamp'].dt.dayofweek < 5)].copy()

    if df_operating.empty:
        print(f"     - Tidak ada data pada jam operasional ({operating_hours[0]}:00 - {operating_hours[1]}:00) yang ditemukan.")
        return None

    df_business = df_operating[(df_operating['timestamp'].dt.hour >= core_hours[0]) &
                               (df_operating['timestamp'].dt.hour <= core_hours[1])]

    if df_business.empty:
        print(f"     - Tidak ada data pada jam kerja inti ({core_hours[0]}:00 - {core_hours[1]}:00) yang ditemukan.")
        return None

    duration_days = (df_operating['timestamp'].max() - df_operating['timestamp'].min()).days + 1

    hourly_avg_operating = df_operating.groupby(df_operating['timestamp'].dt.hour)['Prediksi_Konsumsi_kWh'].mean()
    hourly_avg_business = df_business.groupby(df_business['timestamp'].dt.hour)['Prediksi_Konsumsi_kWh'].mean()

    peak_hour = hourly_avg_operating.idxmax()
    peak_consumption = hourly_avg_operating.max()
    off_peak_hour = hourly_avg_business.idxmin()
    off_peak_consumption = hourly_avg_business.min()
    total_predicted_consumption = df_operating['Prediksi_Konsumsi_kWh'].sum()
    
    num_peak_hours = len(df_operating[df_operating['timestamp'].dt.hour == peak_hour])
    potential_savings_kwh = (peak_consumption - off_peak_consumption) * num_peak_hours
    potential_savings_percent = (potential_savings_kwh / total_predicted_consumption) * 100 if total_predicted_consumption > 0 else 0

    print(f"     - Analisis untuk jam operasional ({operating_hours[0]}:00 - {operating_hours[1]}:00), Senin-Jumat (berdasarkan data uji selama ~{duration_days} hari):")
    print(f"       - Total Perkiraan Konsumsi: {total_predicted_consumption:,.2f} kWh")
    print(f"       - Jam Puncak Konsumsi     : Pukul {peak_hour}:00 (rata-rata {peak_consumption:.2f} kWh)")
    print(f"       - Jam Lembah (di jam kerja): Pukul {off_peak_hour}:00 (rata-rata {off_peak_consumption:.2f} kWh)")
    print("\n     - REKOMENDASI:")
    print(f"       - Pertimbangkan mengurangi beban pada jam puncak sekitar pukul {peak_hour}:00.")
    print(f"       - Jika memungkinkan, geser sebagian beban kerja ke jam lembah (saat jam kerja) sekitar pukul {off_peak_hour}:00.")
    if potential_savings_kwh > 0 and peak_hour != off_peak_hour:
        print(f"       - POTENSI PENGHEMATAN: Dengan menggeser beban dari jam puncak ke jam lembah,")
        print(f"         Anda berpotensi menghemat ~{potential_savings_kwh:.2f} kWh ({potential_savings_percent:.2f}%) selama periode ~{duration_days} hari.")
        
    return {
        'total_consumption': total_predicted_consumption,
        'potential_savings': potential_savings_kwh,
        'duration_days': duration_days
    }

def create_savings_heatmap(savings_df, output_dir):
    """Membuat dan menyimpan heatmap dari data potensi penghematan."""
    if savings_df.empty:
        print("\nTidak ada data penghematan untuk membuat heatmap.")
        return

    try:
        heatmap_pivot = savings_df.pivot_table(
            index='Gedung', 
            columns='Kelompok Perangkat', 
            values='Potensi Penghematan (kWh)',
            aggfunc='sum'
        )

        plt.figure(figsize=(max(12, len(heatmap_pivot.columns) * 2), max(6, len(heatmap_pivot) * 1)))
        sns.heatmap(heatmap_pivot, annot=True, fmt=".2f", cmap="Greens", linewidths=.5)
        plt.title('Heatmap Potensi Penghematan Energi per Kelompok Perangkat (kWh)', fontsize=16)
        plt.xlabel('Kelompok Perangkat'); plt.ylabel('Gedung')
        plt.xticks(rotation=45, ha='right'); plt.yticks(rotation=0)
        plt.tight_layout()
        heatmap_path = os.path.join(output_dir, 'heatmap_potensi_penghematan.png')
        plt.savefig(heatmap_path); plt.close()
        print(f"\n✅ Heatmap potensi penghematan disimpan di: {heatmap_path}")

    except Exception as e:
        print(f"\n❌ Gagal membuat heatmap: {e}")

def analyze_and_visualize_overall_consumption(consumption_data, unit_counts, output_dir):
    """Menganalisis dan memvisualisasikan konsumsi rata-rata per unit perangkat."""
    if not consumption_data:
        print("Tidak ada data konsumsi untuk dianalisis.")
        return
        
    df = pd.DataFrame(list(consumption_data.items()), columns=['Tipe Perangkat', 'Total Konsumsi (Wh)'])
    df['Jumlah Unit'] = df['Tipe Perangkat'].map(unit_counts)
    df['Total Konsumsi (kWh)'] = df['Total Konsumsi (Wh)'] / 1000
    
    # Hitung rata-rata per unit
    df['Rata-rata per Unit (kWh)'] = df['Total Konsumsi (kWh)'] / df['Jumlah Unit']
    
    # Hitung persentase berdasarkan rata-rata
    df['Persentase (%)'] = (df['Rata-rata per Unit (kWh)'] / df['Rata-rata per Unit (kWh)'].sum()) * 100
    df = df.sort_values(by='Rata-rata per Unit (kWh)', ascending=False)
    
    print(f"\n{'='*80}\nANALISIS KONSUMSI ENERGI RATA-RATA PER UNIT (BERDASARKAN DATA HISTORIS)\n{'='*80}")
    print(df[['Tipe Perangkat', 'Jumlah Unit', 'Rata-rata per Unit (kWh)', 'Persentase (%)']].to_string(index=False))
    
    # Visualisasi Pie Chart
    plt.figure(figsize=(12, 10))
    plt.pie(df['Rata-rata per Unit (kWh)'], labels=df['Tipe Perangkat'], autopct='%1.1f%%',
            startangle=140, pctdistance=0.85, colors=sns.color_palette('viridis', len(df)))
    centre_circle = plt.Circle((0,0),0.70,fc='white')
    fig = plt.gcf()
    fig.gca().add_artist(centre_circle)
    plt.title('Distribusi Konsumsi Energi Rata-Rata per Unit Perangkat', fontsize=16)
    plt.axis('equal')
    pie_chart_path = os.path.join(output_dir, 'pie_chart_distribusi_konsumsi_rata_rata.png')
    plt.savefig(pie_chart_path)
    plt.close()
    print(f"\n✅ Pie chart distribusi konsumsi rata-rata disimpan di: {pie_chart_path}")

# ==============================================================================
# @title 3. PROSES UTAMA END-TO-END
# ==============================================================================

savings_aggregator = {}
device_type_consumption_aggregator = {}
# 🚀 BARU: Dictionary untuk menghitung jumlah unit per tipe perangkat
device_type_counter = {}

# Dapatkan tahun unik dari semua data untuk holidays
all_years = []
for root, dirs, files in os.walk(SOURCE_DATA_DIR):
    for file in files:
        if file.endswith('.csv'):
            try:
                df_time = pd.read_csv(os.path.join(root, file), usecols=['id_time'], parse_dates=['id_time'])
                all_years.extend(df_time['id_time'].dt.year.unique())
            except Exception: continue
unique_years = sorted(list(set(np.unique(all_years))))
id_holidays = holidays.Indonesia(years=unique_years)
print(f"\n📅 Mengambil data hari libur nasional Indonesia untuk tahun: {unique_years}")

# Loop utama untuk setiap file CSV di folder sumber
for root, dirs, files in os.walk(SOURCE_DATA_DIR):
    for file in files:
        if not file.endswith('.csv'):
            continue

        # --- 1. IDENTIFIKASI & PERSIAPAN DATA ---
        file_path = os.path.join(root, file)
        
        try:
            path_parts = os.path.relpath(root, SOURCE_DATA_DIR).split(os.sep)
            building_name = path_parts[0]
            if len(path_parts) > 2:
                floor_name = path_parts[1]
                device_type = path_parts[2]
                device_identifier = f"{building_name}-{device_type}-{floor_name}"
            else:
                device_type = path_parts[1]
                device_identifier = f"{building_name}-{device_type}"
        except IndexError:
            print(f"   - ⚠️ Peringatan: Struktur folder untuk file {file} tidak dikenali. Melewati.")
            continue
        
        print(f"\n{'='*80}\nMemproses Perangkat: {device_identifier.upper()}\n{'='*80}")
        
        try:
            df_full = pd.read_csv(file_path, index_col='id_time', parse_dates=True)
            
            print("   - Membuat fitur tambahan...")
            current_cols = ['id_i1', 'id_i2', 'id_i3']
            if all(col in df_full.columns for col in current_cols):
                for col in current_cols:
                    df_full[col] = pd.to_numeric(df_full[col], errors='coerce')
                df_full.dropna(subset=current_cols, inplace=True)
                df_full['Current'] = df_full[current_cols].sum(axis=1)
                print("     - ✓ Fitur 'Current' berhasil dibuat.")

            if 'Power Factor' in df_full.columns:
                df_full['Power Factor'] = pd.to_numeric(df_full['Power Factor'], errors='coerce')
                print("     - ✓ Fitur 'Power Factor' dipastikan numerik.")

            existing_cols = [col for col in RELEVANT_COLUMNS if col in df_full.columns]
            df_full = df_full[existing_cols].copy()
            
            for col in existing_cols:
                if col != TARGET_VARIABLE:
                        df_full[col] = pd.to_numeric(df_full[col], errors='coerce')

            df_full.dropna(subset=[TARGET_VARIABLE], inplace=True)
            df_full['Konsumsi_Energi_Lag_1'] = df_full[TARGET_VARIABLE].shift(1)
            df_full['is_weekend'] = (df_full.index.dayofweek >= 5).astype(int)
            df_full['isHoliday'] = df_full.index.isin(id_holidays).astype(int)
            
            # --- PENAMBAHAN FITUR TIME BASED ---
            df_full['hour'] = df_full.index.hour
            df_full['day_of_week'] = df_full.index.dayofweek # Senin=0, Minggu=6
            df_full['week_of_year'] = df_full.index.isocalendar().week.astype(int)
            df_full['month_of_year'] = df_full.index.month
            
            df_full.dropna(inplace=True)
            df_final = df_full[df_full[TARGET_VARIABLE] > 0].copy()

            device_type_key = device_type.upper()
            total_consumption_wh = df_final[TARGET_VARIABLE].sum()
            device_type_consumption_aggregator[device_type_key] = device_type_consumption_aggregator.get(device_type_key, 0) + total_consumption_wh
            # 🚀 BARU: Hitung jumlah unit per tipe
            device_type_counter[device_type_key] = device_type_counter.get(device_type_key, 0) + 1

            if len(df_final) < MINIMUM_ROWS:
                print(f"   - ⚠️ Data tidak cukup ({len(df_final)} baris). Melewati perangkat ini.")
                continue
            
            # --- 2. SELEKSI FITUR (HANYA MULTIKOLINEARITAS) ---
            print("\n--- [TAHAP 1.5: SELEKSI FITUR] ---")
            print("   - Menghilangkan fitur dengan multikolinearitas (korelasi absolut >= 0.7)...")
            
            potential_features = [col for col in df_final.columns if col not in [TARGET_VARIABLE, 'Konsumsi_Energi_Lag_1']]
            feature_corr_matrix = df_final[potential_features].corr().abs()
            upper_tri = feature_corr_matrix.where(np.triu(np.ones(feature_corr_matrix.shape), k=1).astype(bool))
            to_drop = [column for column in upper_tri.columns if any(upper_tri[column] >= 0.7)]
            print(f"     - Fitur yang dihapus karena multikolinearitas: {to_drop}")
            
            independent_features = [f for f in potential_features if f not in to_drop]
            print(f"     - Terdapat {len(independent_features)} fitur independen yang tersisa.")
            
            features_for_model = sorted(list(set(['Konsumsi_Energi_Lag_1'] + independent_features)))
            print(f"     - Fitur final yang digunakan untuk model: {len(features_for_model)} fitur")
            
            X = df_final[features_for_model]; y = df_final[TARGET_VARIABLE]
            X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
            X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

            # --- 3. TAHAP PELATIHAN & EVALUASI ---
            print("\n--- [TAHAP 2: PELATIHAN MODEL] ---")
            model_evals = train_and_evaluate_models(X_train, y_train, X_val, y_val, X_test, y_test)
            best_model_name = min(model_evals, key=lambda k: model_evals[k]['metrics']['mae'])
            best_model_obj = model_evals[best_model_name]['model']
            best_metrics = model_evals[best_model_name]['metrics']
            
            print(f"   ==> 🏆 Model terbaik: {best_model_name} (MAE: {best_metrics['mae']:.2f} Wh | RMSE: {best_metrics['rmse']:.2f} Wh | R²: {best_metrics['r2']:.2f})")

            # --- 4. TAHAP ANALISIS FITUR ---
            print("\n--- [TAHAP 3: ANALISIS PENGARUH FITUR] ---")
            analyze_feature_importance(best_model_obj, features_for_model, device_identifier, RESULTS_DIR)

            # --- 5. TAHAP EVALUASI & REKOMENDASI (PADA TEST SET 2024) ---
            print("\n--- [TAHAP 4: EVALUASI & REKOMENDASI PADA DATA UJI] ---")
            scalers = model_evals.get('LSTM', {})
            df_comparison = predict_on_test_set(best_model_obj, X_test, y_test, scalers)
            
            savings_data = generate_recommendations(df_comparison, device_identifier, DEVICE_OPERATING_HOURS, DEFAULT_OPERATING_HOURS, CORE_BUSINESS_HOURS)

            # Simpan hasil untuk agregasi
            if savings_data:
                group_key = f"{building_name}-{device_type}"
                if building_name not in savings_aggregator:
                    savings_aggregator[building_name] = {}
                if group_key not in savings_aggregator[building_name]:
                    savings_aggregator[building_name][group_key] = {}
                savings_aggregator[building_name][group_key][device_identifier] = savings_data

        except Exception as e:
            print(f"   - ❌ Gagal memproses perangkat {device_identifier}: {e}")

# ==============================================================================
# @title 4. LAPORAN REKOMENDASI PENGHEMATAN AGREGAT
# ==============================================================================
print(f"\n{'='*80}\nLAPORAN REKOMENDASI PENGHEMATAN ENERGI AGREGAT\n{'='*80}")

all_savings_data = []
grand_total_consumption = 0
grand_total_savings = 0

for building, groups in savings_aggregator.items():
    building_total_consumption = 0
    building_total_savings = 0
    building_durations = []
    
    for group_name, devices in groups.items():
        for device_name, data in devices.items():
            building_total_consumption += data['total_consumption']
            building_total_savings += data['potential_savings']
            building_durations.append(data['duration_days'])
            
    grand_total_consumption += building_total_consumption
    grand_total_savings += building_total_savings
    
grand_savings_percent = (grand_total_savings / grand_total_consumption) * 100 if grand_total_consumption > 0 else 0
print(f"SUMMARY KESELURUHAN (SEMUA GEDUNG)")
print(f"--------------------------------------------------")
print(f"Total Perkiraan Konsumsi: {grand_total_consumption:,.2f} kWh")
print(f"Total Potensi Penghematan: {grand_total_savings:,.2f} kWh ({grand_savings_percent:.2f}%)")
print(f"--------------------------------------------------")

for building, groups in savings_aggregator.items():
    building_total_consumption = 0
    building_total_savings = 0
    building_durations = []
    
    for group_name, devices in groups.items():
        for device_name, data in devices.items():
            building_total_consumption += data['total_consumption']
            building_total_savings += data['potential_savings']
            building_durations.append(data['duration_days'])
            
    avg_duration = int(np.mean(building_durations)) if building_durations else 0
    building_savings_percent = (building_total_savings / building_total_consumption) * 100 if building_total_consumption > 0 else 0

    print(f"\n🏢 GEDUNG: {building.upper()} (Analisis berdasarkan data uji selama ~{avg_duration} hari)")
    print(f"   --------------------------------------------------")
    print(f"   Total Perkiraan Konsumsi: {building_total_consumption:,.2f} kWh")
    print(f"   Total Potensi Penghematan: {building_total_savings:,.2f} kWh ({building_savings_percent:.2f}%)")
    print(f"   --------------------------------------------------\n")
    print(f"   RINCIAN DISTRIBUSI PENGHEMATAN:")

    for group_name, devices in groups.items():
        group_total_consumption = sum(d['total_consumption'] for d in devices.values())
        group_total_savings = sum(d['potential_savings'] for d in devices.values())
        group_savings_percent = (group_total_savings / group_total_consumption) * 100 if group_total_consumption > 0 else 0
        
        all_savings_data.append({
            'Gedung': building.upper(),
            'Kelompok Perangkat': group_name.upper(),
            'Lantai/Perangkat': 'AGREGAT KELOMPOK',
            'Potensi Penghematan (kWh)': group_total_savings,
            'Total Konsumsi (kWh)': group_total_consumption
        })
        
        print(f"\n   - Kelompok Perangkat: {group_name.upper()}")
        print(f"     Potensi Penghematan: {group_total_savings:,.2f} kWh ({group_savings_percent:.2f}%) dari total konsumsi kelompok ({group_total_consumption:,.2f} kWh)")
        
        for device_name, data in devices.items():
            floor_savings_percent = (data['potential_savings'] / data['total_consumption']) * 100 if data['total_consumption'] > 0 else 0
            
            all_savings_data.append({
                'Gedung': building.upper(),
                'Kelompok Perangkat': group_name.upper(),
                'Lantai/Perangkat': device_name.upper(),
                'Potensi Penghematan (kWh)': data['potential_savings'],
                'Total Konsumsi (kWh)': data['total_consumption']
            })
            
            print(f"       - Lantai/Perangkat: {device_name.upper()}")
            print(f"         Penghematan: {data['potential_savings']:,.2f} kWh ({floor_savings_percent:.2f}%) dari total konsumsi {data['total_consumption']:,.2f} kWh")

# ==============================================================================
# @title 5. PENYIMPANAN LAPORAN & VISUALISASI PENGHEMATAN
# ==============================================================================
if all_savings_data:
    savings_df = pd.DataFrame(all_savings_data)
    
    csv_path = os.path.join(RESULTS_DIR, 'laporan_potensi_penghematan.csv')
    savings_df.to_csv(csv_path, index=False, float_format='%.2f')
    print(f"\n✅ Laporan potensi penghematan lengkap disimpan di: {csv_path}")

    create_savings_heatmap(savings_df, RESULTS_DIR)
else:
    print("\nTidak ada data penghematan yang dihasilkan untuk disimpan atau divisualisasikan.")

# ==============================================================================
# @title 6. ANALISIS KONSUMSI ENERGI KESELURUHAN
# ==============================================================================
analyze_and_visualize_overall_consumption(device_type_consumption_aggregator, device_type_counter, RESULTS_DIR)

print(f"\n\n🏁🏁🏁 PROSES KESELURUHAN SELESAI 🏁🏁🏁")



2025-09-03 14:53:34.059420: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-03 14:53:34.285491: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-03 14:53:40.936919: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


✅ Konfigurasi selesai. Skrip akan berjalan menggunakan data dari 'sumber_data'.

📅 Mengambil data hari libur nasional Indonesia untuk tahun: [np.int32(2024)]

Memproses Perangkat: OPMC-SDP-LANTAI3
   - Membuat fitur tambahan...
     - ✓ Fitur 'Current' berhasil dibuat.
     - ✓ Fitur 'Power Factor' dipastikan numerik.

--- [TAHAP 1.5: SELEKSI FITUR] ---
   - Menghilangkan fitur dengan multikolinearitas (korelasi absolut >= 0.7)...
     - Fitur yang dihapus karena multikolinearitas: ['Relative Humidity', 'Precipitation', 'Surface Pressure', 'Evapotranspiration', 'Vapour Pressure Deficit', 'Wind Gusts', 'Sunshine Duration', 'UV Index', 'Direct Radiation', 'day_of_week', 'month_of_year']
     - Terdapat 14 fitur independen yang tersisa.
     - Fitur final yang digunakan untuk model: 15 fitur

--- [TAHAP 2: PELATIHAN MODEL] ---


2025-09-03 14:53:44.281169: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


   ==> 🏆 Model terbaik: RandomForest (MAE: 1068.91 Wh | RMSE: 1873.94 Wh | R²: 0.87)

--- [TAHAP 3: ANALISIS PENGARUH FITUR] ---
   - Menganalisis Fitur Paling Berpengaruh...
     - ✓ Visualisasi pengaruh fitur disimpan di: hasil_analisis_rekomendasi_revisi_feature_selection/feature_importance_opmc-SDP-Lantai3.png

--- [TAHAP 4: EVALUASI & REKOMENDASI PADA DATA UJI] ---

   --- Sistem Rekomendasi Penjadwalan untuk OPMC-SDP-LANTAI3 ---
     - Analisis untuk jam operasional (0:00 - 23:00), Senin-Jumat (berdasarkan data uji selama ~365 hari):
       - Total Perkiraan Konsumsi: 4,329.67 kWh
       - Jam Puncak Konsumsi     : Pukul 19:00 (rata-rata 15.92 kWh)
       - Jam Lembah (di jam kerja): Pukul 9:00 (rata-rata 3.80 kWh)

     - REKOMENDASI:
       - Pertimbangkan mengurangi beban pada jam puncak sekitar pukul 19:00.
       - Jika memungkinkan, geser sebagian beban kerja ke jam lembah (saat jam kerja) sekitar pukul 9:00.
       - POTENSI PENGHEMATAN: Dengan menggeser beban dari jam pun