# In [None]:
# ==============================================================================
# === INTERPOLASI 10 15 20 30 1 2 ===
# ==============================================================================

import pandas as pd
import numpy as np
from tqdm.auto import tqdm

# ==============================================================================
# === PENGATURAN GLOBAL: NAMA FILE DAN PARAMETER ===
# ==============================================================================
# Cukup ubah nama file dan kolom di bagian ini sesuai kebutuhan Anda.

# Raw input file.
# Written as a raw string: the backslashes are literal path separators, and
# without the r-prefix "\W" is an invalid escape sequence (a warning on current
# Python versions, slated to become an error). The resulting value is unchanged.
input_file_raw = r'data_smartmeter_split\WITEL\WITEL_SDP_L5.csv'

# --- Columns to process (zero-cleaning, resampling, interpolation) ---
columns_to_process = [
    'id_stand_energy_kirim',
    'id_v1', 'id_v2', 'id_v3', 'id_i1', 'id_i2', 'id_i3', 'id_frequency',
    'id_power_factor',
    'reactive_energy_import', 'reactive_energy_export', 'apparent_energy_import', 'rssi',       # Example: add more columns here
]

# --- Configure every desired output interval here ---
# Entries can be added, changed, or removed as needed.
# 'freq': pandas offset alias ('min' for minutes, 'h' for hours). The older
#         'T' / 'H' spellings are deprecated in recent pandas releases.
# 'gap_limit': longest run of consecutive missing intervals that may still be
#              interpolated; longer runs are flagged as 'gap_too_large'.
# 'unit_text': label used in the output file name and in the printed summary.
processing_jobs = [
    {'freq': '10min', 'gap_limit': 6,  'unit_text': '10_Menit'},
    {'freq': '15min', 'gap_limit': 4,  'unit_text': '15_Menit'},
    {'freq': '20min', 'gap_limit': 3,  'unit_text': '20_Menit'},
    {'freq': '30min', 'gap_limit': 2,  'unit_text': '30_Menit'},
    {'freq': '1h',    'gap_limit': 3,  'unit_text': '1_Jam'},
    {'freq': '2h',    'gap_limit': 2,  'unit_text': '2_Jam'},
]

# ==============================================================================
# === FUNGSI-FUNGSI UTAMA ===
# ==============================================================================

def print_status_summary(df, status_col_name, interval_text):
    """Print a small report of how many rows carry each status value.

    Args:
        df: DataFrame that contains the status column.
        status_col_name: Name of the status column to summarise.
        interval_text: Human-readable interval label shown in the report header.
    """
    rule = "-" * 40
    print(rule)
    print(f"Ringkasan untuk '{status_col_name}' ({interval_text})")
    print(rule)
    print(f"Total Data\t\t: {len(df)}")

    # Occurrences per status value; statuses that never occur count as 0.
    tally = df[status_col_name].value_counts()
    always_reported = (
        ("Data Original\t\t", 'original'),
        ("Data Interpolasi\t", 'interpolated'),
        ("Gap Terlalu Besar\t", 'gap_too_large'),
    )
    for label, status in always_reported:
        print(f"{label}: {tally.get(status, 0)}")

    # The zero-value line is only shown when such rows actually exist.
    n_zero = tally.get('original_zero_value', 0)
    if n_zero > 0:
        print(f"Nilai 0 Asli\t\t: {n_zero}")
    print(rule)

def process_interval_data(df_meter, freq, gap_limit, columns=None):
    """Resample one meter's readings to a fixed interval and fill short gaps.

    For every processed column ``<col>`` a companion ``<col>_status`` column is
    added with one of these values:

    * ``'original'``: the resampled bin held a value.
    * ``'interpolated'``: the bin was empty and received a time-interpolated
      value.
    * ``'gap_too_large'``: the bin belongs to a run of more than ``gap_limit``
      consecutive empty bins; its value is set to 0 instead of being
      interpolated.
    * ``'created_for_sequence'``: the bin was empty and is still empty after
      interpolation (expected only when the column has no value at all to
      interpolate from and the run is not longer than ``gap_limit``).

    Args:
        df_meter: Readings of a single meter, indexed by timestamp (required by
            ``resample`` and by time-based interpolation). Must contain the
            processed columns plus ``'id_meter_id'`` and ``'id_id'``.
        freq: pandas offset alias of the target interval (e.g. ``'10min'``).
        gap_limit: Longest run of consecutive empty bins that is still
            interpolated.
        columns: Columns to process. Defaults to the module-level
            ``columns_to_process`` so existing three-argument calls behave as
            before.

    Returns:
        The resampled DataFrame, or ``None`` when ``df_meter`` is empty.
    """
    if df_meter.empty:
        return None
    if columns is None:
        columns = columns_to_process

    # One row per interval; each column keeps the first non-null value in the bin.
    df_resampled = df_meter.resample(freq).first()

    for col in columns:
        is_na = df_resampled[col].isna()

        # Label consecutive runs of equal "missing" state, then measure each run
        # so that runs of missing bins longer than gap_limit can be singled out.
        na_blocks = (is_na != is_na.shift()).cumsum()
        block_sizes = df_resampled.groupby(na_blocks)[col].transform('size')
        large_gap_mask = is_na & (block_sizes > gap_limit)

        # Interpolate everything first, then overwrite the oversized gaps with 0.
        df_resampled[col] = df_resampled[col].interpolate(method='time', limit_direction='both')
        df_resampled.loc[large_gap_mask, col] = 0

        # First matching condition wins, so oversized gaps take precedence.
        df_resampled[f'{col}_status'] = np.select(
            [large_gap_mask, is_na & df_resampled[col].notna(), is_na],
            ['gap_too_large', 'interpolated', 'created_for_sequence'],
            default='original',
        )

    # Rows created by resampling have no identifiers; borrow the nearest ones.
    df_resampled['id_meter_id'] = df_resampled['id_meter_id'].ffill().bfill()
    df_resampled['id_id'] = df_resampled['id_id'].ffill().bfill()

    return df_resampled

# ==============================================================================
# === PROSES UTAMA ===
# ==============================================================================

# Main pipeline: read the raw CSV once, clean it, then for every configured
# interval resample/interpolate each meter and write one result CSV per interval.
try:
    # --- Initial step: read and clean the raw data ---
    print("="*50)
    print("LANGKAH AWAL: MEMBACA DAN MEMBERSIHKAN DATA MENTAH")
    print("="*50)

    # Fixed columns read from the raw file; they also lead every output file.
    base_cols = ['id_id', 'id_n_id', 'id_time', 'id_meter_id', 'id_stand_energy_kirim',
                 'id_v1', 'id_v2', 'id_v3', 'id_i1', 'id_i2', 'id_i3', 'id_frequency',
                 'id_power_factor', 'id_create_date', 'updated_at', 'created_at',
                 'reactive_energy_import', 'reactive_energy_export', 'apparent_energy_import']

    # Base columns plus any extra processed column (e.g. 'rssi'), each listed
    # exactly once and in a stable order.
    cols_to_read = list(dict.fromkeys(base_cols + columns_to_process))
    print(f"Membaca file mentah: '{input_file_raw}'...")
    base_df = pd.read_csv(input_file_raw, usecols=cols_to_read)

    base_df['id_time'] = pd.to_datetime(base_df['id_time'])
    base_df = base_df.drop_duplicates(subset=['id_meter_id', 'id_time'], keep='first')

    # A reading of 0 in a processed column is treated as missing.
    base_df[columns_to_process] = base_df[columns_to_process].replace(0, np.nan)

    # Drop rows where every processed column is missing, then index by time so
    # the per-meter frames can be resampled.
    base_df = base_df.dropna(subset=columns_to_process, how='all').set_index('id_time')
    print("Data mentah berhasil dibaca dan dibersihkan.")

    # Output layout: the base columns in their original order, then each
    # '<col>_status' (and the value column itself when it is not already a
    # base column). Every column appears exactly once; previously every
    # processed column was listed twice and therefore written twice.
    final_cols_order = list(base_cols)
    for col in columns_to_process:
        if col not in final_cols_order:
            final_cols_order.append(col)
        final_cols_order.append(f'{col}_status')

    # Strip only a trailing '.csv'; str.replace would also remove '.csv'
    # occurring elsewhere in the path.
    base_name = input_file_raw[:-len('.csv')] if input_file_raw.endswith('.csv') else input_file_raw

    # The grouping does not depend on the interval, so build it once.
    grouped = base_df.groupby('id_meter_id')

    # --- Processing loop, one pass per configured interval ---
    for job in processing_jobs:
        freq = job['freq']
        gap_limit = job['gap_limit']
        unit_text = job['unit_text']
        interval_label = unit_text.replace('_', ' ')

        print("\n" + "="*50)
        print(f"MEMULAI PROSES UNTUK INTERVAL {interval_label}")
        print("="*50)

        all_processed_dfs = []
        for _, group in tqdm(grouped, desc=f"Proses {unit_text}"):
            processed_df = process_interval_data(group.copy(), freq, gap_limit)
            if processed_df is not None:
                all_processed_dfs.append(processed_df)

        # pd.concat raises on an empty list; report the situation explicitly
        # instead of surfacing it as a generic error.
        if not all_processed_dfs:
            print(f"Tidak ada data untuk diproses pada interval {interval_label}; interval dilewati.")
            continue

        # 'id_time' returns from the index to a regular column here.
        final_df = pd.concat(all_processed_dfs).reset_index()
        final_df = final_df[final_cols_order]

        # Save the output file next to the input file.
        output_file = f'{base_name}_hasil_interpolasi_{unit_text}.csv'
        final_df.to_csv(output_file, index=False)

        print(f"\nProses Interval {interval_label} Selesai! Hasil disimpan di: {output_file}")

        # Print the per-column status summary.
        for col in columns_to_process:
            print_status_summary(final_df, f'{col}_status', interval_label)

except FileNotFoundError:
    print(f"Error: File '{input_file_raw}' tidak ditemukan.")
except Exception as e:
    # Top-level boundary of the script: report the error instead of crashing.
    print(f"Terjadi error: {e}")