In [None]:
# ------------------------------
# EDA Template Notebook
# ------------------------------

# 1️⃣ Setup imports
import pandas as pd
from src.data_loader import load_country_data
from src.preprocess import preprocess_dataset
from src.analyze import run_full_analysis

# ------------------------------
# 2️⃣ Set dataset and country
# ------------------------------
# Per-country configuration: edit these three values to reuse this notebook
# for another dataset. Everything below reads only these constants.
FILENAME = "data/benin-malanville.csv"  # raw CSV path handed to load_country_data (relative, so it depends on the working directory)
COUNTRY_NAME = "benin"  # forwarded as `country=` to preprocessing and analysis
TIMESTAMP_COL = "timestamp"  # forwarded as `timestamp_col=` to run_full_analysis; adjust if your CSV column differs

# ------------------------------
# 3️⃣ Load raw dataset
# ------------------------------
# load_country_data is a project helper (src.data_loader). Its result is used
# below as a DataFrame-like object (.shape, .head()).
df_raw = load_country_data(FILENAME)
# Sanity check: print the dimensions and the rows returned by head().
print(f"Raw dataset shape: {df_raw.shape}")
print(df_raw.head())

# ------------------------------
# 4️⃣ Preprocess dataset
# ------------------------------
# preprocess_dataset is a project helper (src.preprocess).
# NOTE(review): whether it copies df_raw or modifies it in place is not
# visible here — confirm before reusing df_raw after this step.
# NOTE(review): TIMESTAMP_COL is not passed to this step — confirm the column
# keeps that name in df_clean, since run_full_analysis is told to look for it.
df_clean = preprocess_dataset(df_raw, country=COUNTRY_NAME)
# Same sanity check as for the raw data, so the two shapes can be compared.
print(f"Cleaned dataset shape: {df_clean.shape}")
print(df_clean.head())

# ------------------------------
# 5️⃣ Run full EDA/Analysis
# ------------------------------
# run_full_analysis is a project helper (src.analyze); any return value is
# discarded here. Presumably it emits its plots/summaries as side effects —
# confirm in src.analyze.
run_full_analysis(df_clean, country=COUNTRY_NAME, timestamp_col=TIMESTAMP_COL)

# ------------------------------
# 6️⃣ Additional analysis (optional)
# ------------------------------
# Example: bubble chart of GHI vs Tamb, with RH driving the bubble size.
import matplotlib.pyplot as plt

# Columns the chart needs. Names are matched exactly (case-sensitive) against
# df_clean.columns, so a dataset whose columns are spelled differently
# (e.g. upper-case) will not match.
bubble_cols = ["ghi", "tamb", "rh"]
missing_cols = [col for col in bubble_cols if col not in df_clean.columns]

if missing_cols:
    # Report why no chart was drawn instead of skipping silently.
    print(f"Skipping bubble chart ({COUNTRY_NAME}): missing columns {missing_cols}")
else:
    plt.figure(figsize=(10, 6))
    # `s` is the marker area in points**2, so RH values are used directly as
    # bubble areas; alpha keeps overlapping bubbles readable.
    plt.scatter(df_clean["ghi"], df_clean["tamb"], s=df_clean["rh"], alpha=0.5, c="orange", edgecolors="k")
    plt.xlabel("GHI")
    plt.ylabel("Tamb")
    plt.title(f"GHI vs Tamb with RH bubble size ({COUNTRY_NAME})")
    plt.show()
