In [None]:
# -------------------------------------------
# Benin EDA Notebook (eda-benin branch)
# -------------------------------------------
# Loads the raw country CSV, cleans it, runs the shared analysis routine and
# finally draws an optional GHI/Tamb/RH bubble chart.

# 1️⃣ Setup: imports
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# 2️⃣ Import helper functions from src
# A src/ directory existing on the branch does not by itself make its modules
# importable: the directory has to be on sys.path. Walk up from the current
# working directory and register the first src/ folder found (appended, so it
# can never shadow an installed package).
# NOTE(review): assumes the helpers live in a top-level src/ directory of the
# repository — confirm against the project layout.
for _root in (Path.cwd(), *Path.cwd().parents):
    _src_dir = _root / "src"
    if _src_dir.is_dir():
        if str(_src_dir) not in sys.path:
            sys.path.append(str(_src_dir))
        break

# Import the *functions*, not the modules: a bare `import load_country_data`
# binds a module object, and calling it below would raise
# "TypeError: 'module' object is not callable".
# NOTE(review): assumes each module defines a function with the same name as
# the module. The error output saved with this cell mentions a module called
# 'data_loader'; if that is the real module name in src/, adjust these imports.
from load_country_data import load_country_data
from preprocess_dataset import preprocess_dataset
from run_full_analysis import run_full_analysis

# 3️⃣ Dataset configuration
FILENAME = "benin-malanville.csv"  # Adjust for Togo or Sierra Leone
COUNTRY_NAME = "benin"
TIMESTAMP_COL = "timestamp"  # Adjust if your CSV uses a different column name

# 4️⃣ Load raw dataset
df_raw = load_country_data(FILENAME)
print(f"Raw dataset shape: {df_raw.shape}")
display(df_raw.head())  # `display` is provided by the notebook (IPython) runtime

# 5️⃣ Preprocess dataset
df_clean = preprocess_dataset(df_raw, country=COUNTRY_NAME)
print(f"Cleaned dataset shape: {df_clean.shape}")
display(df_clean.head())

# 6️⃣ Run full EDA / analysis
# This includes summary stats, missing values, outliers, time series plots,
# cleaning impact, correlations, wind & distribution analysis, temperature analysis, etc.
run_full_analysis(df_clean, country=COUNTRY_NAME, timestamp_col=TIMESTAMP_COL)

# 7️⃣ Optional: Bubble chart (GHI vs Tamb with RH as bubble size)
# Only drawn when all three columns are present in the cleaned frame.
if all(col in df_clean.columns for col in ["ghi", "tamb", "rh"]):
    plt.figure(figsize=(10, 6))
    # The RH value of each row is used directly as the marker size.
    plt.scatter(df_clean["ghi"], df_clean["tamb"], s=df_clean["rh"], alpha=0.5,
                c="orange", edgecolors="k")
    plt.xlabel("GHI")
    plt.ylabel("Tamb")
    plt.title(f"GHI vs Tamb with RH bubble size ({COUNTRY_NAME})")
    plt.show()


ModuleNotFoundError: No module named 'data_loader'