In [None]:
# ------------------------------
# EDA Template Notebook
# ------------------------------

# 1️⃣ Setup imports and repo path
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Add repo root to sys.path so 'src' can be imported
def find_repo_root(start=None, marker="src"):
    """Return the closest directory at or above *start* that contains *marker*.

    Searching upward from the working directory keeps the notebook runnable
    from any checkout location instead of one machine-specific path.

    Args:
        start: Directory to begin searching from; defaults to the current
            working directory.
        marker: Name of the sub-directory that identifies the repo root.

    Returns:
        The matching directory as a resolved ``Path``, or ``None`` when
        neither *start* nor any of its ancestors contains *marker*.
    """
    origin = Path.cwd() if start is None else Path(start)
    origin = origin.resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / marker).is_dir():
            return candidate
    return None


# Machine-specific location, used only when the upward search finds nothing.
FALLBACK_REPO_ROOT = Path(r"D:\Python\Week_01\Assignment\solar-challenge-week0")

repo_root = find_repo_root()
if repo_root is None:
    # Say so explicitly: a wrong root otherwise only surfaces later as
    # "ModuleNotFoundError: No module named 'src'".
    print(
        f"No 'src' directory found at or above {Path.cwd()}; "
        f"falling back to {FALLBACK_REPO_ROOT}",
        file=sys.stderr,
    )
    repo_root = FALLBACK_REPO_ROOT
if str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))

# Now imports from src will work
from src.data_loader import load_country_data
from src.preprocess import preprocess_dataset
from src.analyze import run_full_analysis

# ------------------------------
# 2️⃣ Set dataset and country
# ------------------------------
# Change these for each country; every later step in this cell reads them.
FILENAME = "benin-malanville.csv"  # raw CSV file name, passed to load_country_data
COUNTRY_NAME = "benin"  # forwarded as `country=` to preprocessing and analysis
TIMESTAMP_COL = "timestamp"  # forwarded as `timestamp_col=`; adjust if your CSV column differs

# ------------------------------
# 3️⃣ Load raw dataset
# ------------------------------
# NOTE(review): the directory FILENAME is resolved against is decided inside
# src.data_loader — confirm the expected data location there.
df_raw = load_country_data(FILENAME)
print(f"Raw dataset shape: {df_raw.shape}")
# `display` is not imported in this cell; it is the notebook built-in, so this
# line only works inside an IPython/Jupyter kernel.
display(df_raw.head())

# ------------------------------
# 4️⃣ Preprocess dataset
# ------------------------------
# NOTE(review): the bubble chart below looks up lowercase column names, so
# preprocess_dataset presumably normalises headers — verify in src.preprocess.
df_clean = preprocess_dataset(df_raw, country=COUNTRY_NAME)
# Printing both shapes makes any change in row/column count visible.
print(f"Cleaned dataset shape: {df_clean.shape}")
display(df_clean.head())

# ------------------------------
# 5️⃣ Run full EDA/Analysis
# ------------------------------
# Called for its side effects only; any return value is discarded.
run_full_analysis(df_clean, country=COUNTRY_NAME, timestamp_col=TIMESTAMP_COL)

# ------------------------------
# 6️⃣ Optional: Bubble Chart
# ------------------------------
# Scatter of GHI (x) against Tamb (y); each marker's size is taken from RH.
# The chart is skipped without any message when one of the columns is absent.
bubble_cols = {"ghi", "tamb", "rh"}
if bubble_cols.issubset(df_clean.columns):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(
        df_clean["ghi"],
        df_clean["tamb"],
        s=df_clean["rh"],
        alpha=0.5,
        c="orange",
        edgecolors="k",
    )
    ax.set_xlabel("GHI")
    ax.set_ylabel("Tamb")
    ax.set_title(f"GHI vs Tamb with RH bubble size ({COUNTRY_NAME})")
    plt.show()


ModuleNotFoundError: No module named 'src'