In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the raw Sierra Leone (Bumbuna) measurements from the shared data folder.
raw_csv_path_sl = "../data/sierraleone-bumbuna.csv"  # Adjust filename if different
df_sl = pd.read_csv(raw_csv_path_sl)

# Quick structural overview (dtypes, non-null counts), then the first rows.
df_sl.info()
df_sl.head()

In [None]:
# Summary stats.
# Only the last expression of a cell is rendered automatically, so the summary
# table is shown explicitly; otherwise its result would be silently discarded.
# `display` is provided by the IPython/Jupyter kernel.
display(df_sl.describe(include="all"))

# Missing values: percentage of NaN entries per column, keeping only the
# columns whose missing share exceeds the threshold (>5%).
MISSING_THRESHOLD_PCT = 5
missing_sl = df_sl.isna().mean() * 100
display(missing_sl[missing_sl > MISSING_THRESHOLD_PCT])

In [None]:
from scipy import stats

# Columns screened for outliers.
outlier_cols_sl = ["GHI", "DNI", "DHI", "ModA", "ModB", "WS", "WSgust"]

# nan_policy="omit" matters here: with the default ("propagate") a single NaN
# in a column turns every z-score of that column into NaN, so the `> 3` test
# is never true and that column is silently excluded from outlier detection.
z_scores_sl = stats.zscore(
    df_sl[outlier_cols_sl].to_numpy(dtype=float),
    nan_policy="omit",
)

# A row is an outlier when any screened column lies more than 3 standard
# deviations from its column mean. NaN z-scores compare as False, so rows with
# missing values are kept rather than dropped here.
outliers_sl = (abs(z_scores_sl) > 3).any(axis=1)

# Copy so later edits to the cleaned frame never touch the raw frame.
df_sl_clean = df_sl[~outliers_sl].copy()

In [None]:
# Parse the timestamps and move them into the index so the frame can be
# resampled by time. The guard keeps the cell re-runnable: once "Timestamp"
# is the index it is no longer a column, and an unguarded second run would
# raise KeyError.
if "Timestamp" in df_sl_clean.columns:
    df_sl_clean = df_sl_clean.assign(
        Timestamp=pd.to_datetime(df_sl_clean["Timestamp"])
    ).set_index("Timestamp")

# Daily mean of GHI, plotted as a time series.
daily_ghi_sl = df_sl_clean["GHI"].resample("D").mean()
ax = daily_ghi_sl.plot(title="Sierra Leone: Daily GHI")
ax.set_xlabel("Date")
ax.set_ylabel("Mean GHI (W/m²)");

In [None]:
# Mean ModA / ModB reading for each value of the "Cleaning" column, drawn as
# grouped bars (one group per "Cleaning" value).
module_means_by_cleaning = df_sl_clean.groupby("Cleaning")[["ModA", "ModB"]].mean()
module_means_by_cleaning.plot.bar()

In [None]:
# Pairwise correlation between the irradiance columns and the TModA / TModB
# columns, rendered as an annotated heatmap.
corr_cols_sl = ["GHI", "DNI", "DHI", "TModA", "TModB"]
corr_matrix_sl = df_sl_clean[corr_cols_sl].corr()
sns.heatmap(corr_matrix_sl, annot=True)

In [None]:
from windrose import WindroseAxes

# Wind rose: direction ("WD") against speed ("WS"), with speeds split into 6 bins.
wind_direction_sl = df_sl_clean["WD"]
wind_speed_sl = df_sl_clean["WS"]

ax = WindroseAxes.from_ax()
ax.bar(wind_direction_sl, wind_speed_sl, bins=6)
ax.set_legend(title="Wind Speed (m/s)")

In [None]:
# Bubble chart of GHI against ambient temperature; marker size is RH / 10.
bubble_sizes_sl = df_sl_clean["RH"] / 10

scatter_ax = plt.gca()
scatter_ax.scatter(df_sl_clean["Tamb"], df_sl_clean["GHI"], s=bubble_sizes_sl)
scatter_ax.set_xlabel("Ambient Temp (°C)")
scatter_ax.set_ylabel("GHI (W/m²)")
scatter_ax.set_title("Sierra Leone: GHI vs. Temp (Bubble=RH)")

In [None]:
# "Timestamp" was moved into the index earlier in the notebook, so the index
# must be written out; index=False would drop every timestamp from the
# exported file.
df_sl_clean.to_csv("../data/sierraleone_clean.csv", index=True)

### Sierra Leone Findings:

- **Peak GHI**: [Value] W/m² at [Time].
- **Wind Patterns**: Dominant direction: [Direction] at [Speed] m/s.
- **Cleaning Impact**: ModA efficiency increased by [X]% post-cleaning.
