In [None]:
# 📦 Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore


In [None]:
# 📄 Load Data
# Read the Togo CSV, parsing the "Timestamp" column as datetimes, and show
# the first five rows as the cell's output.
df = pd.read_csv(
    "./../data//togo.csv",
    parse_dates=["Timestamp"],
)
df.head(5)

In [None]:
# 📊 Summary Stats + Nulls
# Print descriptive statistics for the frame, then the number of missing
# entries in each column.
summary_stats = df.describe()
missing_per_column = df.isnull().sum()
print(summary_stats)
print("\nMissing Values:\n", missing_per_column)


In [None]:
# 🔍 Outlier Detection
# Count, for each checked column, the readings whose z-score magnitude
# exceeds 3.
cols_to_check = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']
# nan_policy="omit" derives each column's mean/std from its non-missing
# readings only. With zscore's default ("propagate"), a single NaN turns the
# whole column's z-scores into NaN; every `> 3` comparison is then False and
# the column silently reports zero outliers.
z_scores = df[cols_to_check].apply(zscore, nan_policy="omit")
outliers = (np.abs(z_scores) > 3).sum()
print("\nOutliers:\n", outliers)

In [None]:
# 🧹 Clean Data
# Step 1: work on a copy and fill missing readings in the checked columns
# with each column's median.
df_clean = df.copy()
df_clean[cols_to_check] = df_clean[cols_to_check].fillna(df_clean[cols_to_check].median())

# Step 2: drop every row that has an outlier (|z| > 3) in any checked column.
# The z-scores are computed here, from the raw readings with
# nan_policy="omit", so this cell does not depend on NaN handling elsewhere.
# A missing reading keeps a NaN z-score, and NaN > 3 is False, so rows that
# were only imputed in step 1 are kept. The previous `(|z| <= 3).all()` test
# treated NaN as a failure, which discarded such rows (and, with zscore's
# default NaN propagation, every row once a column had one missing value).
z_scores_raw = df[cols_to_check].apply(zscore, nan_policy="omit")
has_outlier = (np.abs(z_scores_raw) > 3).any(axis=1)
df_clean = df_clean[~has_outlier]


In [None]:
# 📈 Time Series Visuals
# Line plot of GHI, DNI, DHI and Tamb from the cleaned data, indexed by
# "Timestamp".
ts_columns = ["GHI", "DNI", "DHI", "Tamb"]
ts_frame = df_clean.set_index("Timestamp")[ts_columns]
ts_ax = ts_frame.plot(figsize=(15, 6))
ts_ax.set_title("Togo: Irradiance and Temperature Over Time")
plt.show()

In [None]:

# 🧽 Cleaning Effect
# Bar chart of the mean ModA / ModB values for each value of the "Cleaning"
# column.
module_means = df_clean.groupby("Cleaning")[["ModA", "ModB"]].mean()
module_means.plot.bar()
plt.title("Sensor Output Before vs After Cleaning (Togo)")
plt.show()

In [None]:
# ⚪ Bubble Plot
# Scatter of GHI against Tamb; each marker's area is taken from the RH column.
plt.scatter(
    x=df_clean["Tamb"],
    y=df_clean["GHI"],
    s=df_clean["RH"],
    alpha=0.5,
)
plt.title("GHI vs Tamb (Bubble Size = RH) – Togo")
plt.xlabel("Temperature")
plt.ylabel("GHI")
plt.show()

In [None]:
# 📁 Save Clean Data
# Write the cleaned frame to CSV once; index=False leaves the row index out
# of the file. (The cell previously issued the identical write twice because
# of a pasted duplicate line.)
df_clean.to_csv("../data/togo_clean.csv", index=False)