# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore
import os

# Read the raw Togo measurements into a DataFrame.
df = pd.read_csv('data/togo.csv')

# Descriptive statistics, plus the per-column null counts for every column
# whose null count exceeds 5% of the number of rows.
summary_stats = df.describe()
missing_values = df.isnull().sum()
null_threshold = len(df) * 0.05
missing_report = missing_values.loc[missing_values.gt(null_threshold)]
for heading, payload in (
    ("Summary Statistics:\n", summary_stats),
    ("\nMissing Values (>5%):\n", missing_report),
):
    print(heading, payload)

# Outlier Detection
# A row is flagged when at least one of the listed columns has |z-score| > 3.
outlier_cols = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']
# zscore() defaults to nan_policy='propagate', which turns an entire column's
# z-scores into NaN as soon as that column contains a single missing value;
# every comparison against 3 is then False and the column can never flag an
# outlier. 'omit' computes mean/std from the non-missing values only, leaving
# NaN just at the missing positions (which are simply not flagged).
z_scores = df[outlier_cols].apply(zscore, nan_policy='omit')
outliers = (z_scores.abs() > 3).any(axis=1)
print(f"Number of outlier rows: {outliers.sum()}")

# Cleaning
# 1. Impute missing values in the listed columns with each column's median.
# 2. Drop the rows flagged in `outliers`.
# 3. Persist the cleaned frame.
df_clean = df.copy()
impute_cols = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB']
# Assign the filled columns back instead of calling
# `df_clean[col].fillna(..., inplace=True)`: that chained in-place form acts
# on a temporary object, is deprecated in pandas 2.x and has no effect once
# Copy-on-Write is enabled.
df_clean[impute_cols] = df_clean[impute_cols].fillna(df_clean[impute_cols].median())
# .copy() makes the filtered result an independent frame, so later column
# assignments on df_clean do not raise SettingWithCopyWarning.
df_clean = df_clean.loc[~outliers].copy()
df_clean.to_csv('data/togo_clean.csv', index=False)

# Time Series Analysis
# Plot the per-calendar-month mean of GHI, DNI, DHI and Tamb.

# savefig() does not create missing directories; make sure the output folder
# exists before the first figure is written.
os.makedirs('plots', exist_ok=True)

# assign() returns a new frame, so the datetime conversion is safe even when
# df_clean was produced by boolean filtering of another frame.
df_clean = df_clean.assign(Timestamp=pd.to_datetime(df_clean['Timestamp']))

indexed = df_clean.set_index('Timestamp')
# Group by monthly Period rather than pd.Grouper(freq='M'): the 'M' offset
# alias is deprecated since pandas 2.2 (renamed 'ME'), while the 'M' period
# alias is valid across pandas versions.
# NOTE(review): unlike Grouper, months with no rows are omitted instead of
# appearing as NaN gaps in the plot.
monthly_means = indexed.groupby(indexed.index.to_period('M'))[
    ['GHI', 'DNI', 'DHI', 'Tamb']
].mean()

ax = monthly_means.plot()
ax.set_title('Monthly Average GHI, DNI, DHI, Tamb - Togo')
ax.figure.savefig('plots/togo_timeseries.png')
plt.close(ax.figure)

# Cleaning Impact
# Mean ModA / ModB reading for each distinct value of the 'Cleaning' column,
# drawn as a grouped bar chart.
module_cols = ['ModA', 'ModB']
cleaning_impact = df_clean.groupby('Cleaning')[module_cols].mean()
ax = cleaning_impact.plot.bar()
ax.set_title('Average ModA & ModB Pre/Post Cleaning - Togo')
ax.figure.savefig('plots/togo_cleaning_impact.png')
plt.close(ax.figure)

# Correlation Analysis
# Pairwise correlation matrix of the selected columns, rendered as an
# annotated heatmap.
corr_cols = ['GHI', 'DNI', 'DHI', 'TModA', 'TModB']
corr = df_clean[corr_cols].corr()
ax = sns.heatmap(corr, annot=True, cmap='coolwarm')
ax.set_title('Correlation Heatmap - Togo')
ax.figure.savefig('plots/togo_correlation.png')
plt.close(ax.figure)

# Scatter Plots
# WS on the x-axis against GHI on the y-axis, half-transparent markers.
fig, ax = plt.subplots()
ax.scatter(df_clean['WS'], df_clean['GHI'], alpha=0.5)
ax.set_xlabel('Wind Speed (m/s)')
ax.set_ylabel('GHI (W/m²)')
ax.set_title('WS vs GHI - Togo')
fig.savefig('plots/togo_ws_ghi.png')
plt.close(fig)

# Wind Rose (simplified)
# A 36-bin histogram of the WD column stands in for a polar wind rose.
wind_direction = df_clean['WD']
ax = sns.histplot(wind_direction, bins=36)
ax.set_title('Wind Direction Distribution - Togo')
ax.figure.savefig('plots/togo_wind_rose.png')
plt.close(ax.figure)

# Bubble Chart
# Tamb vs GHI scatter where each marker's area is ten times the row's RH value.
bubble_sizes = df_clean['RH'] * 10
fig, ax = plt.subplots()
ax.scatter(df_clean['Tamb'], df_clean['GHI'], s=bubble_sizes, alpha=0.5)
ax.set_xlabel('Ambient Temperature (°C)')
ax.set_ylabel('GHI (W/m²)')
ax.set_title('GHI vs Tamb (Bubble Size = RH) - Togo')
fig.savefig('plots/togo_bubble.png')
plt.close(fig)