In [None]:
import pandas as pd
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the raw Togo CSV into the working DataFrame used by the cells below.
raw_csv_path = 'data/togo_raw.csv'
df = pd.read_csv(raw_csv_path)

In [None]:
# Report how many values are missing in each column before any cleaning.
print(df.isna().sum())

# Keep only columns that are at least 95% populated. `dropna(thresh=...)`
# is documented to take an integer count of required non-null values, so the
# fractional threshold is rounded up; the same columns survive as with the
# unrounded float because non-null counts are whole numbers.
MIN_NON_NULL_FRACTION = 0.95
threshold = int(np.ceil(len(df) * MIN_NON_NULL_FRACTION))

# `.copy()` makes df_clean an independent frame, so the column assignment
# below writes into df_clean itself instead of a possible slice of `df`
# (which pandas reports as SettingWithCopyWarning).
df_clean = df.dropna(thresh=threshold, axis=1).copy()

# Fill the remaining gaps in GHI with the column median.
df_clean['GHI'] = df_clean['GHI'].fillna(df_clean['GHI'].median())

In [None]:
# Columns screened for outliers.
outlier_columns = ['GHI', 'DNI', 'DHI', 'WS', 'WSgust']

# nan_policy='omit' computes each column's mean/std from its non-missing
# values. With the default ('propagate') a single NaN in a column turns that
# column's z-scores into NaN for every row, and the filter below would then
# discard the entire dataset.
z_scores = stats.zscore(df_clean[outlier_columns], nan_policy='omit')

# Drop rows where any screened column has |z| >= 3. Comparisons with NaN are
# False, so a missing value (or a zero-variance column) is never counted as
# an outlier. For NaN-free data this keeps exactly the rows with all |z| < 3.
is_outlier = (np.abs(z_scores) >= 3).any(axis=1)

# `.copy()` detaches the filtered rows so later column assignments on
# df_clean do not trigger SettingWithCopyWarning.
df_clean = df_clean[~is_outlier].copy()

In [None]:
# Parse the 'Timestamp' column to datetimes. assign() rebinds df_clean to a
# new frame rather than writing a column into the row-filtered frame produced
# by the outlier step, which can raise SettingWithCopyWarning.
df_clean = df_clean.assign(Timestamp=pd.to_datetime(df_clean['Timestamp']))

# Plot GHI against time; label the y-axis so the figure stands on its own.
ghi_ax = df_clean.set_index('Timestamp')['GHI'].plot(title='GHI Over Time in Togo')
ghi_ax.set_ylabel('GHI')
plt.show()

In [None]:
# Mean ModA / ModB reading for each value of the 'Cleaning' column,
# rendered as a grouped bar chart.
module_columns = ['ModA', 'ModB']
cleaning_effect = df_clean.groupby('Cleaning')[module_columns].agg('mean')
cleaning_effect.plot.bar(title='Sensor Readings Before/After Cleaning in Togo')
plt.show()

In [None]:
# Pairwise correlations between the selected columns, drawn as an
# annotated heatmap.
corr_columns = ['GHI', 'DNI', 'DHI', 'Tamb', 'RH']
corr_matrix = df_clean.loc[:, corr_columns].corr()
sns.heatmap(data=corr_matrix, annot=True)
plt.title('Correlation Matrix for Togo')
plt.show()

In [None]:
# Write the cleaned frame to disk; the row index is not written.
clean_csv_path = 'data/togo_clean.csv'
df_clean.to_csv(clean_csv_path, index=False)