 Import Libraries and Load Data

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import plotly.express as px

# Read the raw measurements into `df`; the 'Timestamp' column is parsed
# to datetime while loading so later cells can use the `.dt` accessor.
raw_csv_path = "data/sierra_leone.csv"
df = pd.read_csv(raw_csv_path, parse_dates=['Timestamp'])


Outlier Detection (Z-Score Method)

In [None]:
from scipy.stats import zscore

cols_to_check = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']

# Column-wise z-scores. scipy's default nan_policy='propagate' turns every
# z-score of a column into NaN as soon as that column holds a single NaN,
# which would silently flag no outliers there. 'omit' computes mean/std from
# the observed values only; missing cells stay NaN and compare as False below.
z_scores = df[cols_to_check].apply(zscore, nan_policy='omit')
outliers = np.abs(z_scores) > 3

# Percentage of rows flagged per column, computed before any row is dropped.
outlier_pct = outliers.mean() * 100

# Impute missing values with the column median, then drop every row that is
# flagged in at least one checked column. `.copy()` gives later cells an
# independent frame, so adding columns to `df` does not raise
# SettingWithCopyWarning.
# NOTE: `df` is overwritten here, so re-running this cell filters the
# already-filtered frame again; re-run the load cell first.
df[cols_to_check] = df[cols_to_check].fillna(df[cols_to_check].median())
df = df.loc[~outliers.any(axis=1)].copy()

# Last expression of the cell so the notebook actually displays it.
outlier_pct


Time Series Analysis

In [None]:
# Line chart of the irradiance components and ambient temperature against
# the timestamp index.
ts_columns = ['GHI', 'DNI', 'DHI', 'Tamb']
ts_ax = df.set_index('Timestamp')[ts_columns].plot(figsize=(15,5))
ts_ax.set_title('Solar Irradiance & Temperature Over Time')

# Tag each row with its calendar month (1-12), then chart the mean
# irradiance per month as grouped bars.
df['Month'] = df['Timestamp'].dt.month
monthly_mean = df.groupby('Month')[['GHI', 'DNI', 'DHI']].mean()
monthly_mean.plot(kind='bar', figsize=(10,5))


Cleaning Impact

In [None]:
# Mean ModA / ModB reading for each value of the 'Cleaning' column,
# drawn as grouped bars.
cleaning_means = df.groupby('Cleaning')[['ModA', 'ModB']].mean()
cleaning_means.plot(kind='bar', title='Sensor Output Before/After Cleaning')


Correlation Analysis

In [None]:
# Correlation heatmap on its own figure.
corr_columns = ['GHI', 'DNI', 'DHI', 'TModA', 'TModB']
heat_fig, heat_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(df[corr_columns].corr(), annot=True, cmap='coolwarm', ax=heat_ax)
heat_ax.set_title('Correlation: Irradiance vs. Module Temperature')

# seaborn draws on the current axes when `ax` is omitted, which previously put
# both scatter plots on top of the heatmap. Each scatter now gets its own axes.
scatter_fig, (ws_ax, rh_ax) = plt.subplots(1, 2, figsize=(12, 5))
sns.scatterplot(data=df, x='WS', y='GHI', ax=ws_ax)
ws_ax.set_title('WS vs. GHI')
sns.scatterplot(data=df, x='RH', y='Tamb', ax=rh_ax)
rh_ax.set_title('RH vs. Tamb')
scatter_fig.tight_layout()


Wind & Distribution

In [None]:
# Optional: Wind Rose
# pip install windrose
#
# The wind rose depends on the third-party `windrose` package. The import is
# guarded so that a missing package only skips this one figure instead of
# aborting the cell before the histograms below are drawn.
try:
    from windrose import WindroseAxes
except ImportError:
    WindroseAxes = None
    print("windrose is not installed; skipping the wind rose plot.")

if WindroseAxes is not None:
    # Bars are binned by wind direction (WD) and wind speed (WS);
    # normed=True asks windrose for normalised values rather than raw counts.
    ax = WindroseAxes.from_ax()
    ax.bar(df['WD'], df['WS'], normed=True, opening=0.8, edgecolor='white')
    ax.set_title("Wind Rose")

# Histograms
df[['GHI', 'WS']].hist(bins=30, figsize=(10,5))


Temperature, Humidity & Bubble Plot

In [None]:
# Bubble chart: GHI on x, ambient temperature on y, marker area taken from RH.
bubble_fig, bubble_ax = plt.subplots(figsize=(10,6))
bubble_ax.scatter(df['GHI'], df['Tamb'], s=df['RH'], alpha=0.5)
bubble_ax.set_xlabel("GHI")
bubble_ax.set_ylabel("Ambient Temp")
bubble_ax.set_title("GHI vs. Temperature (Bubble size = RH)")


Export Clean Data

In [None]:
# Write the current `df` to disk; index=False leaves the row index out of
# the file.
clean_csv_path = "data/sierra_leone_clean.csv"
df.to_csv(clean_csv_path, index=False)
