# Exploratory Data Analysis (EDA) of Processed ERA5 Data

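This notebook explores the processed ERA5 data for Bonn, Germany (January–June 2024): summary statistics, a missing-value check, and interactive Plotly time series, histograms, and boxplots of the key variables.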

## 1. Load Processed Data

In [ ]:
import xarray as xr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Path to the processed ERA5 file for Bonn (months 1-6 of 2024);
# presumably NetCDF, judging by the .nc extension.
processed_path = 'data/processed/era5_processed_Bonn_2024_months_1-6.nc'

# Open the file as an xarray Dataset.
ds = xr.open_dataset(filename_or_obj=processed_path)

# Trailing bare expression so the notebook renders the Dataset summary.
ds

## 2. Summary Statistics and Missing Value Check

In [ ]:
# Variables of interest; the statistics and plotting cells below reuse this list.
key_vars = [
    'surface_solar_radiation_downwards_w_m2',
    '2m_temperature_c',
    '10m_wind_speed',
    'total_precipitation',
    'total_cloud_cover',
]

# Flatten the Dataset into a table and turn its index levels into ordinary
# columns (the plotting cells below read a 'time' column from this frame).
df = (
    ds
    .to_dataframe()
    .reset_index()
)

# Descriptive statistics for the key variables; left as the last expression so
# the notebook displays the resulting table.
df.loc[:, key_vars].describe()

In [ ]:
# Count missing entries per key variable; the resulting Series is displayed as
# the cell output.
df.loc[:, key_vars].isna().sum()

## 3. Interactive Time Series Plots (Plotly)

In [ ]:
# One interactive line chart per key variable, plotted against the 'time' column.
for var in key_vars:
    fig = px.line(
        data_frame=df,
        x='time',
        y=var,
        title=f'Time Series of {var}',
    )
    fig.show()

## 4. Interactive Histograms and Boxplots (Plotly)

In [ ]:
# For every key variable, show its distribution twice: first as a 40-bin
# histogram, then as a boxplot.
for var in key_vars:
    fig = px.histogram(
        data_frame=df,
        x=var,
        nbins=40,
        title=f'Histogram of {var}',
    )
    fig.show()

    fig = px.box(
        data_frame=df,
        y=var,
        title=f'Boxplot of {var}',
    )
    fig.show()