# Exploratory Spatio-Temporal Data Analysis
# Part 2 - Descriptive Statistics - Yearly Summary Statistics

In [19]:
import xarray as xr

In [20]:
ds = xr.open_dataset('preprocessed_power_nasa_data.nc')

In [21]:
ds

## Descriptive statistics

### Summmary statistics

#### Yearly Summary Statistics

In [25]:
import xarray as xr
import pandas as pd

def calculate_annual_summary_statistics(ds):
    """
    Calculates the annual summary statistics for each variable in the dataset.

    Parameters:
    - ds (xarray.Dataset): The input dataset with a 'timestamp' dimension.

    Returns:
    - pandas.DataFrame: A DataFrame containing the mean, standard deviation, minimum, and maximum for each variable annually.

    Example of usage:
    annual_stats_df = calculate_annual_summary_statistics(ds)
    print(annual_stats_df)
    """
    # Ensure the timestamp is of datetime type for proper grouping
    if not isinstance(ds['timestamp'].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype):
        ds['timestamp'] = pd.to_datetime(ds['timestamp'])

    # Group by year
    grouped = ds.groupby('timestamp.year')

    # Calculate statistics for each variable
    annual_stats = {}
    for var in ds.data_vars:
        var_data = grouped[var]
        annual_stats[var] = var_data.mean(dim='timestamp'), var_data.std(dim='timestamp'), var_data.min(dim='timestamp'), var_data.max(dim='timestamp')

    # Combine statistics into a DataFrame
    annual_stats_df = pd.concat([pd.concat([annual_stats[var][stat] for stat in ['mean', 'std', 'min', 'max']], axis=1, keys=['mean', 'std', 'min', 'max'], names=['stat']) for var in ds.data_vars], axis=1, keys=ds.data_vars)

    return annual_stats_df

# Usage example
annual_stats_df = calculate_annual_summary_statistics(ds)
print(annual_stats_df)


KeyError: 'PRECTOTCORR'

In [23]:
# Call the function and store the summary statistics in a DataFrame
annual_stats_df = calculate_annual_summary_statistics(ds)

annual_stats_df

AttributeError: 'Dataset' object has no attribute 'aggregate'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def plot_annual_stats(df):
    """
    Plots the mean, standard deviation, and shows the minimum and maximum of variables over years in a colorful and aesthetically pleasing manner.

    Parameters:
    - df (pd.DataFrame): DataFrame with columns ['Variable', 'Mean', 'Standard Deviation', 'Minimum', 'Maximum', 'Year']

    This function does not return anything but displays a line plot for each variable
    with the mean values per year and error bars representing the standard deviation. Additionally, it marks the minimum and maximum values.

    Example of usage:
    plot_annual_stats(annual_stats_df)
    """
    # Set aesthetic style
    sns.set(style="whitegrid", context='talk')

    # Variables to plot
    variables = df['Variable'].unique()

    # Creating a figure to plot
    fig, axes = plt.subplots(len(variables), 1, figsize=(12, 6 * len(variables)), sharex=True)

    if len(variables) == 1:
        axes = [axes]  # Make sure axes is iterable

    # Color palette
    colors = sns.color_palette("husl", n_colors=len(variables))

    # Loop over each variable to create a subplot
    for var, ax, color in zip(variables, axes, colors):
        # Filter the DataFrame for the variable
        var_data = df[df['Variable'] == var]

        # Plotting the mean with standard deviation as error bars
        ax.errorbar(var_data['Year'], var_data['Mean'], yerr=var_data['Standard Deviation'], fmt='-o', capsize=5, color=color, label=f'Mean (± SD) of {var}')
        # Scatter plot for min and max
        ax.scatter(var_data['Year'], var_data['Minimum'], color=color, marker='_', s=100, label='Minimum')
        ax.scatter(var_data['Year'], var_data['Maximum'], color=color, marker='_', s=100, label='Maximum')
        
        ax.set_title(f'{var}')
        ax.set_ylabel('Value')
        ax.legend(frameon=True, loc='upper left')

    # Set x-label and adjust layout
    plt.xlabel('Year')
    plt.tight_layout()
    plt.show()

# Assuming annual_stats_df is your DataFrame as described, you can call the function as follows:
plot_annual_stats(annual_stats_df)


### What is Root Zone Soil Wetness?
Root zone soil wetness, measured on a daily scale, is an indicator of the moisture content within the soil layer that extends from the surface to 100 centimeters (cm) below grade. This particular layer is crucial because it encompasses the majority of the root systems for many plants, making it vital for agricultural and hydrological monitoring.

In [None]:
# Convert daily data to monthly by averaging
yearly_data = ds.resample(timestamp='YE').mean()

# Computing monthly averages specifically for the GWETROOT variable
yearly_soil_wetness = ds['GWETROOT'].resample(timestamp='YE').mean()

In [None]:
yearly_soil_wetness

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from matplotlib.colors import LinearSegmentedColormap

def plot_yearly_data(yearly_data):
    """
    Plots yearly climate data on a geographical map using a colormap that highlights soil wetness variations.

    Parameters:
    - yearly_data (xarray.Dataset): The dataset containing the climate data, specifically resampled to yearly averages.

    This function visualizes soil wetness for each year on a geographical map.
    """
    # Define a custom colormap from red (dry) to blue (wet)
    colors = ["red", "orange", "yellow", "green", "blue"]
    cmap = LinearSegmentedColormap.from_list("custom", colors, N=256)

    num_years = len(yearly_data.timestamp)
    ncols = 3  # A smaller number of columns for yearly data, assuming fewer years than months
    nrows = (num_years + ncols - 1) // ncols
    fig = plt.figure(figsize=(18, 3 * nrows), dpi=200)  # Adjust size and DPI for clarity

    for i, time in enumerate(yearly_data.timestamp):
        ax = fig.add_subplot(nrows, ncols, i + 1, projection=ccrs.PlateCarree())
        ax.coastlines(resolution='10m', color='black', linewidth=1)
        ax.add_feature(cfeature.BORDERS, linestyle=':')
        ax.set_extent([yearly_data.lon.min(), yearly_data.lon.max(), yearly_data.lat.min(), yearly_data.lat.max()], crs=ccrs.PlateCarree())
        
        # Plot the data
        soil_wetness = yearly_data.sel(timestamp=time)
        p = soil_wetness.plot(ax=ax, transform=ccrs.PlateCarree(), add_colorbar=False,
                              vmin=0, vmax=1, cmap=cmap)  # Applying the custom colormap
        
        ax.set_title(time.dt.strftime('%Y').values)  # Year as the title

    plt.tight_layout()
    
    # Add a colorbar with settings
    fig.subplots_adjust(right=0.9)
    cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
    fig.colorbar(p, cax=cbar_ax, label='Soil Wetness', extend='both')

    plt.show()
