# Exercise 2
**Evaluate the Scale of Measurement on Soil Moisture**


## Overview



## Imports

In [None]:
import hvplot.pandas  # noqa
import pandas as pd

## Loading Soil Moisture and Anomaly Data

As before, we load the data as a `pandas.DataFrame`.

In [None]:
%run ../src/download_path.py

url = make_url("era5_ssm_timeseries.csv")  # noqa
df_era5 = pd.read_csv(
    url,
    index_col="time",
    parse_dates=True,
)

df_era5.head()

In [None]:
# Read the ASCAT time series with the "time" column as a parsed datetime index.
url = make_url("ascat-6_25_ssm_timeseries.csv")  # noqa
df_ascat = pd.read_csv(url, index_col="time", parse_dates=True)

df_ascat.head()

In [None]:
# One bulk-density value per location, indexed by the location name.
density_df = pd.DataFrame(
    data={"bulk_density": [1.25, 1.4, 1.4, 1.35, 1.25]},
    index=pd.Index(
        ["Buzi", "Chokwé", "Mabalane", "Mabote", "Muanza"],
        name="name",
    ),
)
density_df

In [None]:
def calc_porosity(x, particle_density=2.65):
    """Convert bulk density to porosity as ``1 - x / particle_density``.

    Parameters
    ----------
    x : scalar, numpy array, pandas Series or DataFrame
        Bulk density value(s), in the same units as ``particle_density``.
    particle_density : float, default 2.65
        Density of the solid soil particles. The default is the constant this
        notebook used originally (presumably g/cm³ for mineral soil -- confirm
        against the source of the bulk-density values).

    Returns
    -------
    scalar or same container type as ``x``
        Porosity as a fraction, computed element-wise for array-like input.
    """
    return 1 - x / particle_density


# Porosity per location: label the column "porosity" first, then convert the
# bulk-density values with `calc_porosity`.
porosity_df = (
    density_df
    .rename(columns={"bulk_density": "porosity"})
    .transform(calc_porosity)
)
porosity_df

## Pandas Indexing and Selecting

Pandas is great for slicing and dicing your data. In the following, we will be using the data of the following locations, as introduced in the earlier notebooks. Here we show how to extract the location ids given the longitude and latitude of the location.

In [None]:
# `density_df` was already built earlier in this notebook; show it again here
# instead of constructing an identical second copy.
density_df

In [None]:
# Attach each location's porosity to the ASCAT rows: the "name" column of the
# ASCAT frame is matched against the index of `porosity_df`.
df_ascat_porosity = pd.merge(
    df_ascat,
    porosity_df,
    left_on="name",
    right_index=True,
)
df_ascat_porosity.head()

In [None]:
def deg2vol(df):
    """Multiply "porosity" by "surface_soil_moisture" and divide by 100.

    ``df`` can be anything that supports lookup by those two keys, such as a
    DataFrame (column-wise result) or a single row. The input soil moisture is
    presumably a percentage of saturation, which makes the result a volumetric
    fraction -- confirm against the data description.
    """
    scaled = df["porosity"] * df["surface_soil_moisture"]
    return scaled / 100


# Work on a copy so `df_ascat` keeps its original values and unit label.
df_ascat_vol = df_ascat.copy()
df_ascat_vol["unit"] = "m³/m³"
# `deg2vol` is plain column arithmetic, so call it once on the whole frame
# rather than row by row through `apply(axis=1)`; the resulting Series has the
# same index and values, and is computed far faster on long time series.
df_ascat_vol["surface_soil_moisture"] = deg2vol(df_ascat_porosity)
df_ascat_vol.head()

## Correlating

In [None]:
# Stack the ERA5 rows and the converted ASCAT rows into one long frame.
df = pd.concat(objs=[df_era5, df_ascat_vol], axis="index")
df.head()

In [None]:
# Soil moisture over time: points are split by the "type" column and the
# locations in "name" are handled through hvplot's `groupby` option.
df.hvplot.scatter(
    # data mapping
    x="time",
    y="surface_soil_moisture",
    by="type",
    groupby="name",
    # appearance
    alpha=0.5,
    padding=(0.01, 0.1),
    frame_width=800,
)

In [None]:
# Daily median per location for each product. Grouping by "name" before
# resampling keeps the locations separate, so each result is indexed by
# (name, time) and holds a single column named after its product.
# The ERA5 frame was previously called `df_insitu_daily`, although it is built
# from `df_era5`; the name now matches the data and its "era5" column.
df_era5_daily = (
    df_era5.groupby("name")["surface_soil_moisture"]
    .resample("D")
    .median()
    .to_frame("era5")
)

df_ascat_vol_daily = (
    df_ascat_vol.groupby("name")["surface_soil_moisture"]
    .resample("D")
    .median()
    .to_frame("ascat")
)

# Join on the shared (name, time) index; `pd.merge` defaults to an inner join,
# so only location/day pairs present in both products are kept.
df_combined = pd.merge(
    df_ascat_vol_daily, df_era5_daily, left_index=True, right_index=True
)
df_combined.head()

In [None]:
# Pearson correlation between the "ascat" and "era5" columns, computed
# separately for each location ("name" is a level of the row index here).
df_combined.groupby(level="name").corr(method="pearson")