# Import packages:


In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd

### Load the train, test, and validation time series data


In [None]:
# Read the three dataset splits (train, test, validation) in that order and
# bind each resulting frame to its own name.
train_df, test_df, val_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/train_timeseries/train_timeseries.csv",
        "../data/test_timeseries/test_timeseries.csv",
        "../data/validation_timeseries/validation_timeseries.csv",
    )
)

### Concatenate datasets


In [None]:
# Stack the three splits row-wise into a single frame. Each split keeps its
# own row labels (ignore_index is left at its default), so labels may repeat.
data = pd.concat(objs=[train_df, test_df, val_df])

### Process date column


In [None]:
# Parse the "date" column into datetime values, replacing it in place.
data["date"] = data["date"].pipe(pd.to_datetime)

### Standardize FIPS codes as zero-padded, 5-character strings


In [None]:
# Convert the codes to Arrow-backed strings, then left-pad with zeros to a
# fixed width of 5 characters.
# NOTE(review): presumably the CSV reader parsed "fips" as integers, which
# would have dropped leading zeros — confirm against the raw files.
data["fips"] = (
    data["fips"]
    .astype("string[pyarrow]")
    .str.zfill(5)
)

### Rename score column to drought


In [None]:
# Relabel the "score" column as "drought"; all other columns are untouched.
data = data.rename({"score": "drought"}, axis="columns")

### Load US states shapefile data for geospatial mapping


In [None]:
# Zipped US state boundary file, read directly from the archive.
state_boundaries_archive = "../data/maps/cb_2022_us_state_500k.zip"
us_states = gpd.read_file(state_boundaries_archive)

### Add state names to the data by merging with state shapefile data


In [None]:
# Lookup table holding only the state code and state name columns.
state_lookup = us_states[["STATEFP", "NAME"]]

# The first two characters of a FIPS code are used as the state key. The full
# code is dropped before the join, and an inner join discards any row whose
# state key has no match in the lookup table. The helper key columns are
# removed afterwards and the matched name is exposed as "state_name".
data = (
    pd.merge(
        left=data.assign(state_fip=data["fips"].str[:2]).drop(columns=["fips"]),
        right=state_lookup,
        how="inner",
        left_on="state_fip",
        right_on="STATEFP",
    )
    .drop(columns=["state_fip", "STATEFP"])
    .rename(columns={"NAME": "state_name"})
)

### Spatio-temporal aggregation to create average monthly metrics by state


In [None]:
# Split the rows by state, bucket each state's rows with the "M" rule on the
# "date" column, and average the remaining columns within every
# (state, period) bucket.
# NOTE(review): recent pandas releases deprecate the "M" alias in favour of
# "ME" — confirm against the pinned pandas version before changing it.
per_state = data.groupby(by="state_name")
monthly_buckets = per_state.resample(rule="M", on="date")
aggregated_data = monthly_buckets.mean()

### Save aggregated data to Parquet


In [None]:
# Persist the aggregated frame (including its index) as a Parquet file.
aggregated_output_path = "../data/aggregated_data.parquet"
aggregated_data.to_parquet(aggregated_output_path)