# Weather data in GEE

## Initializing dependencies

In [3]:
# Import necessary libraries
import ee  # Google Earth Engine API
import geemap  # For interactive maps with GEE and other utilities
import pandas as pd  # For data manipulation
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt  # For plotting
import geopandas as gpd  # For geospatial data handling

In [5]:
# Run the interactive Earth Engine login flow. On success it stores an
# authorization token (see this cell's output).
# NOTE(review): presumably only needed when no valid token is cached — confirm.
ee.Authenticate()


Successfully saved authorization token.


In [4]:
# Initialise the Earth Engine client library for this kernel session; the ee.*
# calls in the following cells rely on it.
ee.Initialize()

## TerraClimate

In [5]:
# Load the TerraClimate collection for 2000-2023, keeping only the bands used
# below: drought index (pdsi), precipitation (pr), minimum temperature (tmmn)
# and maximum temperature (tmmx).
# filterDate() treats its end date as exclusive, so the window closes on
# 2024-01-01 in order to cover the whole of 2023.
terraclimate = (
    ee.ImageCollection("IDAHO_EPSCOR/TERRACLIMATE")
    .filterDate("2000-01-01", "2024-01-01")
    .select(["pdsi", "pr", "tmmn", "tmmx"])
)

In [31]:
# AMC polygons, read from an Earth Engine table asset
# (presumably an uploaded shapefile — confirm against the asset's provenance)
AMC_ASSET_ID = "projects/degraded-pastures/assets/amc_1991_2022"
amc_fc = ee.FeatureCollection(AMC_ASSET_ID)

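Define the reducers and functions used to summarise each year. PDSI and minimum temperature (`tmmn`) get the yearly mean and minimum, maximum temperature (`tmmx`) gets the yearly mean and maximum, and precipitation (`pr`) is summed over the year. Each yearly statistic is then averaged over every AMC polygon.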

In [35]:
def _mean_combined_with(other_reducer):
    """Return a reducer that emits the mean together with `other_reducer`.

    sharedInputs=True makes both reducers consume the same input data instead
    of each expecting its own input.
    """
    return ee.Reducer.mean().combine(
        reducer2=other_reducer,
        sharedInputs=True,
    )


# Mean + minimum (used for pdsi and tmmn)
mean_min_reducer = _mean_combined_with(ee.Reducer.min())

# Mean + maximum (used for tmmx)
mean_max_reducer = _mean_combined_with(ee.Reducer.max())

In [63]:
# Yearly mean and minimum of PDSI and minimum temperature, per AMC
def calculate_pdsi_tmmn(year):
    """Compute yearly PDSI and minimum-temperature statistics for every AMC.

    The TerraClimate images of the given calendar year are collapsed per pixel
    into their mean and minimum over the year (``mean_min_reducer``). Each of
    the resulting bands is then averaged over every feature of ``amc_fc``.

    Relies on the notebook globals ``terraclimate``, ``mean_min_reducer`` and
    ``amc_fc``.

    Parameters
    ----------
    year : int
        Calendar year to process.

    Returns
    -------
    pandas.DataFrame
        One row per AMC with the columns ``code_amc``, ``year``, ``pdsi_min``,
        ``pdsi_mean``, ``tmmn_min`` and ``tmmn_mean``.

    Raises
    ------
    KeyError
        If any of the expected columns is missing from the reduced table.
    """
    start_date = f"{year}-01-01"
    # filterDate() excludes its end date, so the window must close on 1 January
    # of the following year to cover the full calendar year.
    end_date = f"{int(year) + 1}-01-01"

    # Per-pixel mean and minimum over the year; output bands are named
    # <band>_mean and <band>_min.
    yearly_data = (
        terraclimate
        .filterDate(start_date, end_date)
        .select(["pdsi", "tmmn"])
        .reduce(reducer=mean_min_reducer)
    )

    # Average each yearly band over every AMC polygon.
    # NOTE(review): 4638.3 is presumably TerraClimate's native pixel size in
    # metres — confirm against the dataset catalog.
    stats = yearly_data.reduceRegions(
        collection=amc_fc,
        reducer=ee.Reducer.mean(),
        scale=4638.3,
        crs="EPSG:4326",
    )

    # Bring the reduced table client-side as a pandas DataFrame.
    # NOTE(review): no band scale factor is applied here, so values look like
    # raw stored units — confirm whether downstream code rescales them.
    df = geemap.ee_to_df(stats)
    df["year"] = year  # tag every row with the processed year

    return df[["code_amc", "year", "pdsi_min", "pdsi_mean", "tmmn_min", "tmmn_mean"]]

# Yearly mean and maximum of maximum temperature, per AMC
def calculate_tmmx(year):
    """Compute yearly maximum-temperature statistics for every AMC.

    The TerraClimate ``tmmx`` images of the given calendar year are collapsed
    per pixel into their mean and maximum over the year
    (``mean_max_reducer``). Each of the resulting bands is then averaged over
    every feature of ``amc_fc``.

    Relies on the notebook globals ``terraclimate``, ``mean_max_reducer`` and
    ``amc_fc``.

    Parameters
    ----------
    year : int
        Calendar year to process.

    Returns
    -------
    pandas.DataFrame
        One row per AMC with the columns ``code_amc``, ``year``, ``tmmx_max``
        and ``tmmx_mean``.

    Raises
    ------
    KeyError
        If any of the expected columns is missing from the reduced table.
    """
    start_date = f"{year}-01-01"
    # filterDate() excludes its end date, so the window must close on 1 January
    # of the following year to cover the full calendar year.
    end_date = f"{int(year) + 1}-01-01"

    # Per-pixel mean and maximum over the year; output bands are named
    # tmmx_mean and tmmx_max.
    yearly_data = (
        terraclimate
        .filterDate(start_date, end_date)
        .select("tmmx")
        .reduce(reducer=mean_max_reducer)
    )

    # Average each yearly band over every AMC polygon.
    # NOTE(review): 4638.3 is presumably TerraClimate's native pixel size in
    # metres — confirm against the dataset catalog.
    stats = yearly_data.reduceRegions(
        collection=amc_fc,
        reducer=ee.Reducer.mean(),
        scale=4638.3,
        crs="EPSG:4326",
    )

    # Bring the reduced table client-side as a pandas DataFrame.
    # NOTE(review): no band scale factor is applied here, so values look like
    # raw stored units — confirm whether downstream code rescales them.
    df = geemap.ee_to_df(stats)
    df["year"] = year  # tag every row with the processed year

    return df[["code_amc", "year", "tmmx_max", "tmmx_mean"]]

# Yearly precipitation total, averaged over each AMC
def calculate_pr(year):
    """Compute the yearly precipitation total for every AMC.

    The TerraClimate ``pr`` images of the given calendar year are summed per
    pixel, and that yearly total is then averaged over every feature of
    ``amc_fc``.

    Relies on the notebook globals ``terraclimate`` and ``amc_fc``.

    Parameters
    ----------
    year : int
        Calendar year to process.

    Returns
    -------
    pandas.DataFrame
        One row per AMC with the columns ``code_amc``, ``year`` and ``pr``.

    Raises
    ------
    KeyError
        If any of the expected columns is missing from the reduced table.
    """
    start_date = f"{year}-01-01"
    # filterDate() excludes its end date, so the window must close on 1 January
    # of the following year to cover the full calendar year.
    end_date = f"{int(year) + 1}-01-01"

    # Per-pixel sum of precipitation over the year
    yearly_data = (
        terraclimate
        .filterDate(start_date, end_date)
        .select("pr")
        .sum()
    )

    # Regional mean (not sum) of the yearly total over every AMC polygon.
    # NOTE(review): 4638.3 is presumably TerraClimate's native pixel size in
    # metres — confirm against the dataset catalog.
    stats = yearly_data.reduceRegions(
        collection=amc_fc,
        reducer=ee.Reducer.mean(),
        scale=4638.3,
        crs="EPSG:4326",
    )

    # Bring the reduced table client-side as a pandas DataFrame
    df = geemap.ee_to_df(stats)
    df["year"] = year  # tag every row with the processed year

    # With a single-band image the reducer output column is called "mean";
    # give it the variable's name instead (no in-place mutation).
    df = df.rename(columns={"mean": "pr"})

    return df[["code_amc", "year", "pr"]]

In [None]:

# Years to process: 2000 through 2023 (range() excludes its stop value)
years = range(2000, 2024)

# Run the PDSI / minimum-temperature extraction once per year, then stack the
# yearly frames into one long table
pdsi_tmmn_results = list(map(calculate_pdsi_tmmn, years))
pdsi_tmmn_df = pd.concat(pdsi_tmmn_results)
print("Done: PDSI and TMMN")

In [None]:

# Run the maximum-temperature extraction once per year, then stack the yearly
# frames into one long table
tmmx_results = list(map(calculate_tmmx, years))
tmmx_df = pd.concat(tmmx_results)
print("Done: TMMX")


In [None]:

# Run the precipitation extraction once per year, then stack the yearly frames
pr_results = list(map(calculate_pr, years))
pr_df = pd.concat(pr_results)
print("Done: Precipitation")

# Join the three tables on AMC code and year (default inner join, so only
# AMC/year pairs present in all three tables are kept)
join_keys = ["code_amc", "year"]
final_df = pd.merge(
    pd.merge(pdsi_tmmn_df, tmmx_df, on=join_keys),
    pr_df,
    on=join_keys,
)

final_df.head()


Done: Precipitation


Unnamed: 0,code_amc,year,pdsi_min,pdsi_mean,tmmn_min,tmmn_mean,tmmx_max,tmmx_mean,pr
0,10001,2000,-180.596486,-37.791641,125.789303,148.984715,294.498921,268.917694,901.529747
1,10002,2000,-260.619691,-203.035506,169.234191,190.477284,320.956466,299.80239,427.805492
2,10003,2000,180.33546,303.986364,174.622607,191.610871,303.692799,282.67225,1259.158614
3,10004,2000,-135.009927,27.037705,168.240073,185.643983,297.07197,277.138998,709.346003
4,10005,2000,-48.251733,148.462871,168.844554,187.581477,289.40854,270.348205,882.411757


In [66]:
# Output locations, relative to the notebook's working directory
csv_path = "../Variables/terraclimate.csv"
pickle_path = "../Variables/terraclimate.pkl"

# Plain-text copy without the DataFrame index
final_df.to_csv(csv_path, index=False)

# Serialised pickle copy of the same frame
final_df.to_pickle(pickle_path)