In [None]:
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import pandas as pd

In this file the PELT algorithm is customised with precipitation covariates (including lagged effects), a segment-filtering mechanism, and turning points used as guidance.

In [None]:
# --- Analysis window (both bounds are inclusive when the series are filtered) ---
start_date = '2020-01-01'
end_date = '2020-03-01'

# --- Precipitation covariate ---
precip_threshold = 0.015  # values strictly above this count as a precipitation event
precip_lag = 2            # number of samples by which the event signal is shifted

# --- Changepoint search ---
min_seg_length = 5    # shortest candidate segment, in samples
lambda_value = 2      # constant penalty added to each candidate segment cost
cost_threshold = 100  # candidate segments whose cost exceeds this are rejected

# --- Optimiser ---
initial_guess = [0.30817061, 0.00337174, 0.01628086]  # starting point handed to minimize()
max_iterations = 5000  # forwarded to minimize() as the 'maxiter' option
gamma_max = 10         # gamma_i is clipped to [-gamma_max, gamma_max] in the decay models

Load the data and aggregate both series to the same temporal resolution.

In [None]:
# --- Soil moisture ---------------------------------------------------------
sm_path = ''  # path to the soil-moisture CSV (blank here; set before running)
data = pd.read_csv(sm_path, parse_dates=['date']).set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# Select a single site. The coordinates are floats, so match them with a tight
# absolute tolerance instead of `==`: an exact comparison silently selects
# nothing if the parsed CSV value differs from the literal in the last bit.
site_mask = (
    np.isclose(data['latitude'], 41.948936, rtol=0.0, atol=1e-7)
    & np.isclose(data['longitude'], -93.687760, rtol=0.0, atol=1e-7)
)
filtered_data = data[site_mask]
# Keep every second record of the site's series.
# NOTE(review): presumably this brings soil moisture onto the same 6-hourly
# grid as the precipitation below - confirm against the input sampling rate.
resampled_data = filtered_data['soil_moisture'].iloc[::2]

# --- Precipitation ---------------------------------------------------------
precip_path = ''  # path to the precipitation CSV (blank here; set before running)
precip_data = pd.read_csv(precip_path)
precip_data['time'] = pd.to_datetime(precip_data['time'])
precip_data = precip_data.set_index('time')
# Sum the 'tp' column into 6-hour bins. The lowercase 'h' alias is used because
# the uppercase 'H' alias is deprecated in recent pandas releases.
precip_resampled = precip_data['tp'].resample('6h').sum()

Filter both series by date range.

In [None]:
def get_sm(filtered_data, start_date=None, end_date=None):
    """Return the values of ``filtered_data`` that fall inside a date window.

    Parameters
    ----------
    filtered_data : pandas object whose index can be compared with the bounds.
    start_date : optional lower bound (inclusive); skipped when falsy.
    end_date : optional upper bound (inclusive); skipped when falsy.

    Returns
    -------
    The ``.values`` of the rows that survive both filters.
    """
    window = filtered_data
    if start_date:
        on_or_after = window.index >= start_date
        window = window[on_or_after]
    if end_date:
        on_or_before = window.index <= end_date
        window = window[on_or_before]
    return window.values

def getPrecipData(precip_data, start_date=None, end_date=None):
    """Slice ``precip_data`` to ``[start_date, end_date]`` and return its values.

    Either bound may be omitted (or falsy), in which case that side of the
    window is left open. Both bounds are inclusive and are compared against
    the object's index.
    """
    if start_date:
        not_too_early = precip_data.index >= start_date
        precip_data = precip_data.loc[not_too_early]
    if end_date:
        not_too_late = precip_data.index <= end_date
        precip_data = precip_data.loc[not_too_late]
    return precip_data.values


In [None]:
# Cut both series down to the configured analysis window; from here on they
# are plain value arrays without a datetime index.
Y = get_sm(resampled_data, start_date=start_date, end_date=end_date)
precip = getPrecipData(precip_resampled, start_date=start_date, end_date=end_date)

Find the turning points.

In [None]:
def detectTurningPoint(Y):
    """Return the indices at which ``Y`` switches from rising to falling.

    An index ``k`` is reported when ``Y[k] - Y[k-1] > 0`` and
    ``Y[k+1] - Y[k] < 0``, i.e. a strict local maximum. Flat stretches
    (zero difference) never qualify.
    """
    slope = np.diff(Y)
    rising_into = slope[:-1] > 0
    falling_out = slope[1:] < 0
    # Position k of the mask describes sample k + 1, hence the offset.
    peaks = np.nonzero(rising_into & falling_out)[0]
    return peaks + 1

# Rise-to-fall turning points of the soil-moisture series; pelt() iterates over
# these as the candidate segment starts.
gradient_start_points = detectTurningPoint(Y)

In [None]:
# Define the different precipitation transformations
def precipStep(precip, threshold, lag):
    """Cumulative count of lagged precipitation events.

    An event is any sample with ``precip > threshold``. The 0/1 event series
    is shifted by ``lag`` samples and then cumulatively summed.

    The shift is zero-filled. The earlier ``np.roll`` implementation wrapped
    events from the end of the record round to the beginning, which raised
    the step function at times before those events had happened.

    Parameters
    ----------
    precip : 1-D array-like of precipitation values.
    threshold : values strictly greater than this count as an event.
    lag : int, number of samples to delay (positive) or advance (negative)
        the event series; ``|lag| >= len(precip)`` shifts every event out.

    Returns
    -------
    numpy.ndarray of int, same length as ``precip``.
    """
    precip_events = (np.asarray(precip) > threshold).astype(int)
    n_samples = precip_events.shape[0]

    shifted = np.zeros_like(precip_events)
    if lag == 0:
        shifted = precip_events
    elif abs(lag) < n_samples:
        if lag > 0:
            shifted[lag:] = precip_events[:-lag]
        else:
            shifted[:lag] = precip_events[-lag:]
    # else: every event has been shifted out of the record -> all zeros

    return np.cumsum(shifted)

def precipIndicators(precip, threshold, lag):
    """Build one lagged step-indicator column per precipitation event.

    An event is any sample with ``precip > threshold``. Column ``i`` is 0
    before ``event_index_i + lag`` and 1 from that sample onwards.

    The lag is applied by moving each column's onset. The earlier
    ``np.roll`` along the time axis wrapped the trailing rows (all ones) to
    the top of the matrix, so the first ``lag`` rows wrongly showed events
    as already active.

    Parameters
    ----------
    precip : 1-D array-like of precipitation values.
    threshold : values strictly greater than this count as an event.
    lag : int, number of samples by which each onset is delayed. An onset
        pushed past the end leaves the column all zero; an onset pushed
        before the start (negative lag) is clamped to sample 0.

    Returns
    -------
    numpy.ndarray of float, shape ``(len(precip), n_events)``.
    """
    precip = np.asarray(precip)
    n_samples = len(precip)
    event_indices = np.where(precip > threshold)[0]

    indicators = np.zeros((n_samples, len(event_indices)))
    for column, event_index in enumerate(event_indices):
        onset = min(max(event_index + lag, 0), n_samples)
        indicators[onset:, column] = 1
    return indicators

# Decay models for each of the transformations
def expDecayStep(t, alpha_0, alpha_1, gamma_i, tau_i, step_function):
    """Exponential decay plus an additive step-function covariate.

    Evaluates ``alpha_0 + alpha_1 * exp(-exp(gamma_i) * (t - tau_i)) +
    step_function[t]``, with ``gamma_i`` first clipped to
    ``[-gamma_max, gamma_max]`` (module-level constant).

    ``t`` is used both as the time value and as the index into
    ``step_function``.
    """
    log_rate = np.clip(gamma_i, -gamma_max, gamma_max)
    rate = np.exp(log_rate)
    elapsed = t - tau_i
    decay_term = alpha_1 * np.exp(-rate * elapsed)
    return alpha_0 + decay_term + step_function[t]

def expDecayIndicators(t, alpha_0, alpha_1, gamma_i, tau_i, indicators):
    """Exponential decay plus the number of active event indicators at ``t``.

    Evaluates ``alpha_0 + alpha_1 * exp(-exp(gamma_i) * (t - tau_i)) +
    sum_j indicators[t, j]``, with ``gamma_i`` clipped to
    ``[-gamma_max, gamma_max]`` (module-level constant).

    The indicator sum is taken over the column axis only, so an array ``t``
    gives one count per time step. Summing without an axis collapsed the
    whole segment to one scalar, adding the same constant to every
    prediction.

    Parameters
    ----------
    t : int or 1-D integer array; used as time value and as row index.
    indicators : 2-D array, rows indexed by time, one column per event.
    """
    gamma_i = np.clip(gamma_i, -gamma_max, gamma_max)
    active_events = np.sum(indicators[t, :], axis=-1)
    return alpha_0 + alpha_1 * np.exp(-np.exp(gamma_i) * (t - tau_i)) + active_events

Using the core algorithm from:

M. Gong, R. Killick, C. Nemeth, J. Quinton (2024). "A changepoint approach to modelling non-stationary soil moisture dynamics". arXiv:2310.17546. https://arxiv.org/abs/2310.17546

In [None]:

def nll(params, Y, tau_i, tau_ip1, precip_transformed, transformation='step'):
    """Cost of fitting the decay model to ``Y[tau_i:tau_ip1]``.

    Parameters
    ----------
    params : sequence ``(alpha_0, alpha_1, gamma_i)``.
    Y : 1-D array of observations.
    tau_i, tau_ip1 : segment bounds; the segment is ``[tau_i, tau_ip1)``.
    precip_transformed : covariate passed to the decay model (a step function
        for ``'step'``, an indicator matrix for ``'indicators'``).
    transformation : ``'step'`` or ``'indicators'``.

    Returns
    -------
    ``2 * (n * log(2*pi) + sum(residuals**2))`` with ``n = tau_ip1 - tau_i``.

    Raises
    ------
    ValueError
        If ``transformation`` is not one of the supported names. This used
        to surface as an UnboundLocalError on ``predicted``.
    """
    alpha_0, alpha_1, gamma_i = params
    n = tau_ip1 - tau_i
    t = np.arange(tau_i, tau_ip1)

    if transformation == 'step':
        predicted = expDecayStep(t, alpha_0, alpha_1, gamma_i, tau_i, precip_transformed)
    elif transformation == 'indicators':
        predicted = expDecayIndicators(t, alpha_0, alpha_1, gamma_i, tau_i, precip_transformed)
    else:
        raise ValueError(
            f"unknown transformation {transformation!r}; expected 'step' or 'indicators'"
        )

    residuals = Y[tau_i:tau_ip1] - predicted
    cost = n * np.log(2 * np.pi) + np.sum(residuals ** 2)

    return 2 * cost


def estimate_parameters(Y, tau_i, tau_ip1, precip_transformed, transformation):
    """Fit the decay model to one segment by minimising ``nll``.

    The optimiser starts from the module-level ``initial_guess`` and is
    capped at ``max_iterations`` iterations.

    Returns
    -------
    numpy.ndarray or None
        The fitted parameter vector, or ``None`` when the optimiser reports
        failure or raises.

    Notes
    -----
    An exception during optimisation still yields ``None`` so that the
    caller's search can continue, but it is now reported as a
    ``RuntimeWarning`` instead of being dropped silently. Silent swallowing
    hid programming errors such as a parameter-count mismatch between
    ``initial_guess`` and ``nll``.
    """
    import warnings  # local import so the cell does not depend on another edit

    try:
        result = minimize(
            nll,
            initial_guess,
            args=(Y, tau_i, tau_ip1, precip_transformed, transformation),
            options={'maxiter': max_iterations},
        )
    except Exception as e:
        # The message omits the segment bounds on purpose: identical messages
        # are de-duplicated by the default warning filter.
        warnings.warn(
            f"estimate_parameters: optimisation raised {type(e).__name__}: {e}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

    return result.x if result.success else None
    

def pelt(Y, gradient_start_points, precip_transformed, transformation, min_seg_length, lambda_value, cost_threshold):
    """Greedy changepoint search guided by turning points.

    For every candidate start in ``gradient_start_points`` the function fits
    the decay model to each segment ``[start, end)`` with
    ``start + min_seg_length <= end < len(Y)``. It keeps the end whose cost
    ``nll + lambda_value`` is smallest among fits with
    ``nll <= cost_threshold``. That end is recorded as a change point unless
    it lies closer than ``min_seg_length`` to the previously recorded one.

    Parameters
    ----------
    Y : 1-D array of observations.
    gradient_start_points : iterable of int candidate segment starts
        (expected in ascending order; iteration stops at the first start
        that is ``>= len(Y)``).
    precip_transformed : covariate forwarded to ``nll``.
    transformation : ``'step'`` or ``'indicators'``, forwarded to ``nll``.
    min_seg_length : minimum segment length and minimum change-point spacing.
    lambda_value : constant penalty added to each segment cost.
    cost_threshold : maximum accepted segment cost.

    Returns
    -------
    (change_points, parameters) : two parallel lists - the accepted segment
    ends (indices into ``Y``) and the fitted parameter vectors.

    Notes
    -----
    A start with no acceptable segment is now skipped. The earlier code
    reached ``np.delete(Y, slice(start, None))`` in that case (``best_cp`` is
    always ``None`` there), which discarded the rest of the series and ended
    the search at the first failed start. Deleting samples would also have
    misaligned ``Y`` with ``precip_transformed`` and the start indices.
    """
    change_points = []
    parameters = []
    n_samples = len(Y)

    for start in gradient_start_points:
        if start >= n_samples:
            break

        F_min = np.inf
        best_cp = None
        best_params = None

        # An end equal to n_samples was never accepted, so it is not fitted.
        for tau_ip1 in range(start + min_seg_length, n_samples):
            params = estimate_parameters(Y, start, tau_ip1, precip_transformed, transformation)
            if params is None:
                continue

            segment_cost = nll(params, Y, start, tau_ip1, precip_transformed, transformation)
            if segment_cost <= cost_threshold:
                total_cost = segment_cost + lambda_value
                if total_cost < F_min:
                    F_min = total_cost
                    best_cp = tau_ip1
                    best_params = params

        if best_cp is None:
            # Nothing acceptable from this start; move on to the next one.
            continue

        if not change_points or (best_cp - change_points[-1]) >= min_seg_length:
            parameters.append(best_params)
            change_points.append(best_cp)

    return change_points, parameters


# Build the two lagged precipitation covariates from the same event definition.
step_function = precipStep(precip, threshold=precip_threshold, lag=precip_lag)
indicators = precipIndicators(precip, threshold=precip_threshold, lag=precip_lag)

# Run the changepoint search once per covariate, with identical settings.
change_points_step, parameters_step = pelt(
    Y, gradient_start_points, step_function, 'step',
    min_seg_length, lambda_value, cost_threshold,
)
change_points_indicators, parameters_indicators = pelt(
    Y, gradient_start_points, indicators, 'indicators',
    min_seg_length, lambda_value, cost_threshold,
)

In [None]:
# Soil-moisture series with the change points of both models overlaid.
plt.figure(figsize=(20, 8))
plt.plot(Y, label="Soil Moisture")

# (change points, colour, line style, legend entry); step model is drawn first.
overlays = (
    (change_points_step, 'red', '--', 'Step Change Point'),
    (change_points_indicators, 'blue', '-.', 'Indicator Change Point'),
)
for points, colour, dash, legend_entry in overlays:
    for i, cp in enumerate(points):
        # Only the first line of each family carries a label, so the legend
        # shows one entry per model.
        plt.axvline(cp, color=colour, linestyle=dash, label=legend_entry if i == 0 else "")

plt.legend()
plt.title("Soil Moisture with Detected Change Points Using Transformed precip Data")
plt.xlabel("Time")
plt.ylabel("Soil Moisture")
plt.show()


# Histograms of the fitted parameters: one row per parameter, step model on
# the left, indicator model on the right.
plt.figure(figsize=(15, 10))

# (parameter position, fitted vectors, colour, title, x label), in subplot order.
panels = [
    (0, parameters_step, 'blue', 'Step Function: Alpha_0 (Asymptotic SM)', 'Alpha_0'),
    (0, parameters_indicators, 'red', 'Indicators: Alpha_0 (Asymptotic SM)', 'Alpha_0'),
    (1, parameters_step, 'blue', 'Step: Alpha_1 (SM Amplitude)', 'Alpha_1'),
    (1, parameters_indicators, 'red', 'Indicators: Alpha_1 (SM Amplitude)', 'Alpha_1'),
    (2, parameters_step, 'blue', 'Step: Gamma_i (Decay Rate)', 'Gamma_i'),
    (2, parameters_indicators, 'red', 'Indicators: Gamma_i (Decay Rate)', 'Gamma_i'),
]
for slot, (position, fitted, colour, heading, axis_name) in enumerate(panels, start=1):
    plt.subplot(3, 2, slot)
    plt.hist([param[position] for param in fitted], bins=20, color=colour, alpha=0.7)
    plt.title(heading)
    plt.xlabel(axis_name)
    plt.ylabel('Frequency')

plt.tight_layout()
plt.show()


In [None]:
def expDecayStep(t, alpha_0, alpha_1, gamma_i, tau_i, step_function, beta_i):
    """Exponential decay whose amplitude is shifted by the step covariate.

    Evaluates ``alpha_0 + (alpha_1 + step_function[t] * beta_i) *
    exp(-exp(gamma_i) * (t - tau_i))``, with ``gamma_i`` first clipped to
    ``[-gamma_max, gamma_max]`` (module-level constant). ``t`` serves both
    as the time value and as the index into ``step_function``.
    """
    rate = np.exp(np.clip(gamma_i, -gamma_max, gamma_max))
    amplitude = alpha_1 + step_function[t] * beta_i
    decay = np.exp(-rate * (t - tau_i))
    return alpha_0 + amplitude * decay
def expDecayIndicators(t, alpha_0, alpha_1, gamma_i, tau_i, indicators, beta_i):
    """Exponential decay whose amplitude is shifted by the active-event count.

    Evaluates ``alpha_0 + (alpha_1 + beta_i * sum_j indicators[t, j]) *
    exp(-exp(gamma_i) * (t - tau_i))``, with ``gamma_i`` clipped to
    ``[-gamma_max, gamma_max]`` (module-level constant).

    The indicator sum is taken over the column axis only, so an array ``t``
    gives one count per time step. Summing without an axis collapsed the
    whole segment to one scalar shared by every prediction.

    Parameters
    ----------
    t : int or 1-D integer array; used as time value and as row index.
    indicators : 2-D array, rows indexed by time, one column per event.
    """
    gamma_i = np.clip(gamma_i, -gamma_max, gamma_max)
    active_events = np.sum(indicators[t, :], axis=-1)
    return alpha_0 + (alpha_1 + active_events * beta_i) * np.exp(-np.exp(gamma_i) * (t - tau_i))
# In this variant the beta coefficient is included in the parameter vector
def nll(params, Y, tau_i, tau_ip1, precip_transformed, transformation='step'):
    """Cost of fitting the beta-augmented decay model to ``Y[tau_i:tau_ip1]``.

    Parameters
    ----------
    params : sequence ``(alpha_0, alpha_1, gamma_i, beta_i)`` - four values.
    Y : 1-D array of observations.
    tau_i, tau_ip1 : segment bounds; the segment is ``[tau_i, tau_ip1)``.
    precip_transformed : covariate passed to the decay model (a step function
        for ``'step'``, an indicator matrix for ``'indicators'``).
    transformation : ``'step'`` or ``'indicators'``.

    Returns
    -------
    ``2 * (n * log(2*pi) + sum(residuals**2))`` with ``n = tau_ip1 - tau_i``.

    Raises
    ------
    ValueError
        If ``transformation`` is not one of the supported names (this used
        to surface as an UnboundLocalError on ``predicted``), or if
        ``params`` does not hold exactly four values.
    """
    alpha_0, alpha_1, gamma_i, beta_i = params
    n = tau_ip1 - tau_i
    t = np.arange(tau_i, tau_ip1)

    if transformation == 'step':
        predicted = expDecayStep(t, alpha_0, alpha_1, gamma_i, tau_i, precip_transformed, beta_i)
    elif transformation == 'indicators':
        predicted = expDecayIndicators(t, alpha_0, alpha_1, gamma_i, tau_i, precip_transformed, beta_i)
    else:
        raise ValueError(
            f"unknown transformation {transformation!r}; expected 'step' or 'indicators'"
        )

    residuals = Y[tau_i:tau_ip1] - predicted
    cost = n * np.log(2 * np.pi) + np.sum(residuals ** 2)

    return 2 * cost


# --- Soil moisture ---------------------------------------------------------
file_path = ''  # path to the soil-moisture CSV (blank here; set before running)
data = pd.read_csv(file_path, parse_dates=['date']).set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# Select a single site. The coordinates are floats, so match them with a tight
# absolute tolerance instead of `==`: an exact comparison silently selects
# nothing if the parsed CSV value differs from the literal in the last bit.
site_mask = (
    np.isclose(data['latitude'], 41.948936, rtol=0.0, atol=1e-7)
    & np.isclose(data['longitude'], -93.687760, rtol=0.0, atol=1e-7)
)
filtered_data = data[site_mask]
# Keep every second record of the site's series, then cut to the analysis window.
resampled_data = filtered_data['soil_moisture'].iloc[::2]
Y = get_sm(resampled_data, start_date, end_date)

# --- Precipitation ---------------------------------------------------------
precip_file_path = ''  # path to the precipitation CSV (blank here; set before running)
precip_data = pd.read_csv(precip_file_path)
precip_data['time'] = pd.to_datetime(precip_data['time'])
precip_data = precip_data.set_index('time')
# Sum the 'tp' column into 6-hour bins. The lowercase 'h' alias is used because
# the uppercase 'H' alias is deprecated in recent pandas releases.
precip_resampled = precip_data['tp'].resample('6h').sum()

precip = getPrecipData(precip_resampled, start_date, end_date)

# The beta-augmented nll defined in this cell unpacks four parameters
# (alpha_0, alpha_1, gamma_i, beta_i). With a three-element starting point
# every fit raises inside the optimiser, estimate_parameters returns None,
# and no change point is ever found. Extend the starting point with an
# initial beta of 0.0 (no precipitation effect).
# Slicing to the first three entries keeps this line idempotent on re-runs.
# NOTE(review): 0.0 is a neutral assumption for beta - tune if a better prior is known.
initial_guess = [*initial_guess[:3], 0.0]

step_function = precipStep(precip, precip_threshold, precip_lag)
indicators = precipIndicators(precip, precip_threshold, precip_lag)
gradient_start_points = detectTurningPoint(Y)
change_points_step, parameters_step = pelt(Y, gradient_start_points, step_function, 'step', min_seg_length, lambda_value, cost_threshold)
change_points_indicators, parameters_indicators = pelt(Y, gradient_start_points, indicators, 'indicators', min_seg_length, lambda_value, cost_threshold)
# Soil-moisture series with the change points of both beta models overlaid.
plt.figure(figsize=(20, 8))
plt.plot(Y, label="Soil Moisture")

# (change points, colour, line style, legend entry); step model is drawn first.
overlays = (
    (change_points_step, 'red', '--', 'Step Change Point'),
    (change_points_indicators, 'blue', '-.', 'Indicator Change Point'),
)
for points, colour, dash, legend_entry in overlays:
    for i, cp in enumerate(points):
        # Only the first line of each family carries a label, so the legend
        # shows one entry per model.
        plt.axvline(cp, color=colour, linestyle=dash, label=legend_entry if i == 0 else "")

plt.legend()
plt.title("Soil Moisture with Detected Change Points Using Different precip Models")
plt.xlabel("Time")
plt.ylabel("Soil Moisture")
plt.show()


# Histograms of the first three fitted parameters: one row per parameter,
# step model on the left, indicator model on the right.
plt.figure(figsize=(15, 10))

# (parameter position, fitted vectors, colour, title, x label), in subplot order.
panels = [
    (0, parameters_step, 'blue', 'Step Function:Alpha_0 (Asymptotic Value)', 'Alpha_0'),
    (0, parameters_indicators, 'red', 'Indicators Alpha_0 (Asymptotic Value)', 'Alpha_0'),
    (1, parameters_step, 'blue', 'Step Function Alpha_1 (Amplitude)', 'Alpha_1'),
    (1, parameters_indicators, 'red', 'Indicators Alpha_1 (Amplitude)', 'Alpha_1'),
    (2, parameters_step, 'blue', 'Step Function: Gamma_i (Decay Rate)', 'Gamma_i'),
    (2, parameters_indicators, 'red', 'Indicators:   Gamma_i (Decay Rate)', 'Gamma_i'),
]
for slot, (position, fitted, colour, heading, axis_name) in enumerate(panels, start=1):
    plt.subplot(3, 2, slot)
    plt.hist([param[position] for param in fitted], bins=20, color=colour, alpha=0.7)
    plt.title(heading)
    plt.xlabel(axis_name)
    plt.ylabel('Frequency')

plt.tight_layout()
plt.show()
