In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from statsmodels.tsa.tsatools import lagmat
import ruptures as rpt
from scipy.optimize import least_squares, curve_fit


In [None]:
# Load the raw observations and index them by their parsed 'date' column.
file_path = ''
data = pd.read_csv(file_path, parse_dates=['date']).set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# Keep only the rows recorded at one exact latitude/longitude pair.
soil_data = data.loc[
    (data['latitude'] == 41.948936) & (data['longitude'] == -93.687760)
]

print(soil_data.head())
plt.figure(figsize=(12, 6))
plt.plot(
    soil_data.index,
    soil_data['soil_moisture'],
    color='green',
    label='Soil Moisture',
)


In [None]:
# Average into 18-hour bins, fill the gaps by interpolation, then pull the
# soil-moisture column out as a plain array for the fitting code below.
soil_data_resampled = (
    soil_data
    .resample('18H')
    .mean()
    .interpolate()
)
Y = soil_data_resampled['soil_moisture'].values

In [None]:
def expDecay(t, alpha_0, alpha_1, gamma_i, tau_i):
    """Evaluate alpha_0 + alpha_1 * exp(-exp(gamma_i) * (t - tau_i)).

    The decay rate is exp(gamma_i), so it is positive for every real gamma_i.
    """
    rate = np.exp(gamma_i)
    elapsed = t - tau_i
    return alpha_0 + alpha_1 * np.exp(-rate * elapsed)

# Smoke-test expDecay on nine unit-spaced time steps with hand-picked values.
t = np.arange(1, 10)
alpha_0, alpha_1 = 0.5, 0.5
gamma_i, tau_i = 0.1, 0

ans = expDecay(t, alpha_0, alpha_1, gamma_i, tau_i)
print(ans)


In [None]:
def nll(params, Y, tau_i, tau_ip1):
    """Cost of fitting one exponential-decay segment Y[tau_i:tau_ip1].

    Parameters
    ----------
    params : sequence
        At least three values; the first three are (alpha_0, alpha_1, gamma_i).
    Y : array-like
        Full signal; only the slice [tau_i:tau_ip1] is used.
    tau_i, tau_ip1 : int
        Start (inclusive) and end (exclusive) indices of the segment.

    Returns
    -------
    float
        n * log(2*pi) + sum(resid**2), where resid is the model residual
        scaled by 1/sqrt(n) and n = tau_ip1 - tau_i. Returns np.inf for an
        empty or inverted segment.
    """
    alpha_0, alpha_1, gamma_i = params[:3]
    n = tau_ip1 - tau_i
    if n <= 0:
        # No samples to fit; avoid a 0/0 in the normalisation below.
        return np.inf

    # Local time axis 1..n, so the decay starts at the segment start.
    t = np.arange(1, n + 1)
    model = expDecay(t, alpha_0, alpha_1, gamma_i, 0)

    # Scale the RESIDUAL by sqrt(n). The previous expression
    # `Y - model / sqrt(n)` divided only the model term because `/` binds
    # tighter than `-`, so the fit was against a shrunken model.
    resid = (Y[tau_i:tau_ip1] - model) / np.sqrt(n)
    return n * np.log(2 * np.pi) + np.sum(resid ** 2)


In [None]:
def estimate(Y, tau_i, tau_prev):
    """Fit (alpha_0, alpha_1, gamma_i) on Y[tau_i:tau_prev] by minimising nll.

    Returns the optimiser's solution when it reports success; otherwise
    returns the sentinel [1e6, 1e6, 1e6]. A status line is printed either way.
    """
    fit = minimize(
        nll,
        [0.5, 0.5, 0.1],
        args=(Y, tau_i, tau_prev),
        options={'maxiter': 1000},
    )

    if not fit.success:
        print(f"fail: tau_i={tau_i}, tau_prev={tau_prev}")
        return [1e6, 1e6, 1e6]

    print(f"success! tau_i={tau_i}, tau_prev={tau_prev}, params={fit.x}")
    return fit.x
# Fit the first nine samples of Y as a single segment and show the result.
tau_i, tau_prev = 0, 9
estimated_params = estimate(Y, tau_i, tau_prev)
print(estimated_params)


In [None]:
def costFunction(Y, changepoints, n, k):
    """BIC-style score for a segmentation of Y.

    Parameters
    ----------
    Y : array-like
        Full signal.
    changepoints : sequence of int
        Segment boundaries; consecutive pairs (changepoints[i],
        changepoints[i + 1]) delimit the segments that are fitted.
    n : int
        Sample count used in the log(n) penalty term.
    k : int
        Ignored: it is recomputed as len(changepoints) - 1. Kept only so
        existing call sites keep working.

    Returns
    -------
    (BIC, parameters) : tuple
        BIC = log(n) * k * 10 + total segment cost, and the list of fitted
        parameter vectors, one per segment.
    """
    k = len(changepoints) - 1
    cost = 0
    parameters = []
    for tau_i, tau_prev in zip(changepoints[:-1], changepoints[1:]):
        params = estimate(Y, tau_i, tau_prev)
        parameters.append(params)
        segment_cost = nll(params, Y, tau_i, tau_prev)
        cost += segment_cost
        print(f"Seg cost: tau_i={tau_i}, tau_prev={tau_prev}, cost={segment_cost}")

    # Reported for diagnostics only; it underflows towards 0 for large costs.
    likelihood = np.exp(-cost / 2) if cost < 700 else 0
    penalty = np.log(n) * k * 10

    # With likelihood = exp(-cost / 2), -2 * log(likelihood) is exactly `cost`.
    # The old round trip, -2 * log(likelihood + 1e-10), saturated near 46 as
    # soon as cost exceeded ~46, so the BIC no longer depended on fit quality.
    BIC = penalty + cost

    print(f"cps={changepoints}, BIC={BIC}, likelihood={likelihood}")
    return BIC, parameters


In [None]:

# Ground-truth [alpha_0, alpha_1, gamma] for the three synthetic segments.
# exponential_decay uses exp(gamma) as the decay rate, and the synthetic series
# built further down in this cell decays at rates 0.1, 0.15 and 0.2, so the
# true gamma of each segment is the log of that rate (not the negated rate).
synthetic_params = [
    [0.5, 0.5, np.log(0.1)],
    [0.2, 0.4, np.log(0.15)],
    [0.7, 0.3, np.log(0.2)],
]

def exponential_decay(t, alpha_0, alpha_1, gamma_i, tau_i):
    """Return alpha_0 + alpha_1 * exp(-exp(gamma_i) * (t - tau_i)).

    gamma_i is the log of the decay rate; tau_i is the time origin.
    """
    decay_rate = np.exp(gamma_i)
    shifted_t = t - tau_i
    decayed = np.exp(-decay_rate * shifted_t)
    return alpha_0 + alpha_1 * decayed

def negative_log_likelihood(params, Y, tau_i, tau_ip1):
    """Cost of an exponential-decay fit on the segment Y[tau_i:tau_ip1].

    Parameters
    ----------
    params : sequence of 3 floats
        (alpha_0, alpha_1, gamma_i).
    Y : array-like
        Full signal; only the slice [tau_i:tau_ip1] is used.
    tau_i, tau_ip1 : int
        Start (inclusive) and end (exclusive) indices of the segment.

    Returns
    -------
    float
        n * log(2*pi) + sum of squared residuals, with n = tau_ip1 - tau_i.
        For an empty or inverted segment (tau_ip1 <= tau_i) a message is
        printed and np.inf is returned. Previously that path fell through to
        `return cost` with `cost` never assigned and raised UnboundLocalError.
    """
    alpha_0, alpha_1, gamma_i = params
    n = tau_ip1 - tau_i
    if n <= 0:
        print(f"{tau_ip1} smaller or  equal to {tau_i}")
        return np.inf

    # Local time axis 1..n, so the decay is measured from the segment start.
    t = np.arange(1, n + 1)
    residuals = Y[tau_i:tau_ip1] - exponential_decay(t, alpha_0, alpha_1, gamma_i, 0)
    return n * np.log(2 * np.pi) + np.sum(residuals ** 2)

def find_param(Y, tau_i, tau_ip1):
    """Fit (alpha_0, alpha_1, gamma) on Y[tau_i:tau_ip1].

    Minimises negative_log_likelihood from a fixed starting point and returns
    the solution, or the sentinel [1e6, 1e6, 1e6] when the optimiser does not
    report success.
    """
    fit = minimize(
        negative_log_likelihood,
        [0.5, 0.4, -0.15],
        args=(Y, tau_i, tau_ip1),
    )
    return fit.x if fit.success else [1e6, 1e6, 1e6]

class ExponentialCost(rpt.costs.CostL2):
    """ruptures cost that scores a segment by its best exponential-decay fit.

    Derives from rpt.costs.CostL2 but replaces both `fit` and `error`.
    """

    def __init__(self):
        super().__init__()

    def fit(self, signal):
        """Keep a reference to the signal as given and return self."""
        self.signal = signal
        return self

    def error(self, start, end):
        """Cost of signal[start:end]; np.inf when it has fewer than 2 samples."""
        segment_length = end - start
        if segment_length < 2:
            return np.inf
        fitted = find_param(self.signal, start, end)
        return negative_log_likelihood(fitted, self.signal, start, end)

# Three concatenated decays of 49 samples each plus small Gaussian noise.
np.random.seed(0)
Y_synthetic = np.concatenate([0.5 + 0.5 * np.exp(-0.1 * np.arange(1, 50)),0.2 + 0.4 * np.exp(-0.15 * np.arange(1, 50)),
                            0.7 + 0.3 * np.exp(-0.2 * np.arange(1, 50))]) + 0.01 * np.random.randn(147)

algo = rpt.Pelt(custom_cost=ExponentialCost(), min_size=3, jump=1).fit(Y_synthetic)
result = algo.predict(pen=1)

# predict() returns segment END indices, the last being len(signal).
# Prepend 0 so the first segment [0:result[0]] is fitted as well; pairing
# result[i] with result[i + 1] skipped it and shifted every estimate by one
# segment relative to synthetic_params.
boundaries = [0] + list(result)

param_result = []
print("Estimated Parameters for each segment:")
for start, end in zip(boundaries[:-1], boundaries[1:]):
    params = find_param(Y_synthetic, start, end)
    param_result.append(params)
    print(f"Segment [{start}:{end}] -> alpha_0: {params[0]:.4f}, alpha_1: {params[1]:.4f}, gamma: {params[2]:.4f}")

if len(param_result) != len(synthetic_params):
    # zip() below silently truncates to the shorter list.
    print(f"Warning: detected {len(param_result)} segments but expected {len(synthetic_params)}; "
          "MAE covers only the leading segments.")

alpha_0_error = np.mean([abs(true[0] - est[0]) for true, est in zip(synthetic_params, param_result)])
alpha_1_error = np.mean([abs(true[1] - est[1]) for true, est in zip(synthetic_params, param_result)])
gamma_error = np.mean([abs(true[2] - est[2]) for true, est in zip(synthetic_params, param_result)])

print(f"MAE alpha_0: {alpha_0_error:.4f}")
print(f"MAE alpha_1: {alpha_1_error:.4f}")
print(f"MAE gamma: {gamma_error:.4f}")


plt.figure(figsize=(10, 6))
plt.plot(Y_synthetic, label="Synthetic Data")
# The final entry of `result` is the end of the signal, not a changepoint.
for i, cp in enumerate(result[:-1]):
    plt.axvline(cp, color="r", linestyle="--", label="Changepoint" if i == 0 else None)
plt.legend()
plt.title("Changepoint Detection on Synthetic Data")
plt.show()


In [None]:
# Reload the observations for the gradient-based analysis.
# The CSV is read from `path`, the name defined in this cell. The call used to
# read `file_path`, which is only defined in an earlier cell, so `path` had no
# effect and the cell failed on its own.
path = ''
data = pd.read_csv(path, parse_dates=['date'])
data = data.set_index('date')
data['soil_moisture'] = pd.to_numeric(data['soil_moisture'])

# Keep only the rows recorded at one exact latitude/longitude pair.
location_sm = data[(data['latitude'] == 41.948936) & (data['longitude'] == -93.687760)]

def turning_points(Y):
    """Indices where Y stops rising.

    An index i is returned when the step into i is strictly positive and the
    step out of i is zero or negative.
    """
    step = np.diff(Y)
    was_rising = step[:-1] > 0
    stops_rising = step[1:] <= 0
    peak_positions = np.where(was_rising & stops_rising)[0]
    # Shift by one: step[j] is the change between Y[j] and Y[j + 1].
    return peak_positions + 1

def timeframe_sm(location_sm, start=None, end=None):
    """Return the values of `location_sm` whose index lies in [start, end].

    Either bound is skipped when it is falsy (e.g. None); both are inclusive.
    """
    window = location_sm
    if start:
        window = window[window.index >= start]
    if end:
        window = window[window.index <= end]
    return window.values

# Average into 18-hour bins (no interpolation here) and keep the study window.
resampled = location_sm['soil_moisture'].resample('18H').mean()
start, end = '2020-01-01', '2020-10-01'
Y = timeframe_sm(resampled, start, end)
gradient_cps = turning_points(Y)

plt.figure(figsize=(20, 8))
plt.plot(Y, label="Soil Moisture")
for cp in gradient_cps:
    # Only the first line gets a label, so the legend shows a single entry.
    plt.axvline(
        cp,
        color='green',
        linestyle='--',
        label=(f'Gradient Change ' if cp == gradient_cps[0] else ""),
    )
plt.title("Soil Moisture with Detected Gradient Change Points")
plt.xlabel("Time")
plt.ylabel("Soil Moisture")
plt.legend()
plt.show()


In [None]:
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import pandas as pd

# Inclusive date bounds of the study window used by the cells below.
start, end = '2020-01-01', '2020-10-01'

def exponential_decay(t, alpha_0, alpha_1, gamma_i, tau_i):
    """Exponential relaxation towards alpha_0 with amplitude alpha_1.

    Computes alpha_0 + alpha_1 * exp(-exp(gamma_i) * (t - tau_i)); the decay
    rate is exp(gamma_i) and tau_i is the time origin.
    """
    exponent = -np.exp(gamma_i) * (t - tau_i)
    transient = alpha_1 * np.exp(exponent)
    return alpha_0 + transient

def negative_log_likelihood(params, Y, tau_i, tau_ip1):
    """Twice the segment cost of an exponential-decay fit on Y[tau_i:tau_ip1].

    The model is evaluated at absolute indices tau_i .. tau_ip1 - 1 with tau_i
    as its time origin, so elapsed time runs 0 .. n - 1. Returns
    2 * (n * log(2*pi) + sum of squared residuals), with n = tau_ip1 - tau_i.
    """
    alpha_0, alpha_1, gamma_i = params
    n = tau_ip1 - tau_i
    t = np.arange(tau_i, tau_ip1)
    fitted = exponential_decay(t, alpha_0, alpha_1, gamma_i, tau_i)
    residuals = Y[tau_i:tau_ip1] - fitted
    return 2 * (n * np.log(2 * np.pi) + np.sum(residuals ** 2))

def extract_params(Y, tau_i, tau_ip1):
    """Fit (alpha_0, alpha_1, gamma_i) on Y[tau_i:tau_ip1].

    Returns the optimiser's solution, or None (after printing a message) when
    the optimiser does not report success or raises an exception.
    """
    x0 = [0.30817061, -0.00337174, 0.01628086]
    try:
        fit = minimize(
            negative_log_likelihood,
            x0,
            args=(Y, tau_i, tau_ip1),
            options={'maxiter': 5000},
        )
    except Exception as e:
        print(f"error in params: {e}")
        return None
    else:
        if fit.success:
            return fit.x
        print(f"Non-convergence for Y[{tau_i}:{tau_ip1}]")
        return None


def pelt_custom(Y, gradient_cps, min_seg_length=8, min_remove_length=5, lambda_value=1, cost_threshold=100):
    """Greedy segment search seeded at the candidate starts in `gradient_cps`.

    For each candidate start lying beyond the end of the last accepted
    segment, every end index from start + min_seg_length up to len(Y) is
    tried. A candidate end is eligible when the fit succeeds and its cost is
    <= cost_threshold; the eligible end with the smallest cost + lambda_value
    wins, earliest first on ties.

    Parameters
    ----------
    Y : array-like
        Signal to segment.
    gradient_cps : iterable of int
        Candidate segment start indices, visited in the given order.
    min_seg_length : int
        Minimum number of samples in a segment.
    min_remove_length : int
        Not used by this function.
    lambda_value : float
        Constant added to each candidate's cost before comparison.
    cost_threshold : float
        Candidates costing more than this are discarded.

    Returns
    -------
    (change_points, parameters) : tuple of lists
        Accepted segment end indices and the fitted parameters of each.
    """
    change_points, parameters = [], []
    last_end = 0
    signal_length = len(Y)

    for seg_start in gradient_cps:
        # Skip starts that fall inside a segment that was already accepted.
        if seg_start <= last_end:
            continue

        lowest_total = np.inf
        chosen_end = seg_start + min_seg_length
        chosen_fit = None

        for candidate_end in range(seg_start + min_seg_length, signal_length + 1):
            fit = extract_params(Y, seg_start, candidate_end)
            if fit is None:
                continue
            seg_cost = negative_log_likelihood(fit, Y, seg_start, candidate_end)
            # Written as a negated <= so a NaN cost is rejected, as before.
            if not (seg_cost <= cost_threshold):
                continue
            penalised = seg_cost + lambda_value
            if penalised < lowest_total:
                lowest_total = penalised
                chosen_end = candidate_end
                chosen_fit = fit

        if chosen_fit is not None:
            parameters.append(chosen_fit)
            change_points.append(chosen_end)
            last_end = chosen_end

    return change_points, parameters

# Rebuild the 18-hour series for the study window, locate the candidate
# starts, then run the custom search from them.
resampled = location_sm['soil_moisture'].resample('18H').mean()
Y = timeframe_sm(resampled, start, end)
gradient_cps = turning_points(Y)

change_points, parameters = pelt_custom(
    Y,
    gradient_cps,
    lambda_value=1,
    cost_threshold=100,
)
print("Changepoints:", change_points)


# Overlay every detected changepoint on the soil-moisture series.
plt.figure(figsize=(20, 8))
plt.plot(Y, label="Soil Moisture")
for i, cp in enumerate(change_points):
    # Label only the first line so the legend carries one 'Changepoint' entry.
    plt.axvline(cp, color='red', linestyle='--', label='Changepoint' if i == 0 else None)

plt.title("Soil Moisture with Detected Changepoints")
plt.xlabel("Time")
plt.ylabel("Soil Moisture")
plt.legend()
plt.show()

# Split the fitted parameter vectors into one list per parameter.
alpha_0_vals = [p[0] for p in parameters]
alpha_1_vals = [p[1] for p in parameters]
gamma_i_vals = [p[2] for p in parameters]

# One histogram per parameter, stacked vertically in a single figure.
plt.figure(figsize=(15, 10))
for _row, (_vals, _color, _title, _xlabel) in enumerate(
    [
        (alpha_0_vals, 'blue', 'Distribution of Alpha_0 (Asymptotic Value)', 'Alpha_0'),
        (alpha_1_vals, 'green', 'Distribution of Alpha_1 (Amplitude)', 'Alpha_1'),
        (gamma_i_vals, 'orange', 'Distribution of Gamma_i (Decay Rate)', 'Gamma_i'),
    ],
    start=1,
):
    plt.subplot(3, 1, _row)
    plt.hist(_vals, bins=20, color=_color, alpha=0.7)
    plt.title(_title)
    plt.xlabel(_xlabel)
    plt.ylabel('Frequency')

plt.tight_layout()
plt.show()
