In [1]:
import json
import os
import pickle
import keras
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.signal import savgol_filter
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, BatchNormalization, Dense, Dropout, Reshape
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

In [2]:



def trim_start_end_nans(df):
    """
    Removes rows at the start and end of a DataFrame that have NaN values in any column.
    """
    # Initialize start_idx and end_idx based on the DataFrame's index type
    if isinstance(df.index, pd.DatetimeIndex):
        start_idx = df.index[0]  # Assume first index is earliest; adjust if necessary
        end_idx = df.index[-1]  # Assume last index is latest; adjust if necessary
    else:
        start_idx = 0
        end_idx = len(df) - 1

    for column in df.columns:
        # Find the first non-NaN index in the current column
        first_valid_index = df[column].first_valid_index()
        if first_valid_index is not None and df.index.get_loc(
            first_valid_index
        ) > df.index.get_loc(start_idx):
            start_idx = first_valid_index

        # Find the last non-NaN index in the current column
        last_valid_index = df[column].last_valid_index()
        if last_valid_index is not None and df.index.get_loc(
            last_valid_index
        ) < df.index.get_loc(end_idx):
            end_idx = last_valid_index

    # Trim the DataFrame
    return df.loc[start_idx:end_idx]




def process_data_for_plot(
    plot_number,
    target_columns,
    continuous_columns,
    start_date="2023-07-20",
    end_date="2023-09-03",
    rolling_windows=[3, 7], 
):
    """
    Process data for a given plot number within a specified date range. This includes:
    * Spike Detection (up and down) for VWC columns
    * Time since last significant precipitation
    * Cumulative precipitation within a time window
    * Rolling window statistics
    * Time Encoding
    """

    # Database connection
    conn = sqlite3.connect("processed_data.db")
    query = "SELECT * FROM data_table"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Filter by plot_number and date range
    df = df[
        (df["plot_number"] == plot_number)
        & (df["TIMESTAMP"] >= start_date)
        & (df["TIMESTAMP"] <= end_date)
    ]

    # Convert TIMESTAMP to datetime
    df["TIMESTAMP"] = pd.to_datetime(df["TIMESTAMP"])
    df.set_index("TIMESTAMP", inplace=True)

    # Sort by TIMESTAMP 
    df.sort_values(by="TIMESTAMP", inplace=True)

    # Select relevant columns
    df = df[continuous_columns + target_columns]

    # Resample to daily frequency 
    df = df.resample("D").mean()

    # Spike detection for VWC columns
    for col in df.columns:
        if "VWC" in col:
            df[f"{col}_spike_up"] = (df[col] > df[col].shift(1) * 1.15).astype(int)  # 15% increase
            df[f"{col}_spike_down"] = (df[col] < df[col].shift(1) * 0.85).astype(int)  # 15% decrease

    # Time features
    df['time_index'] = np.arange(len(df))

    # Time since precipitation (modify thresholds as needed)
    significant_precip_threshold = 0.5  
    max_precip_value = df['precip_irrig'].max()
    df['time_since_last_significant_precip'] = (df['precip_irrig'] > significant_precip_threshold).astype(int)
    df['time_since_last_significant_precip'] = df['time_since_last_significant_precip'].replace(to_replace=0, method='ffill')
    df['time_since_last_half_max_precip'] = (df['precip_irrig'] > (max_precip_value / 2)).astype(int)
    df['time_since_last_half_max_precip'] = df['time_since_last_half_max_precip'].replace(to_replace=0, method='ffill')

    # Cumulative precipitation (replace 4 with the desired window)
    df['precip_irrig_cumulative_4day'] = df['precip_irrig'].rolling(4).sum() 

    # Preprocessing 
    df = df.interpolate(method="pchip")

    # Rolling window features
    for window in rolling_windows:
        for col in continuous_columns:
            df[f'{col}_rolling_mean_{window}'] = df[col].rolling(window=window).mean()
            df[f'{col}_rolling_std_{window}'] = df[col].rolling(window=window).std()

    return df

In [3]:
def subtract_mean(df, target_columns, continuous_columns):
    # Subtract mean from each column (append new columns with suffix "_mean_subtracted")
    df_mean_subtracted = df.copy()
    mean_values = {}
    for col in df_mean_subtracted.columns:
        if col in [target_columns + continuous_columns]:
            mean_values[col] = df_mean_subtracted[col].mean()
            df_mean_subtracted[col] = df_mean_subtracted[col] - mean_values[col]
    return df_mean_subtracted, mean_values

def create_derivative_columns(df, target_columns, continuous_columns):
    initial_values = {}
    for col in df.columns:  # Change to apply to all columns
        if col in [target_columns + continuous_columns]:
            initial_values[col] = df[col].iloc[0]
        deriv_col_name = f"{col}_deriv" 
        df[deriv_col_name] = df[col].diff().fillna(0)  # Fill NaN with 0 for initial diff
    return df, initial_values


def transform_and_scale_data(df, target_columns, continuous_columns):
    df_transformed = df.copy()
    df_transformed, mean_values = subtract_mean(df_transformed, target_columns, continuous_columns)  # Change here to apply to all
    df_transformed, initial_values = create_derivative_columns(df_transformed, target_columns, continuous_columns)
    df_transformed["precip_irrig_bool"] = df_transformed["precip_irrig"].apply(
        lambda x: 1 if x > 0 else 0
    )
    
    
    return df_transformed



In [4]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import os
import pickle

def train_and_save_model_with_time_series_validation(X, y, forecast_day, model_save_path):
    # Initialize TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=5)

    # Best model placeholder
    best_model = None
    best_rmse = float("inf")

    # Iterate over each train-test split
    for train_index, val_index in tscv.split(X):
        # Split data
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        # Prepare DMatrices
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dval = xgb.DMatrix(X_val, label=y_val)

        # XGBoost parameters
        param = {
        'max_depth': 5,
        'eta': 0.05, 
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'subsample': 0.8,
        'colsample_bytree': 1, 
        'lambda': 1,
        'alpha': 0.2,
        'gamma': 0.2 
    }


        num_round = 2000  # Number of training iterations

        # Train XGBoost model with early stopping
        bst = xgb.train(
            param,
            dtrain,
            num_round,
            [(dtrain, 'train'), (dval, 'val')],
            early_stopping_rounds=20,
            verbose_eval=True
        )

        # Predict on validation set
        y_val_pred = bst.predict(dval)

        # Calculate RMSE for the current split
        val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
        print(f"Validation RMSE for current split: {val_rmse:.2f}")

        # Update best model if improvement
        if val_rmse < best_rmse:
            best_rmse = val_rmse
            best_model = bst

    # Save the best model
    best_model_save_path = os.path.join(model_save_path, f"best_model_day_{forecast_day}.json")
    os.makedirs(os.path.dirname(best_model_save_path), exist_ok=True)
    best_model.save_model(best_model_save_path)

    return best_model_save_path


In [5]:
import pandas as pd
import numpy as np
import os
import sqlite3
import pickle
from xgboost import XGBRegressor
import numpy as np
import matplotlib.pyplot as plt

# Assuming the necessary functions are defined as provided: 
# trim_start_end_nans, process_data_for_plot, subtract_mean, create_derivative_columns, transform_and_scale_data




def prepare_and_train_models(plot_numbers, target_column, continuous_columns, forecast_horizon, model_save_path):
    """
    Prepare data, train models for each forecast horizon, and save the models.
    """
    for plot_number in plot_numbers:
        # Process and transform data
        df = process_data_for_plot(plot_number, [target_column], continuous_columns)
        df = trim_start_end_nans(df)
        df_transformed = transform_and_scale_data(df, [target_column], continuous_columns)

        # Define training data
        X = df_transformed.drop(columns=[target_column]).values
        y = df_transformed[target_column].values

        # Train and save model for each forecast horizon
        for forecast_day in range(1, forecast_horizon + 1):  # Example: 3-day forecast horizon
            train_and_save_model_with_time_series_validation(X, y, forecast_day, model_save_path)
            print(f"Model for day {forecast_day} trained and saved for plot {plot_number}")

def predict_with_model(model_path, X):
    """
    Load a model from a file and make predictions.
    """
    model = xgb.Booster()
    model.load_model(model_path)
    return model.predict(X)

def run_inference(models_path, plot_number, target_column, continuous_columns, forecast_horizon):
    # Process and transform data for inference
    df = process_data_for_plot(plot_number, [target_column], continuous_columns)
    df = trim_start_end_nans(df)
    df_transformed = transform_and_scale_data(df, [target_column], continuous_columns)

    X = df_transformed.drop(columns=[target_column]).values
    y_actual = df_transformed[target_column].values

    predictions = np.zeros((len(X) - forecast_horizon + 1, forecast_horizon))

    for day in range(1, forecast_horizon + 1):
        model_path = os.path.join(models_path, f"best_model_day_{day}.json")
        pred = predict_with_model(models_path, X[:-(forecast_horizon - day) if (forecast_horizon - day) > 0 else None])
        for i in range(min(len(pred), len(predictions))):
            predictions[i, day - 1] = pred[i]

    # Filter out rows where all values are zero (assuming zero predictions are not expected)
    predictions = predictions[~np.all(predictions == 0, axis=1)]

    # Flatten predictions and actuals to plot on the same curve
    flat_predictions = predictions.flatten()
    # Adjust actuals to match the length of filtered predictions
    adjusted_length = len(flat_predictions) // forecast_horizon
    flat_actuals = np.array([y_actual[i:i+forecast_horizon] for i in range(adjusted_length)]).flatten()

    # Plotting
    plt.figure(figsize=(14, 7))
    plt.plot(flat_actuals, label='Actual', linestyle='-', marker='o')
    plt.plot(flat_predictions, label='Predicted', linestyle='--', marker='x')

    plt.title(f'Predictions vs Actual Values for Plot {plot_number}')
    plt.xlabel('Time Step')
    plt.ylabel(target_column)
    plt.legend()
    plt.show()

    return predictions


# Example usage
plot_numbers = [2001]  # Example plot numbers for training
target_column = "VWC_06"
continuous_columns = [
    "Ta_2m_Avg", "RH_2m_Avg", "Solar_2m_Avg", "WndAveSpd_3m", "Rain_1m_Tot",
    "Dp_2m_Avg", "TaMax_2m", "TaMin_2m", "RHMax_2m", "RHMin_2m",
    "HeatIndex_2m_Avg", "irrigation", "precip_irrig", "canopy_temp",
    "VWC_18", "VWC_30"
]

model_save_path = r"C:\Users\bnsoh2\Desktop\models"
forecast_horizon = 4

# Train models
prepare_and_train_models(plot_numbers, target_column, continuous_columns, forecast_horizon, model_save_path)

# Inference
plot_number_for_inference = 2007  # Example plot number for inference
predictions = run_inference(model_save_path, plot_number_for_inference, target_column, continuous_columns, forecast_horizon)
print(predictions)


  df['time_since_last_significant_precip'] = df['time_since_last_significant_precip'].replace(to_replace=0, method='ffill')
  df['time_since_last_half_max_precip'] = df['time_since_last_half_max_precip'].replace(to_replace=0, method='ffill')


[0]	train-rmse:17.79192	val-rmse:24.89868
[1]	train-rmse:17.03437	val-rmse:24.14389
[2]	train-rmse:16.29370	val-rmse:23.40608
[3]	train-rmse:15.56330	val-rmse:22.67865
[4]	train-rmse:14.86585	val-rmse:21.98421
[5]	train-rmse:14.20924	val-rmse:21.33056
[6]	train-rmse:13.58448	val-rmse:20.70877
[7]	train-rmse:12.99607	val-rmse:20.12330
[8]	train-rmse:12.41453	val-rmse:19.54479
[9]	train-rmse:11.90309	val-rmse:19.03614
[10]	train-rmse:11.38134	val-rmse:18.51735
[11]	train-rmse:10.89563	val-rmse:18.03451
[12]	train-rmse:10.42929	val-rmse:17.57104
[13]	train-rmse:9.96333	val-rmse:17.10804
[14]	train-rmse:9.51942	val-rmse:16.66705
[15]	train-rmse:9.09428	val-rmse:16.24480
[16]	train-rmse:8.70337	val-rmse:15.85661
[17]	train-rmse:8.31720	val-rmse:15.47321
[18]	train-rmse:7.94836	val-rmse:15.10707
[19]	train-rmse:7.60248	val-rmse:14.76376
[20]	train-rmse:7.27175	val-rmse:14.43553
[21]	train-rmse:6.97754	val-rmse:14.14356
[22]	train-rmse:6.68051	val-rmse:13.84880
[23]	train-rmse:6.38708	val-rms



[83]	train-rmse:0.66323	val-rmse:7.58696
[84]	train-rmse:0.64278	val-rmse:7.56186
[85]	train-rmse:0.62251	val-rmse:7.53116
[86]	train-rmse:0.60294	val-rmse:7.50171
[87]	train-rmse:0.58459	val-rmse:7.47344
[88]	train-rmse:0.57229	val-rmse:7.47344
[89]	train-rmse:0.55570	val-rmse:7.44696
[90]	train-rmse:0.53990	val-rmse:7.42154
[91]	train-rmse:0.52872	val-rmse:7.40712
[92]	train-rmse:0.51241	val-rmse:7.38066
[93]	train-rmse:0.49612	val-rmse:7.35519
[94]	train-rmse:0.48154	val-rmse:7.34323
[95]	train-rmse:0.46744	val-rmse:7.31966
[96]	train-rmse:0.45854	val-rmse:7.30752
[97]	train-rmse:0.44607	val-rmse:7.28530
[98]	train-rmse:0.43554	val-rmse:7.27051
[99]	train-rmse:0.42412	val-rmse:7.25328
[100]	train-rmse:0.42028	val-rmse:7.24768
[101]	train-rmse:0.41203	val-rmse:7.23540
[102]	train-rmse:0.40086	val-rmse:7.21591
[103]	train-rmse:0.39082	val-rmse:7.19716
[104]	train-rmse:0.38556	val-rmse:7.18900
[105]	train-rmse:0.37742	val-rmse:7.17812
[106]	train-rmse:0.37230	val-rmse:7.17000
[107]	tra



[8]	train-rmse:14.35475	val-rmse:16.34504
[9]	train-rmse:13.74851	val-rmse:15.71826
[10]	train-rmse:13.13538	val-rmse:15.08227
[11]	train-rmse:12.57106	val-rmse:14.49478
[12]	train-rmse:12.02804	val-rmse:13.92720
[13]	train-rmse:11.59312	val-rmse:13.47079
[14]	train-rmse:11.10704	val-rmse:12.95847
[15]	train-rmse:10.66289	val-rmse:12.48799
[16]	train-rmse:10.23404	val-rmse:12.05143
[17]	train-rmse:9.82360	val-rmse:11.50057
[18]	train-rmse:9.41352	val-rmse:11.08707
[19]	train-rmse:9.04317	val-rmse:10.59965
[20]	train-rmse:8.65923	val-rmse:10.21710
[21]	train-rmse:8.31315	val-rmse:9.91433
[22]	train-rmse:7.96063	val-rmse:9.56634
[23]	train-rmse:7.61921	val-rmse:9.10078
[24]	train-rmse:7.35108	val-rmse:8.81652
[25]	train-rmse:7.05342	val-rmse:8.53074
[26]	train-rmse:6.76089	val-rmse:8.25445
[27]	train-rmse:6.49153	val-rmse:8.00181
[28]	train-rmse:6.28610	val-rmse:7.78574
[29]	train-rmse:6.02188	val-rmse:7.54335
[30]	train-rmse:5.79212	val-rmse:7.40997
[31]	train-rmse:5.56433	val-rmse:7.20



[11]	train-rmse:12.87270	val-rmse:10.52500
[12]	train-rmse:12.34768	val-rmse:9.98292
[13]	train-rmse:11.78279	val-rmse:9.39788
[14]	train-rmse:11.25792	val-rmse:8.99102
[15]	train-rmse:10.76342	val-rmse:8.60663
[16]	train-rmse:10.29682	val-rmse:8.06034
[17]	train-rmse:9.84544	val-rmse:7.69710
[18]	train-rmse:9.43198	val-rmse:7.26664
[19]	train-rmse:9.02681	val-rmse:6.95315
[20]	train-rmse:8.68698	val-rmse:6.59812
[21]	train-rmse:8.30634	val-rmse:6.30115
[22]	train-rmse:7.96435	val-rmse:6.05320
[23]	train-rmse:7.63265	val-rmse:5.65742
[24]	train-rmse:7.30270	val-rmse:5.40546
[25]	train-rmse:6.98606	val-rmse:5.17513
[26]	train-rmse:6.69080	val-rmse:4.87072
[27]	train-rmse:6.39125	val-rmse:4.66507
[28]	train-rmse:6.10955	val-rmse:4.46817
[29]	train-rmse:5.85367	val-rmse:4.28283
[30]	train-rmse:5.61639	val-rmse:4.01133
[31]	train-rmse:5.37503	val-rmse:3.84580
[32]	train-rmse:5.15766	val-rmse:3.59260
[33]	train-rmse:4.94365	val-rmse:3.44251
[34]	train-rmse:4.73392	val-rmse:3.30682
[35]	trai



[16]	train-rmse:9.89986	val-rmse:6.27496
[17]	train-rmse:9.45242	val-rmse:5.93087
[18]	train-rmse:9.04854	val-rmse:5.62001
[19]	train-rmse:8.64616	val-rmse:5.31444
[20]	train-rmse:8.29265	val-rmse:4.92313
[21]	train-rmse:7.91858	val-rmse:4.63459
[22]	train-rmse:7.57028	val-rmse:4.35893
[23]	train-rmse:7.23234	val-rmse:4.11614
[24]	train-rmse:6.91899	val-rmse:3.87801
[25]	train-rmse:6.60652	val-rmse:3.64498
[26]	train-rmse:6.30869	val-rmse:3.41561
[27]	train-rmse:6.03607	val-rmse:3.19635
[28]	train-rmse:5.76936	val-rmse:2.96941
[29]	train-rmse:5.51376	val-rmse:2.77654
[30]	train-rmse:5.28321	val-rmse:2.59668
[31]	train-rmse:5.05277	val-rmse:2.42742
[32]	train-rmse:4.83271	val-rmse:2.27039
[33]	train-rmse:4.62183	val-rmse:2.10848
[34]	train-rmse:4.43137	val-rmse:1.99684
[35]	train-rmse:4.24513	val-rmse:1.84341
[36]	train-rmse:4.05927	val-rmse:1.70539
[37]	train-rmse:3.87906	val-rmse:1.58050
[38]	train-rmse:3.70508	val-rmse:1.45734
[39]	train-rmse:3.56070	val-rmse:1.36755
[40]	train-rmse:



[8]	train-rmse:13.78287	val-rmse:10.11381
[9]	train-rmse:13.15129	val-rmse:9.46364
[10]	train-rmse:12.55565	val-rmse:8.84114
[11]	train-rmse:12.01114	val-rmse:8.27845
[12]	train-rmse:11.46430	val-rmse:7.70700
[13]	train-rmse:10.94397	val-rmse:7.16515
[14]	train-rmse:10.43502	val-rmse:6.62337
[15]	train-rmse:9.95912	val-rmse:6.21810
[16]	train-rmse:9.50678	val-rmse:5.72337
[17]	train-rmse:9.08288	val-rmse:5.28439
[18]	train-rmse:8.66038	val-rmse:4.85005
[19]	train-rmse:8.28067	val-rmse:4.42984
[20]	train-rmse:7.90617	val-rmse:4.02262
[21]	train-rmse:7.54127	val-rmse:3.73931
[22]	train-rmse:7.21484	val-rmse:3.39130
[23]	train-rmse:6.88915	val-rmse:3.13005
[24]	train-rmse:6.58149	val-rmse:2.88535
[25]	train-rmse:6.29396	val-rmse:2.58098
[26]	train-rmse:6.01278	val-rmse:2.34862
[27]	train-rmse:5.74392	val-rmse:2.08094
[28]	train-rmse:5.48986	val-rmse:1.88832
[29]	train-rmse:5.24368	val-rmse:1.70127
[30]	train-rmse:5.01635	val-rmse:1.52922
[31]	train-rmse:4.79555	val-rmse:1.36408
[32]	train




[19]	train-rmse:7.60248	val-rmse:14.76376
[20]	train-rmse:7.27175	val-rmse:14.43553
[21]	train-rmse:6.97754	val-rmse:14.14356
[22]	train-rmse:6.68051	val-rmse:13.84880
[23]	train-rmse:6.38708	val-rmse:13.55762
[24]	train-rmse:6.10530	val-rmse:13.27797
[25]	train-rmse:5.85225	val-rmse:13.02681
[26]	train-rmse:5.59668	val-rmse:12.77311
[27]	train-rmse:5.35511	val-rmse:12.53326
[28]	train-rmse:5.13351	val-rmse:12.31316
[29]	train-rmse:4.91432	val-rmse:12.09537
[30]	train-rmse:4.70938	val-rmse:11.89164
[31]	train-rmse:4.51509	val-rmse:11.69839
[32]	train-rmse:4.31945	val-rmse:11.50367
[33]	train-rmse:4.14707	val-rmse:11.33195
[34]	train-rmse:3.96858	val-rmse:11.15397
[35]	train-rmse:3.80179	val-rmse:10.98749
[36]	train-rmse:3.63656	val-rmse:10.82234
[37]	train-rmse:3.48204	val-rmse:10.66766
[38]	train-rmse:3.33596	val-rmse:10.52119
[39]	train-rmse:3.20101	val-rmse:10.38562
[40]	train-rmse:3.07473	val-rmse:10.25851
[41]	train-rmse:2.95620	val-rmse:10.13893
[42]	train-rmse:2.84252	val-rmse:



[28]	train-rmse:6.28610	val-rmse:7.78574
[29]	train-rmse:6.02188	val-rmse:7.54335
[30]	train-rmse:5.79212	val-rmse:7.40997
[31]	train-rmse:5.56433	val-rmse:7.20383
[32]	train-rmse:5.32977	val-rmse:6.88304
[33]	train-rmse:5.11575	val-rmse:6.70379
[34]	train-rmse:4.90954	val-rmse:6.52891
[35]	train-rmse:4.73706	val-rmse:6.38748
[36]	train-rmse:4.58283	val-rmse:6.17735
[37]	train-rmse:4.41206	val-rmse:6.05327
[38]	train-rmse:4.23544	val-rmse:5.91829
[39]	train-rmse:4.08257	val-rmse:5.75198
[40]	train-rmse:3.91962	val-rmse:5.56853
[41]	train-rmse:3.76408	val-rmse:5.45387
[42]	train-rmse:3.61329	val-rmse:5.34904
[43]	train-rmse:3.47236	val-rmse:5.17485
[44]	train-rmse:3.32965	val-rmse:5.07694
[45]	train-rmse:3.22039	val-rmse:4.96288
[46]	train-rmse:3.10522	val-rmse:4.88131
[47]	train-rmse:2.98137	val-rmse:4.80686
[48]	train-rmse:2.87068	val-rmse:4.73494
[49]	train-rmse:2.75446	val-rmse:4.66563
[50]	train-rmse:2.64355	val-rmse:4.60236
[51]	train-rmse:2.55174	val-rmse:4.54400
[52]	train-rmse:




[13]	train-rmse:11.78279	val-rmse:9.39788
[14]	train-rmse:11.25792	val-rmse:8.99102
[15]	train-rmse:10.76342	val-rmse:8.60663
[16]	train-rmse:10.29682	val-rmse:8.06034
[17]	train-rmse:9.84544	val-rmse:7.69710
[18]	train-rmse:9.43198	val-rmse:7.26664
[19]	train-rmse:9.02681	val-rmse:6.95315
[20]	train-rmse:8.68698	val-rmse:6.59812
[21]	train-rmse:8.30634	val-rmse:6.30115
[22]	train-rmse:7.96435	val-rmse:6.05320
[23]	train-rmse:7.63265	val-rmse:5.65742
[24]	train-rmse:7.30270	val-rmse:5.40546
[25]	train-rmse:6.98606	val-rmse:5.17513
[26]	train-rmse:6.69080	val-rmse:4.87072
[27]	train-rmse:6.39125	val-rmse:4.66507
[28]	train-rmse:6.10955	val-rmse:4.46817
[29]	train-rmse:5.85367	val-rmse:4.28283
[30]	train-rmse:5.61639	val-rmse:4.01133
[31]	train-rmse:5.37503	val-rmse:3.84580
[32]	train-rmse:5.15766	val-rmse:3.59260
[33]	train-rmse:4.94365	val-rmse:3.44251
[34]	train-rmse:4.73392	val-rmse:3.30682
[35]	train-rmse:4.53670	val-rmse:3.17386
[36]	train-rmse:4.33287	val-rmse:2.91749
[37]	train-



[13]	train-rmse:11.32116	val-rmse:7.64464
[14]	train-rmse:10.82304	val-rmse:7.23486
[15]	train-rmse:10.36204	val-rmse:6.75637
[16]	train-rmse:9.89986	val-rmse:6.27496
[17]	train-rmse:9.45242	val-rmse:5.93087
[18]	train-rmse:9.04854	val-rmse:5.62001
[19]	train-rmse:8.64616	val-rmse:5.31444
[20]	train-rmse:8.29265	val-rmse:4.92313
[21]	train-rmse:7.91858	val-rmse:4.63459
[22]	train-rmse:7.57028	val-rmse:4.35893
[23]	train-rmse:7.23234	val-rmse:4.11614
[24]	train-rmse:6.91899	val-rmse:3.87801
[25]	train-rmse:6.60652	val-rmse:3.64498
[26]	train-rmse:6.30869	val-rmse:3.41561
[27]	train-rmse:6.03607	val-rmse:3.19635
[28]	train-rmse:5.76936	val-rmse:2.96941
[29]	train-rmse:5.51376	val-rmse:2.77654
[30]	train-rmse:5.28321	val-rmse:2.59668
[31]	train-rmse:5.05277	val-rmse:2.42742
[32]	train-rmse:4.83271	val-rmse:2.27039
[33]	train-rmse:4.62183	val-rmse:2.10848
[34]	train-rmse:4.43137	val-rmse:1.99684
[35]	train-rmse:4.24513	val-rmse:1.84341
[36]	train-rmse:4.05927	val-rmse:1.70539
[37]	train-rm



[18]	train-rmse:8.66038	val-rmse:4.85005
[19]	train-rmse:8.28067	val-rmse:4.42984
[20]	train-rmse:7.90617	val-rmse:4.02262
[21]	train-rmse:7.54127	val-rmse:3.73931
[22]	train-rmse:7.21484	val-rmse:3.39130
[23]	train-rmse:6.88915	val-rmse:3.13005
[24]	train-rmse:6.58149	val-rmse:2.88535
[25]	train-rmse:6.29396	val-rmse:2.58098
[26]	train-rmse:6.01278	val-rmse:2.34862
[27]	train-rmse:5.74392	val-rmse:2.08094
[28]	train-rmse:5.48986	val-rmse:1.88832
[29]	train-rmse:5.24368	val-rmse:1.70127
[30]	train-rmse:5.01635	val-rmse:1.52922
[31]	train-rmse:4.79555	val-rmse:1.36408
[32]	train-rmse:4.59460	val-rmse:1.18164
[33]	train-rmse:4.40276	val-rmse:1.03520
[34]	train-rmse:4.21809	val-rmse:0.90341
[35]	train-rmse:4.03832	val-rmse:0.82448
[36]	train-rmse:3.88258	val-rmse:0.77063
[37]	train-rmse:3.71487	val-rmse:0.74115
[38]	train-rmse:3.55733	val-rmse:0.73506
[39]	train-rmse:3.40261	val-rmse:0.74988
[40]	train-rmse:3.25506	val-rmse:0.78680
[41]	train-rmse:3.11972	val-rmse:0.83028
[42]	train-rmse:



[28]	train-rmse:5.13351	val-rmse:12.31316
[29]	train-rmse:4.91432	val-rmse:12.09537
[30]	train-rmse:4.70938	val-rmse:11.89164
[31]	train-rmse:4.51509	val-rmse:11.69839
[32]	train-rmse:4.31945	val-rmse:11.50367
[33]	train-rmse:4.14707	val-rmse:11.33195
[34]	train-rmse:3.96858	val-rmse:11.15397
[35]	train-rmse:3.80179	val-rmse:10.98749
[36]	train-rmse:3.63656	val-rmse:10.82234
[37]	train-rmse:3.48204	val-rmse:10.66766
[38]	train-rmse:3.33596	val-rmse:10.52119
[39]	train-rmse:3.20101	val-rmse:10.38562
[40]	train-rmse:3.07473	val-rmse:10.25851
[41]	train-rmse:2.95620	val-rmse:10.13893
[42]	train-rmse:2.84252	val-rmse:10.02396
[43]	train-rmse:2.72597	val-rmse:9.90577
[44]	train-rmse:2.62025	val-rmse:9.79821
[45]	train-rmse:2.51864	val-rmse:9.69447
[46]	train-rmse:2.42520	val-rmse:9.59873
[47]	train-rmse:2.32773	val-rmse:9.49844
[48]	train-rmse:2.23993	val-rmse:9.40766
[49]	train-rmse:2.15522	val-rmse:9.31258
[50]	train-rmse:2.08077	val-rmse:9.23512
[51]	train-rmse:2.00458	val-rmse:9.13932
[



[105]	train-rmse:0.45482	val-rmse:3.41960
[106]	train-rmse:0.44243	val-rmse:3.42136
[107]	train-rmse:0.44065	val-rmse:3.42020
[108]	train-rmse:0.42876	val-rmse:3.42130
[109]	train-rmse:0.41850	val-rmse:3.41578
[110]	train-rmse:0.40726	val-rmse:3.40962
[111]	train-rmse:0.39788	val-rmse:3.40528
[112]	train-rmse:0.38825	val-rmse:3.40508
[113]	train-rmse:0.37988	val-rmse:3.40047
[114]	train-rmse:0.37056	val-rmse:3.39529
[115]	train-rmse:0.36879	val-rmse:3.39402
[116]	train-rmse:0.36825	val-rmse:3.39362
[117]	train-rmse:0.35903	val-rmse:3.39059
[118]	train-rmse:0.35129	val-rmse:3.39054
[119]	train-rmse:0.34399	val-rmse:3.38376
[120]	train-rmse:0.33787	val-rmse:3.37933
[121]	train-rmse:0.33178	val-rmse:3.37980
[122]	train-rmse:0.32643	val-rmse:3.37980
[123]	train-rmse:0.32563	val-rmse:3.37913
[124]	train-rmse:0.31916	val-rmse:3.38362
[125]	train-rmse:0.31295	val-rmse:3.38577
[126]	train-rmse:0.30783	val-rmse:3.38572
[127]	train-rmse:0.30664	val-rmse:3.38466
[128]	train-rmse:0.30099	val-rmse:



[88]	train-rmse:0.64750	val-rmse:1.02938
[89]	train-rmse:0.62644	val-rmse:1.03178
[90]	train-rmse:0.61492	val-rmse:1.03221
[91]	train-rmse:0.60092	val-rmse:1.03417
[92]	train-rmse:0.58518	val-rmse:1.03766
[93]	train-rmse:0.56677	val-rmse:1.03972
[94]	train-rmse:0.55580	val-rmse:1.04185
[95]	train-rmse:0.54356	val-rmse:1.04391
[96]	train-rmse:0.52952	val-rmse:1.04582
[97]	train-rmse:0.52168	val-rmse:1.05135
Validation RMSE for current split: 1.05
[0]	train-rmse:20.83545	val-rmse:17.08101
[1]	train-rmse:19.89091	val-rmse:16.12474
[2]	train-rmse:19.00018	val-rmse:15.22185
[3]	train-rmse:18.09945	val-rmse:14.30758
[4]	train-rmse:17.24547	val-rmse:13.43946
[5]	train-rmse:16.46505	val-rmse:12.64479
[6]	train-rmse:15.71666	val-rmse:11.88139
[7]	train-rmse:14.98546	val-rmse:11.13404
[8]	train-rmse:14.28546	val-rmse:10.41700
[9]	train-rmse:13.64303	val-rmse:9.75734
[10]	train-rmse:13.02106	val-rmse:9.11702
[11]	train-rmse:12.43412	val-rmse:8.51103
[12]	train-rmse:11.86675	val-rmse:8.06738
[13]	



[14]	train-rmse:10.43502	val-rmse:6.62337
[15]	train-rmse:9.95912	val-rmse:6.21810
[16]	train-rmse:9.50678	val-rmse:5.72337
[17]	train-rmse:9.08288	val-rmse:5.28439
[18]	train-rmse:8.66038	val-rmse:4.85005
[19]	train-rmse:8.28067	val-rmse:4.42984
[20]	train-rmse:7.90617	val-rmse:4.02262
[21]	train-rmse:7.54127	val-rmse:3.73931
[22]	train-rmse:7.21484	val-rmse:3.39130
[23]	train-rmse:6.88915	val-rmse:3.13005
[24]	train-rmse:6.58149	val-rmse:2.88535
[25]	train-rmse:6.29396	val-rmse:2.58098
[26]	train-rmse:6.01278	val-rmse:2.34862
[27]	train-rmse:5.74392	val-rmse:2.08094
[28]	train-rmse:5.48986	val-rmse:1.88832
[29]	train-rmse:5.24368	val-rmse:1.70127
[30]	train-rmse:5.01635	val-rmse:1.52922
[31]	train-rmse:4.79555	val-rmse:1.36408
[32]	train-rmse:4.59460	val-rmse:1.18164
[33]	train-rmse:4.40276	val-rmse:1.03520
[34]	train-rmse:4.21809	val-rmse:0.90341
[35]	train-rmse:4.03832	val-rmse:0.82448
[36]	train-rmse:3.88258	val-rmse:0.77063
[37]	train-rmse:3.71487	val-rmse:0.74115
[38]	train-rmse



[103]	train-rmse:0.39082	val-rmse:7.19716
[104]	train-rmse:0.38556	val-rmse:7.18900
[105]	train-rmse:0.37742	val-rmse:7.17812
[106]	train-rmse:0.37230	val-rmse:7.17000
[107]	train-rmse:0.36605	val-rmse:7.15980
[108]	train-rmse:0.36090	val-rmse:7.15113
[109]	train-rmse:0.35912	val-rmse:7.14806
[110]	train-rmse:0.35551	val-rmse:7.14174
[111]	train-rmse:0.34821	val-rmse:7.13193
[112]	train-rmse:0.34329	val-rmse:7.12303
[113]	train-rmse:0.33563	val-rmse:7.10791
[114]	train-rmse:0.33468	val-rmse:7.10614
[115]	train-rmse:0.33190	val-rmse:7.10086
[116]	train-rmse:0.33148	val-rmse:7.10004
[117]	train-rmse:0.32892	val-rmse:7.09503
[118]	train-rmse:0.32815	val-rmse:7.09350
[119]	train-rmse:0.32084	val-rmse:7.07949
[120]	train-rmse:0.31827	val-rmse:7.07416
[121]	train-rmse:0.31467	val-rmse:7.06641
[122]	train-rmse:0.31224	val-rmse:7.06094
[123]	train-rmse:0.31008	val-rmse:7.05592
[124]	train-rmse:0.30390	val-rmse:7.04333
[125]	train-rmse:0.30227	val-rmse:7.03930
[126]	train-rmse:0.30015	val-rmse:



[107]	train-rmse:0.44065	val-rmse:3.42020
[108]	train-rmse:0.42876	val-rmse:3.42130
[109]	train-rmse:0.41850	val-rmse:3.41578
[110]	train-rmse:0.40726	val-rmse:3.40962
[111]	train-rmse:0.39788	val-rmse:3.40528
[112]	train-rmse:0.38825	val-rmse:3.40508
[113]	train-rmse:0.37988	val-rmse:3.40047
[114]	train-rmse:0.37056	val-rmse:3.39529
[115]	train-rmse:0.36879	val-rmse:3.39402
[116]	train-rmse:0.36825	val-rmse:3.39362
[117]	train-rmse:0.35903	val-rmse:3.39059
[118]	train-rmse:0.35129	val-rmse:3.39054
[119]	train-rmse:0.34399	val-rmse:3.38376
[120]	train-rmse:0.33787	val-rmse:3.37933
[121]	train-rmse:0.33178	val-rmse:3.37980
[122]	train-rmse:0.32643	val-rmse:3.37980
[123]	train-rmse:0.32563	val-rmse:3.37913
[124]	train-rmse:0.31916	val-rmse:3.38362
[125]	train-rmse:0.31295	val-rmse:3.38577
[126]	train-rmse:0.30783	val-rmse:3.38572
[127]	train-rmse:0.30664	val-rmse:3.38466
[128]	train-rmse:0.30099	val-rmse:3.37939
[129]	train-rmse:0.29563	val-rmse:3.37439
[130]	train-rmse:0.29042	val-rmse:



[80]	train-rmse:0.80823	val-rmse:1.02311
[81]	train-rmse:0.78027	val-rmse:1.02306
[82]	train-rmse:0.75205	val-rmse:1.02469
[83]	train-rmse:0.73622	val-rmse:1.02682
[84]	train-rmse:0.71529	val-rmse:1.02806
[85]	train-rmse:0.69746	val-rmse:1.03163
[86]	train-rmse:0.67907	val-rmse:1.02789
[87]	train-rmse:0.66014	val-rmse:1.03048
[88]	train-rmse:0.64750	val-rmse:1.02938
[89]	train-rmse:0.62644	val-rmse:1.03178
[90]	train-rmse:0.61492	val-rmse:1.03221
[91]	train-rmse:0.60092	val-rmse:1.03417
[92]	train-rmse:0.58518	val-rmse:1.03766
[93]	train-rmse:0.56677	val-rmse:1.03972
[94]	train-rmse:0.55580	val-rmse:1.04185
[95]	train-rmse:0.54356	val-rmse:1.04391
[96]	train-rmse:0.52952	val-rmse:1.04582
Validation RMSE for current split: 1.05
[0]	train-rmse:20.83545	val-rmse:17.08101
[1]	train-rmse:19.89091	val-rmse:16.12474
[2]	train-rmse:19.00018	val-rmse:15.22185
[3]	train-rmse:18.09945	val-rmse:14.30758
[4]	train-rmse:17.24547	val-rmse:13.43946
[5]	train-rmse:16.46505	val-rmse:12.64479
[6]	train-r



[20]	train-rmse:7.90617	val-rmse:4.02262
[21]	train-rmse:7.54127	val-rmse:3.73931
[22]	train-rmse:7.21484	val-rmse:3.39130
[23]	train-rmse:6.88915	val-rmse:3.13005
[24]	train-rmse:6.58149	val-rmse:2.88535
[25]	train-rmse:6.29396	val-rmse:2.58098
[26]	train-rmse:6.01278	val-rmse:2.34862
[27]	train-rmse:5.74392	val-rmse:2.08094
[28]	train-rmse:5.48986	val-rmse:1.88832
[29]	train-rmse:5.24368	val-rmse:1.70127
[30]	train-rmse:5.01635	val-rmse:1.52922
[31]	train-rmse:4.79555	val-rmse:1.36408
[32]	train-rmse:4.59460	val-rmse:1.18164
[33]	train-rmse:4.40276	val-rmse:1.03520
[34]	train-rmse:4.21809	val-rmse:0.90341
[35]	train-rmse:4.03832	val-rmse:0.82448
[36]	train-rmse:3.88258	val-rmse:0.77063
[37]	train-rmse:3.71487	val-rmse:0.74115
[38]	train-rmse:3.55733	val-rmse:0.73506
[39]	train-rmse:3.40261	val-rmse:0.74988
[40]	train-rmse:3.25506	val-rmse:0.78680
[41]	train-rmse:3.11972	val-rmse:0.83028
[42]	train-rmse:2.98320	val-rmse:0.94454
[43]	train-rmse:2.86031	val-rmse:1.04459
[44]	train-rmse:

  df['time_since_last_significant_precip'] = df['time_since_last_significant_precip'].replace(to_replace=0, method='ffill')
  df['time_since_last_half_max_precip'] = df['time_since_last_half_max_precip'].replace(to_replace=0, method='ffill')


XGBoostError: [16:45:41] C:\buildkite-agent\builds\buildkite-windows-cpu-autoscaling-group-i-0fdc6d574b9c0d168-1\xgboost\xgboost-ci-windows\dmlc-core\src\io\local_filesys.cc:209: Check failed: allow_null:  LocalFileSystem::Open "C:\Users\bnsoh2\Desktop\models": Permission denied

In [6]:
# Inference
plot_number_for_inference = 2007  # Example plot number for inference
predictions = run_inference(model_save_path, plot_number_for_inference, target_column, continuous_columns, forecast_horizon)
print(predictions)


  df['time_since_last_significant_precip'] = df['time_since_last_significant_precip'].replace(to_replace=0, method='ffill')
  df['time_since_last_half_max_precip'] = df['time_since_last_half_max_precip'].replace(to_replace=0, method='ffill')


XGBoostError: [16:47:47] C:\buildkite-agent\builds\buildkite-windows-cpu-autoscaling-group-i-0fdc6d574b9c0d168-1\xgboost\xgboost-ci-windows\dmlc-core\src\io\local_filesys.cc:209: Check failed: allow_null:  LocalFileSystem::Open "C:\Users\bnsoh2\Desktop\models": Permission denied