In [20]:
import pandas as pd
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import matplotlib.pyplot as plt

In [21]:
# Example DataFrames
data_long = {'value_long': [10, 20, 30, 40, 50, 60, 70]}
data_short = {'value_short': [15, np.nan, np.nan]}

index_long = pd.date_range('2022-01-01', periods=len(data_long['value_long']), freq='D')
index_short = pd.date_range('2022-01-03', periods=len(data_short['value_short']), freq='D')

df_long = pd.DataFrame(data_long, index=index_long)
df_short = pd.DataFrame(data_short, index=index_short)

In [22]:
def feet2meters(column_in_feet):
    """
    Convert a Pandas DataFrame column from feet to meters.
    
    Parameters:
        column_in_feet (pandas.Series): The Pandas DataFrame column in feet.

    Returns:
        pandas.Series: The converted column in meters.
    """
    # Conversion factor: 1 foot = 0.3048 meters
    conversion_factor = 0.3048
    
    # Use the conversion factor to convert the column
    column_in_meters = column_in_feet * conversion_factor
    
    return column_in_meters

In [23]:
lakelevel = pd.read_csv('/home/koepflma/project1/Mt-St-Helens/MtStHelens_activity/lakelevel.csv', index_col='Date', parse_dates=True)
ll_meters = feet2meters(lakelevel['LakeLevel(feet)'])
lakelevel['LakeLevel(meter)'] = ll_meters
lakelevel['rel_LakeLevel(meter)'] = lakelevel['LakeLevel(meter)'] - min(lakelevel['LakeLevel(meter)'])
lakelevel

Unnamed: 0_level_0,LakeLevel(feet),LakeLevel(meter),rel_LakeLevel(meter)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01,3444.64,1049.926272,2.121408
2000-01-02,3444.61,1049.917128,2.112264
2000-01-03,3444.53,1049.892744,2.087880
2000-01-04,3444.65,1049.929320,2.124456
2000-01-05,3444.60,1049.914080,2.109216
...,...,...,...
2023-12-22,3444.62,1049.920176,2.115312
2023-12-23,3444.55,1049.898840,2.093976
2023-12-24,3444.51,1049.886648,2.081784
2023-12-25,3444.51,1049.886648,2.081784


In [24]:
borehole = pd.read_csv('/home/koepflma/project1/Mt-St-Helens/MtStHelens_activity/Water_level_depth_LSD.csv',
                        skiprows=14, index_col='ISO 8601 UTC', parse_dates=True)
borehole_meters = feet2meters(borehole['Value'])
borehole['borehole(meter)'] = borehole_meters
borehole['rel_borehole(meter)'] = max(borehole['borehole(meter)']) - borehole['borehole(meter)']
borehole = borehole.resample('D').mean().to_period('D')
borehole

  borehole = borehole.resample('D').mean().to_period('D')
  borehole = borehole.resample('D').mean().to_period('D')


Unnamed: 0_level_0,Value,Grade,Qualifiers,borehole(meter),rel_borehole(meter)
ISO 8601 UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-11-21,41.981824,50.0,,12.796060,0.356190
2019-11-22,41.998892,50.0,,12.801262,0.350988
2019-11-23,42.051017,50.0,,12.817150,0.335100
2019-11-24,42.110537,50.0,,12.835292,0.316959
2019-11-25,42.217870,50.0,,12.868007,0.284243
...,...,...,...,...,...
2024-01-15,33.656146,50.0,,10.258393,2.893857
2024-01-16,33.580833,50.0,,10.235438,2.916812
2024-01-17,33.476042,50.0,,10.203498,2.948753
2024-01-18,33.390833,50.0,,10.177526,2.974724


In [25]:
# Convert datetime index to ordinal for GPR
X_train = np.array(lakelevel.index.map(lambda x: x.toordinal())).reshape(-1, 1)
y_train = np.array(lakelevel['rel_LakeLevel(meter)'])

In [26]:
# Fill NaN values in df_long using linear interpolation
lakelevel['rel_LakeLevel(meter)'] = lakelevel['rel_LakeLevel(meter)'].interpolate(method='linear')

In [29]:
# Create a Gaussian Process Regression model
kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-2, 1e2))
model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)

# Prepare the data for training
X_train = np.array(lakelevel['rel_LakeLevel(meter)'].index).reshape(-1, 1)
y_train = np.array(lakelevel['rel_LakeLevel(meter)'])

# Train the GPR model
model.fit(X_train, y_train)

# Convert datetime index to ordinal for GPR predictions
X_predict = np.array(borehole.index.map(lambda x: x.toordinal())).reshape(-1, 1)

# Predict values for the shorter time series
X_predict = np.array(borehole['rel_borehole(meter)'].index).reshape(-1, 1)
y_predict, sigma = model.predict(X_predict, return_std=True)

# Update the short time series with predicted values
borehole['gpr_predict'] = y_predict

UFuncTypeError: ufunc 'divide' cannot use operands with types dtype('<M8[ns]') and dtype('float64')

In [None]:
# Plotting
plt.figure(figsize=(10, 6))
plt.plot(df_long.index, df_long['value_long'], label='Long Time Series', marker='o')
plt.plot(df_short.index, df_short['value_short'], label='Short Time Series (Original)', marker='o')
plt.plot(df_short.index, df_short['value_short_gpr'], label='Short Time Series (GPR Predicted)', marker='o')
plt.fill_between(df_short.index, y_predict - 1.96 * sigma, y_predict + 1.96 * sigma, alpha=0.2, color='gray')
plt.title('Gaussian Process Regression for Time Series Imputation')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()

In [30]:
import pandas as pd
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import matplotlib.pyplot as plt

# Example DataFrames
# Assuming you have lakelevel and borehole DataFrames

# Convert datetime index to ordinal for lakelevel
lakelevel['rel_LakeLevel(meter)'] = lakelevel['rel_LakeLevel(meter)'].interpolate(method='linear')
X_train_lakelevel = np.array(lakelevel.index.map(lambda x: x.toordinal())).reshape(-1, 1)
y_train_lakelevel = np.array(lakelevel['rel_LakeLevel(meter)'])

# Create a Gaussian Process Regression model for lakelevel
kernel_lakelevel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-2, 1e2))
model_lakelevel = GaussianProcessRegressor(kernel=kernel_lakelevel, n_restarts_optimizer=10)
model_lakelevel.fit(X_train_lakelevel, y_train_lakelevel)

# Convert datetime index to ordinal for borehole
X_predict_borehole = np.array(borehole.index.map(lambda x: x.toordinal())).reshape(-1, 1)

# Predict values for the shorter time series (borehole)
X_predict_borehole = np.array(borehole.index.map(lambda x: x.toordinal())).reshape(-1, 1)
y_predict_borehole, sigma_borehole = model_lakelevel.predict(X_predict_borehole, return_std=True)

# Update the short time series (borehole) with predicted values
borehole['gpr_predict'] = y_predict_borehole

# Plotting
plt.figure(figsize=(10, 6))
plt.plot(df_long.index, df_long['value_long'], label='Long Time Series', marker='o')
plt.plot(df_short.index, df_short['value_short'], label='Short Time Series (Original)', marker='o')
plt.plot(df_short.index, df_short['value_short_gpr'], label='Short Time Series (GPR Predicted)', marker='o')
plt.fill_between(df_short.index, y_predict - 1.96 * sigma, y_predict + 1.96 * sigma, alpha=0.2, color='gray')
plt.title('Gaussian Process Regression for Time Series Imputation')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()

AttributeError: 'Period' object has no attribute 'toordinal'