# Filling Missing Data

This report serves as the base for filling missing data in our datasets. It outlines the methods used to handle missing values, the rationale behind choosing these methods, and the impact on the overall data quality.

**Jakob Balkovec**  
**Date: Nov 5, 2025**

In [9]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import ee
from datetime import datetime, timedelta
from tqdm import tqdm

In [7]:
FILES_DARRINGTON = {
  "2007": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2007.txt"),
  "2008": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2008.txt"),
  "2009": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2009.txt"),
  "2010": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2010.txt"),
  "2011": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2011.txt"),
  "2012": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2012.txt"),
  "2013": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2013.txt"),
  "2014": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2014.txt"),
  "2015": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2015.txt"),
  "2016": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2016.txt"),
  "2017": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2017.txt"),
  "2018": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2018.txt"),
  "2019": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2019.txt"),
  "2020": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2020.txt"),
  "2021": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2021.txt"),
  "2022": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2022.txt"),
  "2023": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2023.txt"),
  "2024": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2024.txt"),
  "2025": Path("/Users/jbalkovec/Desktop/MDR/Temporal/Pipeline/data/raw/darrington/uscrn_WA_Darrington_21_NNE_2025.txt")
}

In [4]:
def parse_uscrn_file(file_path: Path) -> pd.DataFrame:

    col_names = [
        "station_id", "date", "crx_vn", "longitude", "latitude",
        "air_temp_max", "air_temp_min", "air_temp_mean", "air_temp_avg",
        "precipitation", "solar_radiation", "sur_temp_type",
        "sur_temp_max", "sur_temp_min", "sur_temp_avg",
        "rh_max", "rh_min", "rh_mean",
        "soil_moisture_5cm", "soil_moisture_10cm", "soil_moisture_20cm",
        "soil_moisture_50cm", "soil_moisture_100cm",
        "soil_temp_5cm", "soil_temp_10cm", "soil_temp_20cm",
        "soil_temp_50cm", "soil_temp_100cm"
    ]

    df = pd.read_csv(
        file_path,
        delim_whitespace=True,
        header=None,
        names=col_names,
        dtype=str,  # load all as string first to prevent parsing errors
        na_values=["-9999.0", "-99.000", "-99.0", "-9999", "-99"],
    )

    numeric_cols = [c for c in df.columns if c not in ["station_id", "date", "sur_temp_type"]]
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")

    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d", errors="coerce")
    df = df.sort_values("date").reset_index(drop=True)
    df = df.drop_duplicates(subset=["date"])

    df["source_file"] = file_path.name

    return df


def load_darrington_dataset(files_dict: dict[str, Path]) -> pd.DataFrame:
    all_dfs = []
    for year, path in files_dict.items():
        if path.exists():
            df_year = parse_uscrn_file(path)
            all_dfs.append(df_year)
        else:
            print(f"Missing file for {year}: {path}")
    if not all_dfs:
        raise FileNotFoundError("No valid USCRN files found for Darrington.")
    df_all = pd.concat(all_dfs, ignore_index=True)
    return df_all

In [23]:
darrington_df = load_darrington_dataset(FILES_DARRINGTON)

# make a subset from 2015-01-01 to 2015-12-31 for testing
darrington_2015 = darrington_df[
    (darrington_df["date"] >= "2015-01-01") &
    (darrington_df["date"] <= "2015-12-31")
].reset_index(drop=True)

darrington_2015.head()

Unnamed: 0,station_id,date,crx_vn,longitude,latitude,air_temp_max,air_temp_min,air_temp_mean,air_temp_avg,precipitation,...,soil_moisture_10cm,soil_moisture_20cm,soil_moisture_50cm,soil_moisture_100cm,soil_temp_5cm,soil_temp_10cm,soil_temp_20cm,soil_temp_50cm,soil_temp_100cm,source_file
0,4223,2015-01-01,2.422,-121.45,48.54,3.7,-6.9,-1.6,-2.6,0.0,...,0.319,0.226,0.191,0.232,0.3,0.7,2.2,2.8,3.2,uscrn_WA_Darrington_21_NNE_2015.txt
1,4223,2015-01-02,2.422,-121.45,48.54,1.2,-1.0,0.1,0.1,7.0,...,0.319,0.223,0.187,0.229,0.3,0.6,2.0,2.5,2.9,uscrn_WA_Darrington_21_NNE_2015.txt
2,4223,2015-01-03,2.422,-121.45,48.54,2.5,0.1,1.3,0.9,2.3,...,0.333,0.228,0.192,0.231,0.3,0.6,1.8,2.3,2.7,uscrn_WA_Darrington_21_NNE_2015.txt
3,4223,2015-01-04,2.422,-121.45,48.54,2.1,0.1,1.1,0.8,53.7,...,0.379,0.29,0.246,0.257,0.4,0.6,1.6,2.0,2.4,uscrn_WA_Darrington_21_NNE_2015.txt
4,4223,2015-01-05,2.422,-121.45,48.54,4.8,2.0,3.4,3.7,80.1,...,0.396,0.354,0.338,0.289,1.1,0.8,1.2,1.4,1.6,uscrn_WA_Darrington_21_NNE_2015.txt


## Satellite Pipe - (Copy Paste)

In [14]:
try:
    ee.Initialize(project="mdr-project-475522")
except Exception:
    ee.Authenticate()
    ee.Initialize(project="mdr-project-475522")

def fetch_satellite_values(lat, lon, start_date, end_date):
    MODIS_LST = "MODIS/061/MOD11A1"
    MODIS_NDVI = "MODIS/061/MOD13Q1"
    GPM_RAIN = "NASA/GPM_L3/IMERG_V07"

    point = ee.Geometry.Point([lon, lat])
    buffer_region = point.buffer(1000)

    start_dt = datetime.strptime(start_date, "%Y-%m-%d") - timedelta(days=3)
    end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=3)
    padded_start, padded_end = start_dt.strftime("%Y-%m-%d"), end_dt.strftime("%Y-%m-%d")

    result = {"LST": None, "NDVI": None, "Rain_sat": None}

    try:
        # MODIS LST (daily)
        lst = (
            ee.ImageCollection(MODIS_LST)
            .filterBounds(buffer_region)
            .filterDate(padded_start, padded_end)
            .select("LST_Day_1km")
        )
        if lst.size().getInfo() > 0:
            val = lst.mean().reduceRegion(
                reducer=ee.Reducer.mean(),
                geometry=buffer_region,
                scale=1000,
                bestEffort=True,
                maxPixels=1e9,
            ).get("LST_Day_1km").getInfo()
            if val is not None:
                result["LST"] = float(val) * 0.02

        # MODIS NDVI (16-day composite)
        ndvi = (
            ee.ImageCollection(MODIS_NDVI)
            .filterBounds(buffer_region)
            .filterDate(padded_start, padded_end)
            .select("NDVI")
        )
        if ndvi.size().getInfo() > 0:
            val = ndvi.mean().reduceRegion(
                reducer=ee.Reducer.mean(),
                geometry=buffer_region,
                scale=250,
                bestEffort=True,
                maxPixels=1e9,
            ).get("NDVI").getInfo()
            if val is not None:
                result["NDVI"] = float(val) * 0.0001

        # GPM IMERG (daily average)
        rain = (
            ee.ImageCollection(GPM_RAIN)
            .filterBounds(buffer_region)
            .filterDate(padded_start, padded_end)
            .select("precipitation")
        )
        if rain.size().getInfo() > 0:
            val = rain.mean().reduceRegion(
                reducer=ee.Reducer.mean(),
                geometry=buffer_region,
                scale=10000,
                bestEffort=True,
                maxPixels=1e9,
            ).get("precipitation").getInfo()
            if val is not None:
                result["Rain_sat"] = float(val)

    except Exception as e:
        print(f"Satellite fetch failed for {start_date}–{end_date}: {e}")

    return result

In [15]:
def build_daily_satellite_df(lat, lon, start_date, end_date, freq_days=3):
    all_days = pd.date_range(start=start_date, end=end_date, freq="D")
    sampled_days = pd.date_range(start=start_date, end=end_date, freq=f"{freq_days}D")

    records = []

    for d in tqdm(sampled_days, desc="Fetching GEE Satellite Data"):
        s = d.strftime("%Y-%m-%d")
        e = (d + timedelta(days=freq_days - 1)).strftime("%Y-%m-%d")
        values = fetch_satellite_values(lat, lon, s, e)
        records.append({"date": d, **values})

    df = pd.DataFrame(records)
    df["date"] = pd.to_datetime(df["date"])

    full = pd.DataFrame({"date": all_days})
    df_full = pd.merge(full, df, on="date", how="left")

    return df_full

In [24]:
lat, lon = 48.54, -121.45

sat_darrington_2015 = build_daily_satellite_df(
    lat,
    lon,
    "2015-01-01", "2015-12-31",
    freq_days=3)

Fetching GEE Satellite Data: 100%|██████████| 122/122 [03:52<00:00,  1.90s/it]


In [26]:
sat_darrington_2015.head(n=5)

Unnamed: 0,date,LST,NDVI,Rain_sat
0,2015-01-01,272.350776,0.759495,0.870878
1,2015-01-02,,,
2,2015-01-03,,,
3,2015-01-04,278.419179,0.759495,0.890747
4,2015-01-05,,,


In [28]:
darrington_df_full = pd.merge(
    darrington_2015,
    sat_darrington_2015,
    on="date",
    how="left"
)

darrington_df_full.head()

Unnamed: 0,station_id,date,crx_vn,longitude,latitude,air_temp_max,air_temp_min,air_temp_mean,air_temp_avg,precipitation,...,soil_moisture_100cm,soil_temp_5cm,soil_temp_10cm,soil_temp_20cm,soil_temp_50cm,soil_temp_100cm,source_file,LST,NDVI,Rain_sat
0,4223,2015-01-01,2.422,-121.45,48.54,3.7,-6.9,-1.6,-2.6,0.0,...,0.232,0.3,0.7,2.2,2.8,3.2,uscrn_WA_Darrington_21_NNE_2015.txt,272.350776,0.759495,0.870878
1,4223,2015-01-02,2.422,-121.45,48.54,1.2,-1.0,0.1,0.1,7.0,...,0.229,0.3,0.6,2.0,2.5,2.9,uscrn_WA_Darrington_21_NNE_2015.txt,,,
2,4223,2015-01-03,2.422,-121.45,48.54,2.5,0.1,1.3,0.9,2.3,...,0.231,0.3,0.6,1.8,2.3,2.7,uscrn_WA_Darrington_21_NNE_2015.txt,,,
3,4223,2015-01-04,2.422,-121.45,48.54,2.1,0.1,1.1,0.8,53.7,...,0.257,0.4,0.6,1.6,2.0,2.4,uscrn_WA_Darrington_21_NNE_2015.txt,278.419179,0.759495,0.890747
4,4223,2015-01-05,2.422,-121.45,48.54,4.8,2.0,3.4,3.7,80.1,...,0.289,1.1,0.8,1.2,1.4,1.6,uscrn_WA_Darrington_21_NNE_2015.txt,,,


## Baseline - Linear Interpolation

In [30]:
def linear_time_interpolator(df, cols=None, limit=None):
    df = df.copy()
    df = df.sort_values('date')
    df.set_index('date', inplace=True)

    numeric_cols = cols or df.select_dtypes(include=np.number).columns
    df[numeric_cols] = df[numeric_cols].interpolate(
        method='time',
        limit=limit,
        limit_direction='both'
    )

    return df.reset_index()

## Multivariate Model Based Imputer

In [31]:
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

def multivariate_imputer(df, cols):
    imp = IterativeImputer(max_iter=20, random_state=42)
    df_imputed = df.copy()
    df_imputed[cols] = imp.fit_transform(df[cols])
    return df_imputed

## Masking Test

In [112]:
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

def interpolation_bridge_test(df, col, start_date=None, end_date=None, method="time"):
    df = df.copy().sort_values("date").reset_index(drop=True)
    df = df[["date", col]].dropna(subset=[col])
    df.set_index("date", inplace=True)

    # Auto-pick two known points if not given
    if not start_date or not end_date:
        valid_dates = df.index.to_list()
        for i in range(len(valid_dates) - 1):
            if (valid_dates[i + 1] - valid_dates[i]).days >= 3:
                start_date, end_date = valid_dates[i], valid_dates[i + 1]
                break

    if start_date not in df.index or end_date not in df.index:
        raise ValueError("Start or end date not found in dataset with known values.")

    true_val = df.loc[end_date, col]

    # Build full daily range with both endpoints
    bridge_dates = pd.date_range(start=start_date, end=end_date, freq="D")
    bridge_df = pd.DataFrame({"date": bridge_dates})
    bridge_df = bridge_df.merge(df.reset_index(), on="date", how="left")

    # Keep endpoints, mask only in-between for interpolation display
    mask_between = (bridge_df["date"] > start_date) & (bridge_df["date"] < end_date)
    bridge_df.loc[mask_between, col] = np.nan
    bridge_df.set_index("date", inplace=True)

    # Interpolate full path for visualization
    bridge_df[col + "_interp"] = bridge_df[col].interpolate(method=method, limit_direction="both")

    # --- Error calculation section ---
    # Copy, then hide the true end for testing
    df_error = bridge_df.copy()
    df_error.loc[end_date, col] = np.nan
    df_error[col + "_interp"] = df_error[col].interpolate(method=method, limit_direction="both")

    pred_val = df_error.loc[end_date, col + "_interp"]
    error = pred_val - true_val
    rmse = np.sqrt(mean_squared_error([true_val], [pred_val]))

    # --- Output ---
    print("=" * 70)
    print(f"Interpolation Bridge Test for: {col}")
    print("-" * 70)
    print(f" Start Date       : {start_date.date()}")
    print(f" End Date         : {end_date.date()}")
    print(f" Days Spanned     : {(end_date - start_date).days}")
    print(f" True End Value   : {true_val:10.4f}")
    print(f" Predicted Value  : {pred_val:10.4f}")
    print(f" Error (signed)   : {error:10.4f}")
    print(f" Absolute Error   : {abs(error):10.4f}")
    print(f" RMSE             : {rmse:10.4f}")
    print("-" * 70)
    print(" Intermediate Values (Simulated Daily Interpolation):\n")
    print(bridge_df.reset_index().to_string(index=False))
    print("=" * 70)

    return {
        "feature": col,
        "start_date": start_date,
        "end_date": end_date,
        "days_between": (end_date - start_date).days,
        "true_end_val": true_val,
        "pred_end_val": pred_val,
        "error": error,
        "abs_error": abs(error),
        "RMSE": rmse,
        "interp_segment": bridge_df.reset_index()
    }

## Tests

In [None]:
_ = interpolation_bridge_test(
    sat_darrington_2015,
    col="LST",
    method="time"
)

Interpolation Bridge Test for: LST
----------------------------------------------------------------------
 Start Date       : 2015-01-01
 End Date         : 2015-01-04
 Days Spanned     : 3
 True End Value   :   278.4192
 Predicted Value  :   272.3508
 Error (signed)   :    -6.0684
 Absolute Error   :     6.0684
 RMSE             :     6.0684
----------------------------------------------------------------------
 Intermediate Values (Simulated Daily Interpolation):

      date        LST  LST_interp
2015-01-01 272.350776  272.350776
2015-01-02        NaN  274.373577
2015-01-03        NaN  276.396378
2015-01-04 278.419179  278.419179


In [114]:
_ = interpolation_bridge_test(
    sat_darrington_2015,
    col="NDVI",
    method="time"
)

Interpolation Bridge Test for: NDVI
----------------------------------------------------------------------
 Start Date       : 2015-01-01
 End Date         : 2015-01-04
 Days Spanned     : 3
 True End Value   :     0.7595
 Predicted Value  :     0.7595
 Error (signed)   :     0.0000
 Absolute Error   :     0.0000
 RMSE             :     0.0000
----------------------------------------------------------------------
 Intermediate Values (Simulated Daily Interpolation):

      date     NDVI  NDVI_interp
2015-01-01 0.759495     0.759495
2015-01-02      NaN     0.759495
2015-01-03      NaN     0.759495
2015-01-04 0.759495     0.759495


In [115]:
_ = interpolation_bridge_test(
    sat_darrington_2015,
    col="Rain_sat",
    method="time"
)

Interpolation Bridge Test for: Rain_sat
----------------------------------------------------------------------
 Start Date       : 2015-01-01
 End Date         : 2015-01-04
 Days Spanned     : 3
 True End Value   :     0.8907
 Predicted Value  :     0.8709
 Error (signed)   :    -0.0199
 Absolute Error   :     0.0199
 RMSE             :     0.0199
----------------------------------------------------------------------
 Intermediate Values (Simulated Daily Interpolation):

      date  Rain_sat  Rain_sat_interp
2015-01-01  0.870878         0.870878
2015-01-02       NaN         0.877501
2015-01-03       NaN         0.884124
2015-01-04  0.890747         0.890747


In [124]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
import pandas as pd
import numpy as np

def imputation_bridge_test(df, col, model_type="linear", start_date=None, end_date=None, window=7):
    df = df.copy().sort_values("date").reset_index(drop=True)
    df = df[["date", col]]

    # Select valid anchor points
    if not start_date or not end_date:
        valid_dates = df[df[col].notna()]["date"].to_list()

        if len(valid_dates) < 2:
            raise ValueError(f"Not enough valid values for {col} to run test.")

        # Try to find two points ≥ window apart; else fallback to consecutive
        chosen = False
        for i in range(len(valid_dates) - 1):
            if (valid_dates[i + 1] - valid_dates[i]).days >= window:
                start_date, end_date = valid_dates[i], valid_dates[i + 1]
                chosen = True
                break
        if not chosen:
            start_date, end_date = valid_dates[0], valid_dates[1]

    # Make sure both exist in dataset
    if start_date not in set(df["date"]) or end_date not in set(df["date"]):
        raise ValueError(f"No known values found for {col} in date range {start_date}–{end_date}")

    # Ground truth
    true_val = df.loc[df["date"] == end_date, col].values[0]

    # Create full daily frame
    bridge_dates = pd.date_range(start=start_date, end=end_date, freq="D")
    bridge_df = pd.DataFrame({"date": bridge_dates})
    bridge_df = bridge_df.merge(df, on="date", how="left")

    # Mask intermediate days
    mask_between = (bridge_df["date"] > start_date) & (bridge_df["date"] < end_date)
    bridge_df.loc[mask_between, col] = np.nan

    # --- Choose model ---
    if model_type == "xgboost":
        model = XGBRegressor(n_estimators=200, learning_rate=0.1, max_depth=3, verbosity=0)
    elif model_type == "linear":
        model = LinearRegression()
    elif model_type == "rolling":
        bridge_df[col + "_interp"] = bridge_df[col].ffill().rolling(window=3, min_periods=1).mean()
    else:
        raise ValueError("Unknown model type")

    # --- Fit model if needed ---
    if model_type in ["xgboost", "linear"]:
        known = bridge_df.dropna(subset=[col])
        X_train = (known["date"] - known["date"].iloc[0]).dt.days.values.reshape(-1, 1)
        y_train = known[col].values
        model.fit(X_train, y_train)

        X_all = (bridge_df["date"] - known["date"].iloc[0]).dt.days.values.reshape(-1, 1)
        bridge_df[col + "_interp"] = model.predict(X_all)

    # --- Evaluate ---
    pred_val = bridge_df.loc[bridge_df["date"] == end_date, col + "_interp"].values[0]
    rmse = np.sqrt(mean_squared_error([true_val], [pred_val]))
    mae = mean_absolute_error([true_val], [pred_val])
    r2 = np.nan  # not meaningful for one sample

    # --- Print summary ---
    print("=" * 70)
    print(f"Imputation Bridge Test for: {col} | Model: {model_type}")
    print("-" * 70)
    print(f" Start Date  : {start_date.date()}")
    print(f" End Date    : {end_date.date()}")
    print(f" True Value  : {true_val:.4f}")
    print(f" Predicted   : {pred_val:.4f}")
    print(f" Error       : {(pred_val - true_val):+.4f}")
    print(f" RMSE        : {rmse:.4f} | MAE={mae:.4f}")
    print("=" * 70)

    return {
        "feature": col,
        "model": model_type,
        "start_date": start_date,
        "end_date": end_date,
        "true_val": true_val,
        "pred_val": pred_val,
        "rmse": rmse,
        "mae": mae,
        "r2": r2,
        "bridge_df": bridge_df
    }

In [125]:
for feature in ["LST", "NDVI", "Rain_sat"]:
    for model in ["linear", "rolling", "xgboost"]:
        _ = imputation_bridge_test(sat_darrington_2015, col=feature, model_type=model)

Imputation Bridge Test for: LST | Model: linear
----------------------------------------------------------------------
 Start Date  : 2015-02-03
 End Date    : 2015-02-12
 True Value  : 282.2132
 Predicted   : 282.2132
 Error       : +0.0000
 RMSE        : 0.0000 | MAE=0.0000
Imputation Bridge Test for: LST | Model: rolling
----------------------------------------------------------------------
 Start Date  : 2015-02-03
 End Date    : 2015-02-12
 True Value  : 282.2132
 Predicted   : 282.1566
 Error       : -0.0566
 RMSE        : 0.0566 | MAE=0.0566
Imputation Bridge Test for: LST | Model: xgboost
----------------------------------------------------------------------
 Start Date  : 2015-02-03
 End Date    : 2015-02-12
 True Value  : 282.2132
 Predicted   : 282.2123
 Error       : -0.0009
 RMSE        : 0.0009 | MAE=0.0009
Imputation Bridge Test for: NDVI | Model: linear
----------------------------------------------------------------------
 Start Date  : 2015-01-04
 End Date    : 2015-0