# Test Data Generator and Reader



In [None]:
# import some packages that are used for reading and generating data


In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
import os

"""
These functions are used in the tutorials so that the user can either work with artificially generated test data that contains an introduced break, or read real ESA CCI SM and MERRA2 model SM data and then introduce a break.
All tutorials use these functions to obtain their data.
"""
def load_test_data(trend=0.05, bias=10, breaktime=datetime(2001,12,31), breaksize=(0,0)):
    '''
    Create artificial candidate and reference series with an optional break.

    Both series cover the 730 days from 2001-01-01 to 2002-12-31 and share
    the same sinusoidal signal (two full cycles) scaled by a linearly
    growing trend term. Each series gets its own reproducible uniform noise
    in [0, 10); the candidate is additionally shifted by ``bias``.

    Parameters
    ----------
    trend : float
        Scaling applied to the linear ramp that modulates the signal.
    bias : float
        Constant offset added to the candidate series only.
    breaktime : datetime
        Last time stamp (inclusive) of the period that receives the break.
    breaksize : tuple
        ``(additive, multiplicative)`` break. Candidate values up to
        ``breaktime`` become ``(value + additive) * (1 + multiplicative)``.

    Returns
    -------
    can : pd.Series
        Candidate series named 'CAN' (contains bias and break).
    ref : pd.Series
        Reference series named 'REF'.

    Notes
    -----
    The global numpy random state is re-seeded by this function.
    '''
    days = pd.date_range(start='2001-01-01', end='2002-12-31', freq='D')

    # shared deterministic part: seasonal cycle modulated by the trend ramp
    phase = np.linspace(0, 4 * np.pi, 730)
    signal = (np.sin(phase) * 10. + 20.) * (1 + phase * trend)

    # fixed seeds make the noise reproducible; CAN is drawn first, then REF
    noise = {}
    for label, seed in (('CAN', 1234), ('REF', 5678)):
        np.random.seed(seed)
        noise[label] = pd.Series(index=days, data=np.random.rand(730) * 10, name=label)

    can = (signal + noise['CAN']) + bias
    ref = signal + noise['REF']

    # additive shift first, then relative scaling, for everything up to breaktime
    shifted = can.loc[:breaktime].values + breaksize[0]
    can.loc[:breaktime] = shifted * (1 + breaksize[1])

    return can, ref

def load_real_data(gpi=707393, breaktime=datetime(2001,12,31), breaksize=(0,0)):
    '''
    Load real observations from the ESA CCI SM v04.4 COMBINED dataset and the
    MERRA2 model, and optionally introduce a break in the candidate.

    The data is read from ``SMECVGPI<gpi>.csv`` in the directory
    ``../../tests/test-data/csv_ts`` relative to the current working
    directory. Rows with missing values in any column are dropped.

    Parameters
    ----------
    gpi : int
        Location identifier for a single cell point in the ESA CCI SM data
        grid; selects the CSV file to read.
    breaktime : datetime
        Last time stamp (inclusive) of the period that receives the break.
    breaksize : tuple
        ``(additive, multiplicative)`` break. Candidate values up to
        ``breaktime`` become ``(value + additive) * (1 + multiplicative)``,
        the same convention as ``load_test_data``. The default ``(0, 0)``
        leaves the observations unchanged.

    Returns
    -------
    can : pd.Series
        Candidate series (column 'ESA CCI SM v04.4 (COMBINED)').
    ref : pd.Series
        Reference series (column 'MERRA2 SFMC').
    '''
    # add a few points that can be loaded and used
    df = pd.read_csv(os.path.join(os.path.abspath(''), '..', '..', 'tests', 'test-data', 'csv_ts',
                                  'SMECVGPI{}.csv'.format(gpi)), index_col=0, parse_dates=True)
    df = df.dropna() # drop missing days here so that the tables look nice

    # work on copies so the in-place break below is not a chained assignment
    # on a column of the DataFrame
    can = df['ESA CCI SM v04.4 (COMBINED)'].copy()
    ref = df['MERRA2 SFMC'].copy()

    # bias the candidate before the break; the multiplicative part is relative
    # (1 + factor), so the default breaksize of (0, 0) is a no-op rather than
    # zeroing out all values before the break
    can.loc[:breaktime] = (can.loc[:breaktime] + breaksize[0]) * (1 + breaksize[1])
    return can, ref