In [1]:
%run Imports.ipynb

  import pandas.util.testing as tm


# DataFrame Loader

In [274]:
#class DataFrameBuilder:
    
# Target can be the raw next close price or the % change towards it
def DefineTarget(df, target = "value"):
    """Return a new frame with a 'target' column added and 'date' removed.

    The target of each row is derived from the next row's 'price_close'
    (shift(-1)), so the last row's target is NaN.

    Parameters:
        df (DataFrame): frame holding 'price_close' and 'date' columns
        target (str): "value" for the next close price,
            "percentage" for the percent change from the current close
    Returns:
        df(df): copy of df without 'date' and with 'target'; the input
            frame is left untouched
    Raises:
        ValueError: if target is neither "value" nor "percentage"
        KeyError: if 'price_close' or 'date' is missing
    """
    # Fail loudly instead of silently returning a frame without 'target'.
    if target not in ("value", "percentage"):
        raise ValueError(
            "target must be 'value' or 'percentage', got {!r}".format(target)
        )

    next_close = df['price_close'].shift(-1)
    # drop() returns a new frame, so the caller's frame is never modified.
    result = df.drop(columns=['date'])
    if target == "value":
        result['target'] = next_close
    else:
        result['target'] = ((next_close / df['price_close']) - 1) * 100
    return result

# Load the base data and convert the date column to datetime
def LoadBaseDatasetWithTarget(target = 'value'):
    """ Return base Df with specific format of target.
    Parameters:
        target (str):format of target "value"/ "percentage"
    Returns:
        df(df):Frame returned by DefineTarget, without its last row
    Raises:
        ValueError: if a date does not match "%Y-%m-%d %H:%M:%S"
    """
    df = pd.read_csv('data/cleardata.csv', encoding="utf-16")
    # One vectorised parse instead of a Python-level strptime call per row.
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d %H:%M:%S")
    df = DefineTarget(df, target = target)
    # DefineTarget derives the target from the following row (shift(-1)),
    # so the final row has no target and is removed.
    return df.drop(df.tail(1).index)

# Build differenced columns; replace_old chooses between overwriting and adding "_d" columns
def LoadDifferencedDatasetWithTarget(replace_old = False, target = 'value'):
    """ Return the base Df with first-differenced columns.
    Parameters:
        replace_old (bool):False stores each difference in a new "<column>_d"
            column, otherwise the difference overwrites the column itself
        target (str):format of target "value"/ "percentage"
    Returns:
        df(df):Differenced dataframe with rows containing NaN removed
    """
    # Columns that are passed through without differencing.
    skipped = ('date', 'dayOfWeek_sin', 'dayOfWeek_cos', 'isWeekend', 'target')

    frame = LoadBaseDatasetWithTarget(target=target)
    keep_originals = replace_old == False
    for name in frame:
        if name in skipped:
            continue
        destination = str(name) + "_d" if keep_originals else name
        frame[destination] = frame[name].diff()

    # Move every row one position up; the last row becomes all-NaN.
    frame = frame.shift(-1)
    # NOTE(review): the 50 trailing rows are discarded too; the reason for
    # that number is not visible in this file.
    frame = frame.drop(frame.tail(50).index)
    return frame.dropna()

def DefineThreeClasses(df, stay_boundary):
    """Return a copy of df whose 'target' is relabelled Rise/Fall/Stay.

    Parameters:
        df (DataFrame): frame with a numeric 'target' column
        stay_boundary (float): +/- band around zero that counts as 'Stay'
    Returns:
        df(df): copy of df with 'target' replaced by 'Rise' (above the band),
            'Fall' (below the band) or 'Stay' (inside the band, bounds
            included); values matching none of these, such as NaN, get the
            placeholder 'Hopla'. The input frame is left untouched.
    """
    change = df['target']
    # np.select takes the first matching condition. Listing Stay, Fall, Rise
    # keeps the priority of the former chained np.where calls, where a later
    # assignment overrode an earlier one.
    conditions = [
        (change >= -stay_boundary) & (change <= stay_boundary),
        change < -stay_boundary,
        change > stay_boundary,
    ]
    labels = np.select(conditions, ['Stay', 'Fall', 'Rise'], default='Hopla')

    # Work on a copy so the caller's frame keeps its numeric target and
    # gains no helper column.
    result = df.copy()
    result['target'] = labels
    return result

def DefineTwoClasses(df):
    """Relabel df['target'] in place as 'Rise' or 'Fall' and return df.

    Values strictly greater than zero become 'Rise'; everything else
    becomes 'Fall'.
    """
    is_rise = df['target'] > 0
    df['target'] = np.where(is_rise, 'Rise', 'Fall')
    return df

# Rise/Fall classification dataset built from the percentage change
def LoadBaseDatasetForTwoClass():
    """ Return the base dataframe labelled with two classes.
    Parameters:

    Returns:
        df(df):Base dataframe whose 'target' is 'Rise' or 'Fall'
    """
    return DefineTwoClasses(LoadBaseDatasetWithTarget(target='percentage'))

# Rise/Fall/Stay classification dataset built from the percentage change
def LoadBaseDatasetForThreeClass(stay_boundary = 0.2):
    """ Return the base dataframe labelled with three classes.
    Parameters:
        stay_boundary (float): +/- percentage band that defines the "Stay" class
    Returns:
        df(df):Base dataframe whose 'target' is 'Rise', 'Fall' or 'Stay'
    """
    percentage_frame = LoadBaseDatasetWithTarget(target='percentage')
    return DefineThreeClasses(percentage_frame, stay_boundary = stay_boundary)

# Differenced dataset for binary (Rise/Fall) classification
def LoadDifferencedDatasetForTwoClass(replace_old=False):
    """ Return the differenced dataframe labelled with two classes.
    Parameters:
        replace_old (bool):Overwrite columns with their difference, or add new cols
    Returns:
        df(df):Differenced dataframe whose 'target' is 'Rise' or 'Fall'
    """
    return DefineTwoClasses(
        LoadDifferencedDatasetWithTarget(replace_old=replace_old, target='percentage')
    )

# Differenced dataset for three-class (Rise/Fall/Stay) classification
def LoadDifferencedDatasetForThreeClass(replace_old=False, stay_boundary = 0.2):
    """ Return the differenced dataframe labelled with three classes.
    Parameters:
        replace_old (bool):Overwrite columns with their difference, or add new cols
        stay_boundary (float): +/- percentage band that defines the "Stay" class
    Returns:
        df(df):Differenced dataframe whose 'target' is 'Rise', 'Fall' or 'Stay'
    """
    differenced = LoadDifferencedDatasetWithTarget(
        replace_old=replace_old, target='percentage'
    )
    return DefineThreeClasses(differenced, stay_boundary = stay_boundary)


# Dataframe Window Builder

In [271]:
def CreateShiftedColumn(df, col, i):
    """Add column "<col>_<i>" holding df[col] shifted by i rows, in place.

    The 'target' column is never shifted. The same df object is returned
    in every case.
    """
    if col != 'target':
        lagged_name = col + '_' + str(i)
        df[lagged_name] = df[col].shift(i)
    return df

def CreateSlidingWindows(df, windowsize = 3):
    """Return df extended with lagged copies of every non-target column.

    For each lag 1..windowsize and each column other than 'target', a
    column "<col>_<lag>" holding that column shifted down by lag rows is
    appended (all lag-1 columns first, then lag-2, ...). The first
    windowsize rows are removed from the result.

    Parameters:
        df (DataFrame): source frame; it is not modified
        windowsize (int): number of lags to add
    Returns:
        df(df): new frame with the original and the lagged columns
    """
    feature_cols = [col for col in df.columns if col != 'target']
    features = df[feature_cols]
    lagged = [
        features.shift(lag).add_suffix('_' + str(lag))
        for lag in range(1, windowsize + 1)
    ]
    # A single concat avoids mutating the caller's frame and avoids the
    # fragmentation caused by inserting the columns one at a time.
    windowed = pd.concat([df] + lagged, axis=1)
    return windowed.iloc[windowsize:]
