# <span style="color:Maroon">Short Term Dependent Variable</span>

__Summary:__ <span style="color:Blue">The 20-trading-day rolling standard deviation of the adjusted close price is used to define the dependent variable as follows:</span>
    
$\;\;\;\;\;\;$ <span style="color:Blue">Buy: if, within the next 5 days, the price rises above (today's price + 1 standard deviation)</span>
    
$\;\;\;\;\;\;$ <span style="color:Blue">Sell: if, within the next 5 days, the price falls below (today's price - 1.5 standard deviations)</span>
    
$\;\;\;\;\;\;$ <span style="color:Blue">No Action: if, over the next 5 days, the price stays between (today's price - 1.5 standard deviations) and (today's price + 1 standard deviation)</span>

In [1]:
# Import required libraries
import warnings
# Suppress all warnings. The filter is installed before the third-party imports
# below, so it also applies to warnings emitted while they are being imported.
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
# NOTE(review): plt is not referenced in the cells visible here - confirm it is still needed.
import matplotlib.pyplot as plt
import os
# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
np.random.seed(0)

In [2]:
# User-configurable settings for this notebook
index = "BTC-USD"  # instrument identifier; also the prefix of the input/output file names
filename = "{}_hurst_segment.csv".format(index)  # input CSV read from the data directory
date_col = "Date"  # CSV column that becomes the DataFrame index
std_window = 20  # number of rows in the rolling standard deviation window
analysis_window = 5  # number of future days inspected when tagging each row

In [3]:
# Get current working directory
mycwd = os.getcwd()
# Shown for reference: the directory changes in the following cell are relative to this location.
print(mycwd)

C:\Users\sidhu\Downloads\Course 10 Capstone Project\Trading Strategy Development\Dev\BTC-USD\Codes


In [4]:
# Change to the data directory: the "Data" folder that sits next to the
# current working directory (one level up, then into "Data").
# os.path.join builds the path with the platform's own separator, so the cell
# works on Windows, Linux and macOS alike; a hard-coded "\\Data" suffix only
# resolves on Windows. Doing it in one chdir also avoids being left in the
# parent folder if "Data" does not exist.
os.chdir(os.path.join("..", "Data"))

In [5]:
# Load the input CSV, using the date column as the row index
df = pd.read_csv(filename, index_col=date_col)
# Convert the index labels read from the file into datetimes
df = df.set_index(pd.to_datetime(df.index))
# Preview the first rows
df.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,hurst_100,hurst_150,hurst_200,hurst_250,...,Indicator Trend Pos to Neg,Indicator Trend Neg to Pos,Increasing days,Decreasing days,Zero Cross Neg,Zero Cross Pos,Zero Cross Total,Ratio Trend,Ratio Zero,Segment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-09-16,468.174011,452.421997,465.864014,457.334015,21056800.0,457.334015,,,,,...,0,0,,,,,,0.0,0.0,Mean Reverting
2014-09-17,456.859985,413.104004,456.859985,424.440002,34483200.0,424.440002,,,,,...,0,0,,,,,,0.0,0.0,Mean Reverting
2014-09-18,427.834991,384.532013,424.102997,394.79599,37919700.0,394.79599,,,,,...,0,0,,,,,,0.0,0.0,Mean Reverting
2014-09-19,423.29599,389.882996,394.673004,408.903992,36863600.0,408.903992,,,,,...,0,0,,,,,,0.0,0.0,Mean Reverting
2014-09-20,412.425995,393.181,408.084991,398.821014,26580100.0,398.821014,,,,,...,0,0,,,,,,0.0,0.0,Mean Reverting


## <span style="color:Maroon">Calculations for Dependent Variable</span>

In [6]:
# Work on the adjusted close price
adj_close = df['Adj Close']
# Rolling standard deviation over the last `std_window` rows (current row included)
df['DVT STD'] = adj_close.rolling(std_window).std()
# Highest / lowest price over the NEXT `analysis_window` rows (current row excluded):
# a rolling window ends at its own row, so shifting the result back by
# `analysis_window` rows makes each row hold the statistic of the rows that follow it.
# The last `analysis_window` rows have no complete look-ahead and are NaN.
look_ahead = adj_close.rolling(analysis_window)
df['DVT MAX'] = look_ahead.max().shift(-analysis_window)
df['DVT MIN'] = look_ahead.min().shift(-analysis_window)

In [7]:
# Price band around today's price, measured in rolling standard deviations.
# The band is asymmetric: the upper (Buy) threshold is 1 std above today's
# price, while the lower (Sell) threshold is 1.5 std below it.
upper_std_mult = 1    # multiplier for the upper threshold
lower_std_mult = 1.5  # multiplier for the lower threshold
df['DVT Upper'] = df['Adj Close'] + upper_std_mult*df['DVT STD']
df['DVT Lower'] = df['Adj Close'] - lower_std_mult*df['DVT STD']

In [8]:
# Define the dependent variable: 1 = Buy, -1 = Sell, 0 = No Action.
# Conditions are evaluated in order, so when both thresholds are breached
# within the look-ahead window the Buy label takes precedence over Sell.
buy_signal = df['DVT MAX'] > df['DVT Upper']
sell_signal = df['DVT MIN'] < df['DVT Lower']
# NOTE(review): comparisons involving NaN are False, so rows where DVT STD,
# DVT MAX or DVT MIN is missing (start and end of the series) are labelled 0 -
# confirm that treating them as "No Action" is intended.
df['Target'] = np.select([buy_signal, sell_signal], [1, -1], default=0)

In [9]:
# Number of rows per Target label (1, -1, 0), most frequent first
df['Target'].value_counts()

 0    1253
 1     750
-1     291
Name: Target, dtype: int64

In [10]:
# Cross-tabulate Target against Segment.
# Only rows with a positive hurst_200 are kept; rows where hurst_200 is NaN
# fail the comparison and are therefore excluded as well.
df1 = df.loc[df['hurst_200'] > 0]
# normalize='columns' makes each Segment column sum to 1 (label shares per segment)
pd.crosstab(index=df1['Target'], columns=df1['Segment'], normalize='columns')

Segment,Mean Reverting,Trending
Target,Unnamed: 1_level_1,Unnamed: 2_level_1
-1,0.113556,0.135558
0,0.542254,0.531804
1,0.34419,0.332638


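__Comments:__ <span style="color:Blue">The Mean Reverting and Trending segments show nearly the same mix of labels (about 33–34% Buy, 11–14% Sell and 53–54% No Action), so the segment by itself says little about which label is more likely.</span>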

## <span style="color:Maroon">Save the Data</span>

In [11]:
# List the current columns, including the DVT helper columns and Target created above
df.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close', 'hurst_100',
       'hurst_150', 'hurst_200', 'hurst_250', 'hurst_300', 'hurst_400',
       'Adj Close MA20', 'Adj Close MA20 1diff', 'Adj Close MA20 diff Product',
       'Indicator Increasing', 'Indicator Decreasing',
       'Indicator Trend Pos to Neg', 'Indicator Trend Neg to Pos',
       'Increasing days', 'Decreasing days', 'Zero Cross Neg',
       'Zero Cross Pos', 'Zero Cross Total', 'Ratio Trend', 'Ratio Zero',
       'Segment', 'DVT STD', 'DVT MAX', 'DVT MIN', 'DVT Upper', 'DVT Lower',
       'Target'],
      dtype='object')

In [12]:
# Remove the intermediate columns that were only needed to build Target.
# 'DVT STD' is not in this list, so it stays in the frame.
helper_cols = ['DVT MAX', 'DVT MIN', 'DVT Upper', 'DVT Lower']
df.drop(columns=helper_cols, inplace=True)

In [13]:
# List the columns again to check which ones remain in the frame that gets saved
df.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close', 'hurst_100',
       'hurst_150', 'hurst_200', 'hurst_250', 'hurst_300', 'hurst_400',
       'Adj Close MA20', 'Adj Close MA20 1diff', 'Adj Close MA20 diff Product',
       'Indicator Increasing', 'Indicator Decreasing',
       'Indicator Trend Pos to Neg', 'Indicator Trend Neg to Pos',
       'Increasing days', 'Decreasing days', 'Zero Cross Neg',
       'Zero Cross Pos', 'Zero Cross Total', 'Ratio Trend', 'Ratio Zero',
       'Segment', 'DVT STD', 'Target'],
      dtype='object')

In [14]:
# Move to the sibling "Data" directory (one level up, then into "Data").
# os.path.join uses the platform's own path separator, so this works outside
# Windows too; a hard-coded "\\Data" suffix only resolves on Windows.
os.chdir(os.path.join("..", "Data"))
# Write the frame, including its date index, to <index>_hurst_segment_dependent.csv
df.to_csv(index + "_hurst_segment_dependent.csv", index=True)