## Import

In [1]:
%run indicator_functions.ipynb

import json
import os

import numpy as np
import pandas as pd
import requests

import matplotlib.pyplot as plt
%matplotlib inline

from IPython.display import display
pd.set_option('display.max_columns', None)

### API Call

Daily price data is pulled from the [Alpha Vantage API](https://www.alphavantage.co/documentation/).

In [2]:
# Request parameters for the Alpha Vantage query.
function = 'TIME_SERIES_DAILY'
symbol = 'AMZN'
output_size = 'full'

# The API key is read from the environment so it is never stored in the
# notebook. Export ALPHAVANTAGE_API_KEY before starting the kernel.
# NOTE(review): the key that used to be hardcoded here should be treated as
# exposed and rotated.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the ALPHAVANTAGE_API_KEY environment variable to your Alpha Vantage API key.')

url = 'https://www.alphavantage.co/query?function={}&symbol={}&outputsize={}&apikey={}'.format(
            function, symbol, output_size, api_key)

# Fail fast on network problems and HTTP errors instead of hanging or
# parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# grab correct info into dataframe
response_dict = json.loads(response.text)
series_key = 'Time Series (Daily)'
if series_key not in response_dict:
    # A payload without the series (e.g. an error or rate-limit message)
    # would otherwise surface as an unexplained KeyError.
    raise RuntimeError(
        'Alpha Vantage response has no {!r} entry: {}'.format(series_key, response_dict))
data = response_dict[series_key]

# One row per trading day, one column per field returned by the API.
df = pd.DataFrame.from_dict(data, orient='index').astype(float)

# Order chronologically (oldest first) by sorting on the parsed dates rather
# than relying on the order in which the API happens to list them.
df.index = pd.to_datetime(df.index)
df = df.sort_index()
df = df.rename_axis(symbol)

# Names are assigned positionally: this assumes the API returns exactly the
# open/high/low/close/volume fields in that order -- TODO confirm.
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']

In [3]:
# Show the assembled price frame (the rendered output elides the middle rows).
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
AMZN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999-11-01,68.06,71.8800,66.3100,69.13,12824100.0
1999-11-02,69.75,70.0000,65.0600,66.44,13243200.0
1999-11-03,68.19,68.5000,65.0000,65.81,10772100.0
1999-11-04,67.19,67.1900,61.0000,63.06,16759200.0
1999-11-05,64.75,65.5000,62.2500,64.94,11091400.0
...,...,...,...,...,...
2021-02-17,3263.60,3320.9100,3259.4996,3308.64,3261801.0
2021-02-18,3282.42,3338.0000,3273.9400,3328.23,3033607.0
2021-02-19,3328.23,3333.5000,3245.7500,3249.90,4305184.0
2021-02-22,3208.13,3232.3199,3172.2600,3180.74,3461266.0


## Column Engineering

### Sentiments

Creating three different y values:  
  
<u>regression model:</u>
<ul>
<li> **"r_percent_increase_high"** measures the percent change from the current day's close to the highest high over the following five trading days. </li>
     
<li> **"r_percent_increase_close"** measures the percent change from the current day's close to the highest close over the following five trading days.  </li>
</ul>

<u>classification model:</u>
<ul>
<li> **"c_four_percent_high"** flags whether the highest high over the following five trading days is at least 4 percent above the current day's close ("Buy") or not ("0").</li>

</ul>

In [4]:
def _add_forward_labels(prices, horizon=5, threshold=.04):
    """Return a copy of ``prices`` with the three look-ahead target columns.

    For every row the look-ahead window is the next ``horizon`` rows (the
    current row is excluded):

    * ``r_percent_increase_high``  -- percent change from the row's Close to
      the highest High in the window.
    * ``r_percent_increase_close`` -- percent change from the row's Close to
      the highest Close in the window.
    * ``c_four_percent_high``      -- ``'Buy'`` when the High-based change is
      at least ``threshold`` (as a fraction), otherwise ``'0'``.

    The last ``horizon`` rows have no complete window and keep NaN in all
    three columns. A frame with ``horizon`` rows or fewer gets only NaN.

    Parameters
    ----------
    prices : pandas.DataFrame
        Must contain ``'High'`` and ``'Close'`` columns, ordered oldest first.
    horizon : int, default 5
        Number of future rows inspected for each row.
    threshold : float, default .04
        Minimum fractional rise that is labelled ``'Buy'``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.
    """
    labelled = prices.copy()
    close = labelled['Close']

    # rolling(horizon).max() at row j covers rows j-horizon+1 .. j; shifting
    # by -horizon places at row i the maximum over rows i+1 .. i+horizon.
    future_high = labelled['High'].rolling(horizon).max().shift(-horizon)
    future_close = close.rolling(horizon).max().shift(-horizon)

    change_high = (future_high - close) / close
    change_close = (future_close - close) / close

    labelled['r_percent_increase_high'] = change_high * 100
    labelled['r_percent_increase_close'] = change_close * 100

    # Build the string labels in an object Series so no strings are written
    # into a float column; rows without a full window are masked back to NaN.
    signal = pd.Series(np.where(change_high >= threshold, 'Buy', '0'),
                       index=labelled.index, dtype=object)
    labelled['c_four_percent_high'] = signal.where(change_high.notna())

    return labelled


df = _add_forward_labels(df)

In [5]:
# Spot-check five rows near the end of the frame, including the new target columns.
df.iloc[-17:-12]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,r_percent_increase_high,r_percent_increase_close,c_four_percent_high
AMZN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-29,3230.0,3236.99,3184.55,3206.2,4293556.0,7.104984,5.420747,Buy
2021-02-01,3242.36,3350.26,3235.025,3342.88,4160212.0,2.725793,1.11042,0
2021-02-02,3380.0,3427.74,3361.125,3380.0,6183716.0,1.597633,-0.823964,0
2021-02-03,3425.01,3434.0,3308.62,3312.53,7088781.0,1.946247,1.196065,0
2021-02-04,3330.0,3347.0,3277.75,3331.0,3670661.0,1.380967,0.634944,0


### Technical Indicators

All technical indicator functions are defined in the `indicator_functions` notebook.

In [6]:
# Indicator helpers are loaded from indicator_functions.ipynb via %run.
# Each one is called with the frame and a row position; presumably they write
# their columns into df in place -- TODO confirm against that notebook.
indicator_steps = (sma, stochastic, rsi, roc, atr, adx)

# Walk the rows in order and run every helper on a row before moving to the
# next one, keeping the helpers in the same sequence for each row.
for i in range(len(df)):
    for step in indicator_steps:
        step(df, i)

In [7]:
# Inspect the most recent rows after the indicator helpers have run.
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,r_percent_increase_high,r_percent_increase_close,c_four_percent_high,SMA,SMA_diff,Stochastic,Stochastic_diff,rsi_up_var,rsi_down_var,RSI,RSI_diff,ROC,ROC_diff,tr,ATR,ATR_diff,pos_dx,neg_dx,dx,ADX,ADX_diff
AMZN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
2021-02-17,3263.6,3320.91,3259.4996,3308.64,3261801.0,,,,3301.677778,-0.432222,49.74544,15.911004,39.69,0.0,58.067459,13.510477,-0.117433,3.16807,61.4104,71.143864,-5.573543,12.61,0.0,12.143636,9.981486,-0.12422
2021-02-18,3282.42,3338.0,3273.94,3328.23,3033607.0,,,,3301.37,-0.307778,57.598717,7.853277,19.59,0.0,59.322976,1.255517,-0.083158,0.034275,64.06,70.506007,-0.637857,17.09,0.0,23.871276,10.650869,0.669383
2021-02-19,3328.23,3333.5,3245.75,3249.9,4305184.0,,,,3290.008889,-11.361111,8.266481,-49.332237,0.0,78.33,54.100514,-5.222461,-3.050281,-2.967123,87.75,72.98315,2.477143,0.0,0.0,23.871276,11.320252,0.669383
2021-02-22,3208.13,3232.3199,3172.26,3180.74,3461266.0,,,,3274.208889,-15.8,3.239856,-5.026624,0.0,69.16,32.57833,-21.522184,-4.279343,-1.229062,77.64,68.238864,-4.744286,0.0,0.0,15.076784,10.043158,-1.277094
2021-02-23,3127.03,3204.73,3093.6,3194.5,4615557.0,,,,3261.931111,-12.277778,29.641598,26.401742,13.76,0.0,29.014888,-3.563442,-3.343419,0.935924,111.13,70.115293,1.876429,0.0,0.0,97.29276,16.478663,6.435505


## Export

In [8]:
# Name the export after the ticker so that changing `symbol` does not silently
# overwrite another symbol's file; for 'AMZN' this is still ../data/amzn.csv.
df.to_csv('../data/{}.csv'.format(symbol.lower()))