# Importing the libraries

In [20]:
import time
program_start = time.time()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import os

# API key and ticker symbol

In [21]:
av_key = os.environ.get('av_key')
ticker = 'TSLA'

# Importing the dataset

In [22]:
ts = TimeSeries(key=av_key, output_format = 'pandas')
df, meta_data = ts.get_intraday(symbol=ticker, interval='1min', outputsize='full')
print(df.head())

                     1. open  2. high   3. low  4. close  5. volume
date                                                               
2020-07-17 20:00:00  1506.78  1506.78  1506.78   1506.78      873.0
2020-07-17 19:59:00  1505.75  1505.75  1505.75   1505.75     1149.0
2020-07-17 19:56:00  1505.75  1505.75  1505.75   1505.75      524.0
2020-07-17 19:55:00  1505.00  1505.00  1505.00   1505.00      656.0
2020-07-17 19:54:00  1504.50  1504.50  1504.50   1504.50      310.0


In [23]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3644 entries, 2020-07-17 20:00:00 to 2020-07-13 04:01:00
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   1. open    3644 non-null   float64
 1   2. high    3644 non-null   float64
 2   3. low     3644 non-null   float64
 3   4. close   3644 non-null   float64
 4   5. volume  3644 non-null   float64
dtypes: float64(5)
memory usage: 170.8 KB
None


# Data preprocessing

In [24]:
df.columns = ['o', 'h', 'l', 'c', 'vol'] #Renaming columns
color, rng, u_shadow, l_shadow, ub, lb = [], [], [], [], [], []

for ind in df.index:
    rng.append(df['h'][ind] - df['l'][ind]) #Calculating range
    
    if df['o'][ind] > df['c'][ind]:
        color.append('red') #open higher than close --> bearishness
    else:
        color.append('blue') #open is lower than or equal to close --> bulllishness

    if color[::-1] == 'red':  #Determining shadow length and bounds for red candles
        u_shadow.append(df['h'][ind] - df['o'][ind])
        l_shadow.append(df['c'][ind] - df['l'][ind])
        ub.append(df['o'][ind])
        lb.append(df['c'][ind])
    else: #Determining shadow length and bounds for blue candles
        u_shadow.append(df['h'][ind] - df['c'][ind])
        l_shadow.append(df['o'][ind] - df['l'][ind])
        ub.append(df['c'][ind])
        lb.append(df['o'][ind])
        
df['color'] = color
df['range'] = rng
df['u_shadow'] = u_shadow
df['l_shadow'] = l_shadow
df['u_bound'] = ub
df['l_bound'] = lb
print(df.head())

                           o        h        l        c     vol color  range  \
date                                                                           
2020-07-17 20:00:00  1506.78  1506.78  1506.78  1506.78   873.0  blue    0.0   
2020-07-17 19:59:00  1505.75  1505.75  1505.75  1505.75  1149.0  blue    0.0   
2020-07-17 19:56:00  1505.75  1505.75  1505.75  1505.75   524.0  blue    0.0   
2020-07-17 19:55:00  1505.00  1505.00  1505.00  1505.00   656.0  blue    0.0   
2020-07-17 19:54:00  1504.50  1504.50  1504.50  1504.50   310.0  blue    0.0   

                     u_shadow  l_shadow  u_bound  l_bound  
date                                                       
2020-07-17 20:00:00       0.0       0.0  1506.78  1506.78  
2020-07-17 19:59:00       0.0       0.0  1505.75  1505.75  
2020-07-17 19:56:00       0.0       0.0  1505.75  1505.75  
2020-07-17 19:55:00       0.0       0.0  1505.00  1505.00  
2020-07-17 19:54:00       0.0       0.0  1504.50  1504.50  


# Single candlestick patterns

In [25]:
s_pattern = []
for ind in df.index:
    if (df['u_shadow'][ind]/df['u_bound'][ind] <= 0.003 and 
        df['l_shadow'][ind]/df['l_bound'][ind] <= 0.003):
        s_pattern.append('marubozu')
    elif df['u_shadow'][ind] == df['l_shadow'][ind]:
        if df['range'][ind] < 0.005:
            s_pattern.append('doji')
        else:
            s_pattern.append('spinning top')
    elif df['u_shadow'][ind] < 0.003 and df['l_shadow'][ind] > 2*df['range'][ind]:
        s_pattern.append('hammer')
    elif df['l_shadow'][ind] < 0.003 and df['u_shadow'][ind] > 2*df['range'][ind]:
        s_pattern.append('shooting star')
    else:
        s_pattern.append('others')
df['s_pattern'] = s_pattern
print(df.head())

                           o        h        l        c     vol color  range  \
date                                                                           
2020-07-17 20:00:00  1506.78  1506.78  1506.78  1506.78   873.0  blue    0.0   
2020-07-17 19:59:00  1505.75  1505.75  1505.75  1505.75  1149.0  blue    0.0   
2020-07-17 19:56:00  1505.75  1505.75  1505.75  1505.75   524.0  blue    0.0   
2020-07-17 19:55:00  1505.00  1505.00  1505.00  1505.00   656.0  blue    0.0   
2020-07-17 19:54:00  1504.50  1504.50  1504.50  1504.50   310.0  blue    0.0   

                     u_shadow  l_shadow  u_bound  l_bound s_pattern  
date                                                                 
2020-07-17 20:00:00       0.0       0.0  1506.78  1506.78  marubozu  
2020-07-17 19:59:00       0.0       0.0  1505.75  1505.75  marubozu  
2020-07-17 19:56:00       0.0       0.0  1505.75  1505.75  marubozu  
2020-07-17 19:55:00       0.0       0.0  1505.00  1505.00  marubozu  
2020-07-17 19:54:00

## Analysing single candlestick pattern detection results

In [26]:
count = df['s_pattern'].value_counts()
print(count)

marubozu        3011
others           603
spinning top      30
Name: s_pattern, dtype: int64


In [27]:
program_end = time.time()
print("Runtime = {0}".format(program_end - program_start))

Runtime = 6.019988775253296
