# Getting and Formatting Data

In [1]:
import yfinance as yf
from datetime import datetime
from pandas import DataFrame

symb = 'AAPL'
stock = yf.Ticker(symb)
start_date ="2020-08-01"
end_date = "2021-01-01"
stock_history = stock.history(start=start_date,end=end_date)

## Convert code to list

In [6]:
col_names = stock_history.columns.tolist()[0:5]
data = stock_history.values.tolist()
data = [i[0:5] for i in data]
print(col_names)

['Open', 'High', 'Low', 'Close', 'Volume']


# Using TA-Lib for Technical Analysis

## Getting a formatting data

TA-lib only needs some of the data depending on the function, so it needs to be split up into individual variables.

In [33]:
import talib as ta

open_prices = [i[0] for i in data]
high_prices = [i[1] for i in data]
low_prices = [i[2] for i in data]
close_prices = [i[3] for i in data]
volumes = [i[4] for i in data]

print(close_prices)

[108.20838928222656, 108.93101501464844, 109.32585144042969, 113.14015197753906, 110.56783294677734, 112.17491149902344, 108.83883666992188, 112.45602416992188, 114.44621276855469, 114.34422302246094, 114.04568481445312, 114.99600982666016, 115.14029693603516, 117.6952133178711, 123.76033782958984, 125.24053955078125, 124.21310424804688, 125.90228271484375, 124.39720153808594, 124.19569396972656, 128.40744018554688, 133.52224731445312, 130.75587463378906, 120.28743743896484, 120.36705017089844, 112.26695251464844, 116.74488830566406, 112.93367004394531, 111.45097351074219, 114.79450225830078, 114.97362518310547, 111.58032989501953, 109.79911041259766, 106.3162612915039, 109.5403823852539, 111.26189422607422, 106.59490203857422, 107.68950653076172, 111.7295913696289, 114.39645385742188, 113.53072357177734, 115.24229431152344, 116.21749114990234, 112.46597290039062, 115.92890930175781, 112.60528564453125, 114.51587677001953, 114.40641021728516, 116.3966064453125, 123.79019165039062, 120.

### Now we need to convert the lists to a numpy array

In [34]:
import numpy as np

open_prices = np.asarray(open_prices)
high_prices = np.asarray(high_prices)
low_prices = np.asarray(low_prices)
close_prices = np.asarray(close_prices)
volumes = np.asarray(volumes)

## TA Lib

Ta-Lib has a function called "get_functions" that will show the functions you can use.

In [36]:
ta.get_functions()

['HT_DCPERIOD',
 'HT_DCPHASE',
 'HT_PHASOR',
 'HT_SINE',
 'HT_TRENDMODE',
 'ADD',
 'DIV',
 'MAX',
 'MAXINDEX',
 'MIN',
 'MININDEX',
 'MINMAX',
 'MINMAXINDEX',
 'MULT',
 'SUB',
 'SUM',
 'ACOS',
 'ASIN',
 'ATAN',
 'CEIL',
 'COS',
 'COSH',
 'EXP',
 'FLOOR',
 'LN',
 'LOG10',
 'SIN',
 'SINH',
 'SQRT',
 'TAN',
 'TANH',
 'ADX',
 'ADXR',
 'APO',
 'AROON',
 'AROONOSC',
 'BOP',
 'CCI',
 'CMO',
 'DX',
 'MACD',
 'MACDEXT',
 'MACDFIX',
 'MFI',
 'MINUS_DI',
 'MINUS_DM',
 'MOM',
 'PLUS_DI',
 'PLUS_DM',
 'PPO',
 'ROC',
 'ROCP',
 'ROCR',
 'ROCR100',
 'RSI',
 'STOCH',
 'STOCHF',
 'STOCHRSI',
 'TRIX',
 'ULTOSC',
 'WILLR',
 'BBANDS',
 'DEMA',
 'EMA',
 'HT_TRENDLINE',
 'KAMA',
 'MA',
 'MAMA',
 'MAVP',
 'MIDPOINT',
 'MIDPRICE',
 'SAR',
 'SAREXT',
 'SMA',
 'T3',
 'TEMA',
 'TRIMA',
 'WMA',
 'CDL2CROWS',
 'CDL3BLACKCROWS',
 'CDL3INSIDE',
 'CDL3LINESTRIKE',
 'CDL3OUTSIDE',
 'CDL3STARSINSOUTH',
 'CDL3WHITESOLDIERS',
 'CDLABANDONEDBABY',
 'CDLADVANCEBLOCK',
 'CDLBELTHOLD',
 'CDLBREAKAWAY',
 'CDLCLOSINGMARUBOZU',


## SMAs

Lets get a simple moving average

### 5-Day SMA

In [37]:
sma = ta.SMA(close_prices,5)
print(sma)

[         nan          nan          nan          nan 110.03464813
 110.82795258 110.80951691 111.43555145 111.69676361 112.45204163
 112.82619629 114.05763092 114.59448547 115.24428558 117.12750854
 119.36647949 121.20989838 123.36229553 124.70269318 124.7897644
 125.42314453 127.28497314 128.25569153 127.43373871 126.66800995
 123.43991241 120.08444061 116.51999969 114.75270691 113.63819733
 114.17953186 113.14662018 112.51970825 111.49276581 110.44194183
 109.69959564 108.70251007 108.28058929 109.36325531 110.3344696
 110.78823547 112.51771393 114.22331085 114.37058716 114.67707825
 114.49199066 114.34670715 113.98449097 114.77061768 116.34287415
 117.92308807 119.1390976  120.28146973 120.68945923 119.01371307
 118.29923553 117.43947144 116.45233459 115.66023865 115.47515106
 115.29404297 114.16560059 114.08002319 112.85008087 111.60023804
 110.37427673 111.12060089 111.85896301 113.85608063 115.39861755
 116.5390152  117.4835907  117.56034546 117.67398071 118.46744537
 119.1492660

### 25 Day SMA

In [38]:
sma = ta.SMA(close_prices,25)
print(sma)

[         nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan 118.21439453
 118.37673706 118.68929199 118.83360474 118.7660376  118.93510437
 119.04705292 119.15671265 119.0504361  118.72523804 118.53308441
 118.42173279 118.08568848 117.78765686 117.54903198 117.17447662
 116.70608398 116.34725159 115.95985992 115.48261078 115.15193939
 114.51985321 113.75959839 113.10561981 112.94998657 113.08691223
 113.41648834 113.57052979 113.85791382 114.13733704 114.16201538
 114.24042908 114.42910004 114.64443909 114.9708316  115.16865723
 115.35931854 115.52171814 115.80432587 115.66819672 115.42181091
 115.27652679 115.24229553 115.3314563  115.56529541 115.56611877
 115.6859317  115.86967224 116.0466272  116.14596802 115.99103302
 115.93116

### 100 Day SMA

In [39]:
sma = ta.SMA(close_prices,100)
print(sma)

[         nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          

### Lets make a simple algorithm for predicting the stock market using SMA's

First let's get all apple historical close prices.

In [68]:
aapl_history = stock.history(period="max")
col_names = aapl_history.columns.tolist()[0:5]
data = aapl_history.values.tolist()
close_prices = [i[3] for i in data]
close_prices = np.asarray(close_prices)

#### Now let's get a shorter-term moving average and a longer term moving average

In [120]:
sma_short = ta.SMA(close_prices,50)
sma_long = ta.EMA(close_prices,200)
print(sma_long)

[         nan          nan          nan ... 118.78336223 118.85785611
 118.91747941]


#### Now let's remove the first 199 data points since they are not valid.

In [121]:
start_idx = 198
sma_short = sma_short[start_idx:len(sma_short)]
sma_long = sma_long[start_idx:len(sma_long)]
close_prices = close_prices[start_idx:len(close_prices)]
print(len(sma_short))
print(len(sma_long))
print(len(close_prices))

9006
9006
9006


#### Creating an list to represent buy/sell decisions.

One simple strategy for investing is to make a buy when the short term moving average moves above the long term moving average. Then sell when the short term falls back below the long term. 

We will attempt to create a list that represents this, by containing a 0 on the days it should hold, a one on the days it should buy and -1 on the days it should sell. We will avoid using list comprehension for simplicity.

In [122]:
length = len(close_prices)
decision_lst = []
buy = False
sell = True

for i in range(0,length):
    short = sma_short[i]
    long = sma_long[i]
    
    if (short >= long) and buy == False:
        buy = True
        sell = False
        decision_lst.append(1)
    elif (short < long) and sell == False:
        sell = True
        buy = False
        decision_lst.append(-1)
    else:
        
        decision_lst.append(0)

print(decision_lst[0:500])
print(len(decision_lst))
print(len(close_prices))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

#### Calculating returns

Now lets use the decision list to calculate the returns. We will need to keep track of the current bought price and compare it to the previous sell price to figure this out. We will then sum the return list to get the total returns.

In [123]:
decision_lst = np.asarray(decision_lst)
bought_price=0
returns_list = []

for i in range(0,length):
    decision = decision_lst[i]
    rtrn = 0
    close = close_prices[i]
    if decision == 1:
        bought_price = close
    elif decision == -1:
        rtrn = (close - bought_price) / bought_price
    returns_list.append(rtrn)

total_rtrn = sum(returns_list)
print(total_rtrn)

11.309757900815983


## RSIs

Now let's take a look at RSIs.

In [135]:
rsi = ta.RSI(close_prices)
print(rsi)

[        nan         nan         nan ... 47.22225271 44.92640295
 42.26351595]


#### Now let's try to use RSIs

In [145]:
aapl_history = stock.history(period="max")
col_names = aapl_history.columns.tolist()[0:5]
data = aapl_history.values.tolist()
close_prices = [i[3] for i in data]
close_prices = np.asarray(close_prices)

In [146]:
start_idx = 14

rsi_list = ta.RSI(close_prices)
rsi_list = rsi_list[start_idx:len(rsi_list)]
close_prices = close_prices[start_idx:len(close_prices)]
print(len(rsi_list))
print(len(close_prices))

10180
10180


In [199]:
length = len(close_prices)

lower_bound = 30
upper_bound = 90

decision_lst = []

buy = False
sell = True

for i in range(0,length):
    rsi = rsi_list[i]
    
    if (rsi <= lower_bound) and buy == False:
        buy = True
        sell = False
        decision_lst.append(1)
    elif (rsi > upper_bound) and sell == False:
        sell = True
        buy = False
        decision_lst.append(-1)
    else:
        
        decision_lst.append(0)

print(decision_lst[0:500])
print(len(decision_lst))
print(len(close_prices))



[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [198]:
decision_lst = np.asarray(decision_lst)
bought_price=0
returns_list = []

for i in range(0,length):
    decision = decision_lst[i]
    rtrn = 0
    close = close_prices[i]
    if decision == 1:
        bought_price = close
    elif decision == -1:
        rtrn = (close - bought_price) / bought_price
    returns_list.append(rtrn)

total_rtrn = sum(returns_list)
print(total_rtrn)

41.455509913862315
