## Data Pulling and Cleaning with Pandas and yfinance

In [None]:
import pandas as pd
import yfinance as yf

In [None]:
# Pull the full daily price history for BTC-USD through yfinance.
data = yf.download(
    tickers="BTC-USD",
    period="max",
    interval="1d",
)

In [None]:
# Preview the downloaded frame (a bare last expression renders as a rich table).
data

In [None]:
# (rows, columns) of the raw download, before any cleaning.
data.shape

## Clean by Removing 0 Volume and Invalid Bars

In [None]:
# Drop bars with zero traded volume.
zeroIndex = data[data["Volume"] == 0].index
data = data.drop(index=zeroIndex)

# Count the remaining NaNs per column. This must inspect `data`: `df` is not
# assigned until a later cell, so referencing it here fails on a fresh kernel.
data.isna().sum()

In [None]:
# Bars whose High equals Low are treated as invalid and removed.
flat_bars = data["High"] == data["Low"]
data_filtered = data[~flat_bars].copy()

# Show the rejected bars; as the cell's last expression this is actually displayed.
data[flat_bars]

In [None]:
# Work on the cleaned frame under the short name `df`.
# reset_index() is deliberately not applied: it returns a new frame instead of
# modifying in place, and the charts below plot against df.index as the time axis.
df = data_filtered

## Chart

In [None]:
import plotly.graph_objects as go

# One candlestick trace built from the OHLC columns, plotted against the index.
candlestick_trace = go.Candlestick(
    x=df.index,
    open=df["Open"],
    high=df["High"],
    low=df["Low"],
    close=df["Close"],
)
fig = go.Figure(data=[candlestick_trace])

# Set the background colours and remove every outer margin.
fig.update_layout(
    paper_bgcolor="cornflowerblue",
    plot_bgcolor="lavender",
    margin_l=0,
    margin_b=0,
    margin_r=0,
    margin_t=0,
)

fig.show()

In [None]:
import matplotlib.pyplot as plt

# Line chart of the Close column against the frame's index.
plt.plot(df.index, df["Close"])
plt.show()

## Technical Indicators

In [47]:
import numpy as np
import pandas_ta as ta
#df.ta.indicators()
#help(ta.atr)
# ATR, RSI and mid-price columns computed through the pandas_ta accessor.
df["ATR"] = df.ta.atr(length=20)
df["RSI"] = df.ta.rsi()  # no length given, so the library default applies
df["Mid"] = df.ta.midprice(length=1)

# Simple moving averages over three horizons, stored as MA40 / MA80 / MA160.
for ma_length in (40, 80, 160):
    df[f"MA{ma_length}"] = df.ta.sma(length=ma_length)

from scipy.stats import linregress
def get_slope(array):
    """Return the least-squares slope of `array` against its positions 0..n-1.

    Parameters
    ----------
    array : array-like
        Sequence of numeric values (list, ndarray or Series); it is converted
        to a NumPy array before fitting.

    Returns
    -------
    float
        Slope of the regression line fitted by scipy.stats.linregress, i.e.
        the change in value per step of position.
    """
    values = np.asarray(array)
    positions = np.arange(len(values))
    # Only the slope is needed; the rest of the regression result is ignored.
    return linregress(positions, values).slope

backrollingN = 6  # number of trailing bars in each regression window

# Output column -> source column whose rolling slope it stores.
slope_sources = {
    "slopeMA40": "MA40",
    "slopeMA80": "MA80",
    "slopeMA160": "MA160",
    "MidSlope": "Mid",
    "RSISlope": "RSI",
}
for slope_col, source_col in slope_sources.items():
    # raw=True hands get_slope a plain ndarray instead of building a Series for
    # every window; get_slope converts to an array anyway, so results are the same.
    df[slope_col] = (
        df[source_col].rolling(window=backrollingN).apply(get_slope, raw=True)
    )

In [48]:
# Inspect the most recent rows to check the indicator and slope columns.
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ATR,RSI,Mid,MA40,MA80,MA160,slopeMA40,slopeMA80,slopeMA160,MidSlope,RSISlope
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-02-05,42577.621094,43494.25,42264.816406,42658.667969,42658.667969,18715487317,1410.29724,51.379813,42879.533203,42853.836621,41869.943555,35791.802136,-7.831733,79.213669,103.778311,0.112165,-0.363566
2024-02-06,42657.390625,43344.148438,42529.019531,43084.671875,43084.671875,16798476726,1380.538824,54.048625,42936.583984,42865.257031,41951.180664,35890.473425,-5.385153,77.851596,101.391349,31.892801,-0.380713
2024-02-07,43090.019531,44341.949219,42775.957031,44318.222656,44318.222656,21126587775,1389.811492,60.764581,43558.953125,42920.727539,42037.826611,36005.390613,3.637093,77.927439,101.597628,67.275056,0.920877
2024-02-08,44332.125,45575.839844,44332.125,45301.566406,45301.566406,26154524080,1383.201777,65.138659,44953.982422,42999.344141,42135.634229,36127.270874,22.591164,83.466825,105.093255,333.378069,2.582113
2024-02-10,47153.527344,47495.109375,47080.707031,47415.148438,47415.148438,36158255104,1423.718836,72.289419,47287.908203,43128.093164,42280.650928,36261.935559,50.435781,94.746539,112.531225,845.830469,4.424958


## Target Categories