In [516]:
import pandas as pd
import numpy as np 
import yfinance as yf

In [517]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [518]:
import talib

In [519]:
aapl = yf.download("ETH-USD")

[*********************100%***********************]  1 of 1 completed


In [520]:
aapl

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-09 00:00:00+00:00,308.644989,329.451996,307.056000,320.884003,320.884003,893249984
2017-11-10 00:00:00+00:00,320.670990,324.717987,294.541992,299.252991,299.252991,885985984
2017-11-11 00:00:00+00:00,298.585999,319.453003,298.191986,314.681000,314.681000,842300992
2017-11-12 00:00:00+00:00,314.690002,319.153015,298.513000,307.907990,307.907990,1613479936
2017-11-13 00:00:00+00:00,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984
...,...,...,...,...,...,...
2022-11-08 00:00:00+00:00,1568.329590,1574.799805,1259.443115,1332.835571,1332.835571,42048003440
2022-11-09 00:00:00+00:00,1333.122437,1335.743530,1083.285645,1100.169800,1100.169800,38864492427
2022-11-10 00:00:00+00:00,1100.107178,1341.791138,1093.122559,1299.464600,1299.464600,28581002122
2022-11-11 00:00:00+00:00,1298.882446,1302.295288,1211.329590,1287.221069,1287.221069,20920539099


## Simple moving average

In [521]:
aapl['SMA_5'] = aapl['Close'].transform(lambda x: x.rolling(window = 5).mean())
aapl["SMA_15"] = aapl["Close"].transform(lambda x:x.rolling(window = 15).mean())

## Simple moving average volume



In [522]:
aapl['SMA_5_volume'] = aapl['Volume'].transform(lambda x: x.rolling(window = 5).mean())
aapl['SMA_15_volume'] = aapl['Volume'].transform(lambda x: x.rolling(window = 15).mean())


## Wilder smoothing

In [523]:
def Wilder(data, periods):
    start = np.where(~np.isnan(data))[0][0] #Check if nans present in beginning
    Wilder = np.array([np.nan]*len(data))
    Wilder[start+periods-1] = data[start:(start+periods)].mean() #Simple Moving Average
    for i in range(start+periods,len(data)):
        Wilder[i] = (Wilder[i-1]*(periods-1) + data[i])/periods #Wilder Smoothing
    return(Wilder)

## Average True Range (ATR)

In [524]:
aapl['true_range'] = np.maximum((aapl['High'] - aapl['Low']), 
                         abs(aapl['High'] - aapl['Close'].shift()), 
                         abs(aapl['Low']-aapl["Close"].shift()))

In [525]:
TR_data = aapl.copy()
aapl.loc[:,'ATR_5'] = Wilder(aapl['true_range'], 5)
aapl.loc[:,'ATR_15'] = Wilder(aapl['true_range'], 15)

 ## Average Directional Index

## Stochastic Oscillators


In [526]:

aapl['Lowest_5D'] = aapl['Low'].transform(lambda x: x.rolling(window = 5).min())
aapl['High_5D'] = aapl['High'].transform(lambda x: x.rolling(window = 5).max())
aapl['Lowest_15D'] =aapl['Low'].transform(lambda x: x.rolling(window = 15).min())
aapl['High_15D'] =aapl['High'].transform(lambda x: x.rolling(window = 15).max())

aapl['Stochastic_5'] = ((aapl['Close'] - aapl['Lowest_5D'])/(aapl['High_5D'] - aapl['Lowest_5D']))*100
aapl['Stochastic_15'] = ((aapl['Close'] - aapl['Lowest_15D'])/(aapl['High_15D'] - aapl['Lowest_15D']))*100

aapl['Stochastic_avg_5'] = aapl['Stochastic_5'].rolling(window = 5).mean()
aapl['Stochastic_avg_15'] = aapl['Stochastic_5'].rolling(window = 15).mean()



## Relative Strength Index (RSI)

RSI is one of the most common momentum indicator aimed at quantifies price changes and the speed of such change.

In [527]:
aapl['Diff'] = aapl['Close'].transform(lambda x: x.diff())
aapl['Up'] = aapl['Diff']
aapl.loc[(aapl['Up']<0), 'Up'] = 0

aapl['Down'] = aapl['Diff']
aapl.loc[(aapl['Down']>0), 'Down'] = 0 
aapl['Down'] = abs(aapl['Down'])

aapl['avg_5up'] = aapl['Up'].transform(lambda x: x.rolling(window=5).mean())
aapl['avg_5down'] = aapl['Down'].transform(lambda x: x.rolling(window=5).mean())

aapl['avg_15up'] = aapl['Up'].transform(lambda x: x.rolling(window=14).mean())
aapl['avg_15down'] = aapl['Down'].transform(lambda x: x.rolling(window=14).mean())

aapl['RS_5'] = aapl['avg_5up'] / aapl['avg_5down']
aapl['RS_15'] = aapl['avg_15up'] / aapl['avg_15down']

aapl['RSI_5'] = 100 - (100/(1+aapl['RS_5']))
aapl['RSI_15'] = 100 - (100/(1+aapl['RS_15']))


## Moving Average Convergence Divergence (MACD)

In [528]:
aapl['5Ewm'] = aapl['Close'].transform(lambda x: x.ewm(span=5, adjust=False).mean())
aapl['15Ewm'] = aapl['Close'].transform(lambda x: x.ewm(span=15, adjust=False).mean())
aapl['MACD'] = aapl['15Ewm'] - aapl['5Ewm']

## Bollinger Bands

In [529]:
aapl['15MA'] = aapl['Close'].transform(lambda x: x.rolling(window=15).mean())
aapl['SD'] = aapl['Close'].transform(lambda x: x.rolling(window=15).std())
aapl['upperband'] = aapl['15MA'] + 2*aapl['SD']
aapl['lowerband'] = aapl['15MA'] - 2*aapl['SD']

In [548]:
aapl.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1812 entries, 2017-11-27 00:00:00+00:00 to 2022-11-12 00:00:00+00:00
Data columns (total 40 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Open               1812 non-null   float64
 1   High               1812 non-null   float64
 2   Low                1812 non-null   float64
 3   Close              1812 non-null   float64
 4   Adj Close          1812 non-null   float64
 5   Volume             1812 non-null   int64  
 6   SMA_5              1812 non-null   float64
 7   SMA_15             1812 non-null   float64
 8   SMA_5_volume       1812 non-null   float64
 9   SMA_15_volume      1812 non-null   float64
 10  true_range         1812 non-null   float64
 11  ATR_5              1812 non-null   float64
 12  ATR_15             1812 non-null   float64
 13  Lowest_5D          1812 non-null   float64
 14  High_5D            1812 non-null   float64
 15  Lowest_15D         1812 

## Model

In [531]:
aapl.dropna(inplace=True)

In [532]:
aapl["y"] = np.where(aapl.Open*102/100 <= aapl.Close, 1, 0)

In [533]:
X = aapl.iloc[:,:-1]
y = aapl.iloc[:,-1]

In [534]:
X.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,1812.0,1134.791,1207.726,84.27969,208.0412,479.6935,1808.013,4810.071
High,1812.0,1171.758,1244.69,85.34274,213.2367,493.2315,1845.759,4891.705
Low,1812.0,1092.573,1164.979,82.82989,203.1158,465.4231,1739.144,4718.039
Close,1812.0,1134.988,1207.142,84.3083,208.5001,479.9195,1808.237,4812.087
Adj Close,1812.0,1134.988,1207.142,84.3083,208.5001,479.9195,1808.237,4812.087
Volume,1812.0,13187120000.0,10862330000.0,943650000.0,4649665000.0,11082030000.0,18448030000.0,84482910000.0
SMA_5,1812.0,1134.109,1205.32,86.22876,207.8706,475.3239,1811.262,4716.166
SMA_15,1812.0,1131.154,1201.479,92.27483,206.0293,472.4814,1788.696,4606.565
SMA_5_volume,1812.0,13163430000.0,10349710000.0,1073096000.0,4582667000.0,12060090000.0,18683520000.0,62672280000.0
SMA_15_volume,1812.0,13103400000.0,10062700000.0,1119800000.0,4447956000.0,12463440000.0,18261980000.0,56859200000.0


In [535]:
X = X.drop(["RS_5"],axis = 1)

In [536]:
scaler = StandardScaler()

In [537]:
X_scaled = scaler.fit_transform(X)

In [551]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled,y,train_size=0.75)

model = LogisticRegression(max_iter=1000, C=50)
model.fit(X_train,y_train)

y_predict = model1.predict(X_test)
confusion_matrix(y_test,y_predict)



array([[321,   1],
       [ 11, 120]], dtype=int64)

In [552]:
accuracy_score(y_test,y_predict)

0.9735099337748344