# Finding out when to buy and sell stocks using Logistic Regression

We use logistic regression to determine when one should buy or sell a stock based on each indicator. Furthermore, the accuracy of the stock market predictions made using each indicator is compared against the original response, the closing price ("Close" column in the dataframe).

In [None]:
import pandas as pd
import numpy as np

In [None]:
df = pd.read_csv('AAPL_clean.csv')
df = df.set_index("Date")
df.head()

### Keltner Channels

In [None]:
# KELTNER CHANNEL CALCULATION

def get_kc(high, low, close, kc_lookback, multiplier, atr_lookback):
    """Compute the Keltner Channel middle, upper and lower bands.

    Args:
        high: pandas Series of high prices.
        low: pandas Series of low prices.
        close: pandas Series of closing prices.
        kc_lookback: Smoothing parameter for the middle line. It is passed as
            the first positional argument of ``Series.ewm``, i.e. as ``com``
            (centre of mass), not as ``span``.
        multiplier: Number of ATRs added to / subtracted from the middle line.
        atr_lookback: Smoothing length of the ATR; used as ``alpha = 1 / atr_lookback``.

    Returns:
        Tuple ``(kc_middle, kc_upper, kc_lower)`` of pandas Series.
    """
    prev_close = close.shift()

    # True range: the largest of (high - low), |high - previous close| and
    # |low - previous close|. On the first row only (high - low) is defined;
    # max() skips the NaNs of the other two columns.
    true_range = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
        join='inner',
    ).max(axis=1)
    atr = true_range.ewm(alpha=1 / atr_lookback).mean()

    # NOTE(review): ewm(kc_lookback) means com=kc_lookback (alpha = 1 / (1 + com)).
    # If a conventional N-period EMA was intended this should be span=kc_lookback;
    # left unchanged so existing results stay the same - confirm the intent.
    kc_middle = close.ewm(kc_lookback).mean()

    # The middle line is computed once and reused for both bands.
    band_width = multiplier * atr
    kc_upper = kc_middle + band_width
    kc_lower = kc_middle - band_width

    return kc_middle, kc_upper, kc_lower
# kc is a second name for df (no copy is made), so the band columns assigned
# below are added to df itself.
kc = df
kc_bands = get_kc(kc['High'], kc['Low'], kc['Close'], 20, 2, 10)
kc['kc_middle'], kc['kc_upper'], kc['kc_lower'] = kc_bands
kc.tail(20)

In [None]:
# KELTNER CHANNEL STRATEGY

def implement_kc_strategy(prices, kc_upper, kc_lower):
    """Generate buy/sell signals from Keltner Channel band crossings.

    A buy is emitted on bar ``i`` when the price is below the lower band and
    the NEXT bar's price is higher; a sell when the price is above the upper
    band and the NEXT bar's price is lower. A signal is only emitted when it
    differs from the last emitted one, so consecutive buys (or sells) collapse
    into the first. Because bar ``i + 1`` is read to decide bar ``i``, the
    last bar never produces a signal.

    Args:
        prices: Sequence of prices (pandas Series, NumPy array or list).
        kc_upper: Upper band values, same length as ``prices``.
        kc_lower: Lower band values, same length as ``prices``.

    Returns:
        Tuple ``(buy_price, sell_price, kc_signal)`` of lists with one entry
        per bar: the price at which a buy/sell was emitted (NaN otherwise) and
        the signal (1 buy, -1 sell, 0 none).
    """
    # Work on positional arrays. Indexing a Series with an integer key is
    # label-based; on a date-indexed Series it only works through a deprecated
    # positional fallback, and on a non-0..n-1 integer index it reads the wrong
    # row or raises KeyError.
    price_arr = np.asarray(prices)
    upper_arr = np.asarray(kc_upper)
    lower_arr = np.asarray(kc_lower)
    n_bars = len(price_arr)

    buy_price = []
    sell_price = []
    kc_signal = []
    signal = 0  # last emitted signal: 1 after a buy, -1 after a sell

    for i in range(n_bars):
        price = price_arr[i]
        has_next = i + 1 < n_bars
        # The two setups are mutually exclusive (next price cannot be both
        # above and below the current one).
        buy_setup = has_next and price < lower_arr[i] and price_arr[i + 1] > price
        sell_setup = has_next and price > upper_arr[i] and price_arr[i + 1] < price

        bought, sold, emitted = np.nan, np.nan, 0
        if buy_setup and signal != 1:
            bought = price
            signal = emitted = 1
        elif sell_setup and signal != -1:
            sold = price
            signal = emitted = -1

        buy_price.append(bought)
        sell_price.append(sold)
        kc_signal.append(emitted)

    return buy_price, sell_price, kc_signal

# Run the strategy on the closing prices. The three result lists are kept as
# notebook variables; they are not written into the frame displayed below.
kc_close = kc['Close']
buy_price, sell_price, kc_signal = implement_kc_strategy(
    kc_close, kc['kc_upper'], kc['kc_lower']
)
kc.tail(20)

### Head and Shoulders

In [None]:
# 20-row rolling maximum of High, computed once and reused for all three levels
rolling_high = df["High"].rolling(window=20).max()

# Left shoulder: window ending at the current row. Head / right shoulder: the
# same series shifted back by 10 / 20 rows, i.e. windows ending 10 / 20 rows
# AFTER the current row (these columns therefore use future data and are NaN
# on the final rows).
df["left_shoulder"] = rolling_high
df["right_shoulder"] = rolling_high.shift(periods=-20)
df["head"] = rolling_high.shift(periods=-10)

# Gap between the head and each shoulder, as a fraction of the head
peak = df["head"]
df["left_shoulder_diff"] = (peak - df["left_shoulder"]) / peak
df["right_shoulder_diff"] = (peak - df["right_shoulder"]) / peak

# Flag rows where both gaps are at most 5% (comparisons against NaN give 0)
left_ok = df["left_shoulder_diff"] <= 0.05
right_ok = df["right_shoulder_diff"] <= 0.05
df["head_shoulders"] = (left_ok & right_ok).astype(int)
df.tail(30)

### Bollinger Bands

In [None]:
# 20-row rolling window over the closing price: mean (SMA) and standard deviation
close_window = df['Close'].rolling(window=20)
df['SMA'] = close_window.mean()
df['STD'] = close_window.std()

# The bands sit two standard deviations above and below the SMA
band_offset = df['STD'] * 2
df['UpperBand'] = df['SMA'] + band_offset
df['LowerBand'] = df['SMA'] - band_offset

# Give the band columns their final names (rename returns a new frame, so df is rebound)
df = df.rename(columns={'UpperBand': 'BB_upperband', 'LowerBand': 'BB_lowerband'})

df.tail()


### Moving Average

In [None]:
# Simple moving average of Close over the last `window_size` rows
# (NaN until a full window is available)
window_size = 50
close_rolling = df["Close"].rolling(window=window_size)
df["MA"] = close_rolling.mean()

In [None]:
# Display the final rows of df as this cell's output
df.tail()

### RSI

In [None]:

# Calculate the price change between each day's closing price and the previous day's closing price
delta = df["Close"].diff()

# Define the time period for the RSI calculation
n = 14

# Split the changes into non-negative gain and loss arrays. Plain NumPy arrays
# are used so rows are addressed by position rather than by index label
# (df is indexed by Date). Row 0 has no previous close; its NaN change counts
# as neither a gain nor a loss.
changes = delta.to_numpy(dtype=float)
gains = np.where(changes > 0, changes, 0.0)
losses = np.where(changes < 0, -changes, 0.0)

# RSI needs n price changes before it is defined, so rows 0..n-1 stay NaN
rsi = [np.nan] * len(df)
avg_gain = []
avg_loss = []

if len(df) > n:
    # Seed the averages with the simple mean of the first n changes (rows 1..n).
    # The seed belongs to row n; earlier rows must not receive it, and rows
    # 1..n must not be fed into the smoothing a second time.
    avg_gain.append(gains[1:n + 1].mean())
    avg_loss.append(losses[1:n + 1].mean())

    for i in range(n, len(df)):
        if i > n:
            # Wilder smoothing: new_avg = (old_avg * (n - 1) + current) / n
            avg_gain.append((avg_gain[-1] * (n - 1) + gains[i]) / n)
            avg_loss.append((avg_loss[-1] * (n - 1) + losses[i]) / n)
        total = avg_gain[-1] + avg_loss[-1]
        # 100 * gain / (gain + loss) equals 100 - 100 / (1 + gain / loss) but
        # does not divide by zero when there were no losses (RSI = 100).
        # With no movement at all the RSI is undefined and stays NaN.
        if total > 0:
            rsi[i] = 100 * avg_gain[-1] / total

# Add the RSI values to the DataFrame (one value per row)
df["RSI"] = rsi
# Print the DataFrame with the RSI values
df.head()

# Logistic Regression

In [None]:
# finding out where to buy and sell using the original data
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Binary target built from the closing price only (1: up, 0: down):
# 1 when the next row's Close is higher than this row's Close, otherwise 0.
# The final row has no next Close, so the comparison is False and it gets 0.
next_close = df['Close'].shift(-1)
df["label"] = np.where(next_close > df['Close'], 1, 0)

In [None]:
# KELTNER CHANNELS

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Plot markers (1: buy, 0: sell, NaN: neither), taken from the NEXT row's close
# relative to the NEXT row's bands: above the upper band -> sell, below the
# lower band -> buy.
next_close = df['Close'].shift(-1)
kcsignal = np.where(next_close > df['kc_upper'].shift(-1), 0,
                    np.where(next_close < df['kc_lower'].shift(-1), 1, np.nan))

# Feature (Keltner middle line) and target (next-row direction of Close)
kc_middle = df[['kc_middle']]
label = df[['label']]

# Chronological split: first 80% of rows for training, remaining rows for testing
train_index = int(len(df) * 0.8)
test_index = len(df) - train_index  # number of rows in the test split

# Train Set
X_train = kc_middle.iloc[:train_index]
y_train = label.iloc[:train_index]

# Test Set
X_test = kc_middle.iloc[train_index:]
y_test = label.iloc[train_index:]

# Fit logistic regression model. The target is flattened to 1-D because
# scikit-learn emits a DataConversionWarning for a column-vector y.
model = LogisticRegression()
model.fit(X_train.values, y_train.values.ravel())

# Predict labels for testing set
y_pred = model.predict(X_test.values)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Create a plot of the closing price with the Keltner Channel buy/sell markers
plt.figure(figsize=(18, 6))
plt.plot(df['Close'], label='Price')
plt.scatter(df[kcsignal == 1].index, df[kcsignal == 1]['Close'], marker='^', color='blue', label='Buy')
plt.scatter(df[kcsignal == 0].index, df[kcsignal == 0]['Close'], marker='v', color='red', label='Sell')
plt.legend()
plt.show()

In [None]:
# BOLLINGER BANDS

# Plot markers (1: buy, 0: sell, NaN: neither), taken from the NEXT row's close
# relative to the NEXT row's bands. Computed on the full frame, before any rows
# are dropped, so shift(-1) refers to the true next row.
next_close = df['Close'].shift(-1)
bbsignal = np.where(next_close > df['BB_upperband'].shift(-1), 0,
                    np.where(next_close < df['BB_lowerband'].shift(-1), 1, np.nan))
features = ['BB_upperband', 'BB_lowerband', 'SMA']

# Drop incomplete rows and apply the SAME row mask to the marker array.
# Truncating the array from the end would misalign it with df, because the
# rolling-window columns are NaN at the START of the frame.
# Note: df is rebound to the cleaned frame, which later cells then see.
complete_rows = df.notna().all(axis=1).to_numpy()
df = df.dropna()
bbsignal = bbsignal[complete_rows]

# Features (both bands and the SMA) and target (next-row direction of Close)
BB = df[features]
label = df[['label']]

# Chronological split: first 80% of rows for training, remaining rows for testing
train_index = int(len(df) * 0.8)
test_index = len(df) - train_index  # number of rows in the test split

# Train Set
X_train = BB.iloc[:train_index]
y_train = label.iloc[:train_index]

# Test Set
X_test = BB.iloc[train_index:]
y_test = label.iloc[train_index:]

# Fit logistic regression model. The target is flattened to 1-D because
# scikit-learn emits a DataConversionWarning for a column-vector y.
model = LogisticRegression()
model.fit(X_train.values, y_train.values.ravel())

# Predict labels for testing set
y_pred = model.predict(X_test.values)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Create a plot of the closing price with the Bollinger Bands buy/sell markers
plt.figure(figsize=(18, 6))
plt.plot(df['Close'], label='Price')
plt.scatter(df[bbsignal == 1].index, df[bbsignal == 1]['Close'], marker='^', color='blue', label='Buy')
plt.scatter(df[bbsignal == 0].index, df[bbsignal == 0]['Close'], marker='v', color='red', label='Sell')
plt.legend()
plt.show()



In [None]:
# MOVING AVERAGE

# Plot markers (1: buy, 0: sell): buy when the NEXT row's moving average is
# above the current close, sell otherwise. Computed before rows are dropped.
masignal = np.where(df['MA'].shift(-1) > df['Close'], 1, 0)

# Drop incomplete rows and apply the SAME row mask to the marker array, so the
# markers stay aligned with df wherever the dropped rows are located.
complete_rows = df.notna().all(axis=1).to_numpy()
df = df.dropna()
masignal = masignal[complete_rows]

# Feature (moving average) and target (next-row direction of Close)
MA = df[['MA']]
label = df[['label']]

# Chronological split: first 80% of rows for training, remaining rows for testing
train_index = int(len(df) * 0.8)
test_index = len(df) - train_index  # number of rows in the test split

# Train Set
X_train = MA.iloc[:train_index]
y_train = label.iloc[:train_index]

# Test Set
X_test = MA.iloc[train_index:]
y_test = label.iloc[train_index:]

# Fit logistic regression model. The target is flattened to 1-D because
# scikit-learn emits a DataConversionWarning for a column-vector y.
model = LogisticRegression()
model.fit(X_train.values, y_train.values.ravel())

# Predict labels for testing set
y_pred = model.predict(X_test.values)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Create a plot of the closing price with the Moving Average buy/sell markers
plt.figure(figsize=(18, 6))
plt.plot(df['Close'], label='Price')
plt.scatter(df[masignal == 1].index, df[masignal == 1]['Close'], marker='^', color='blue', label='Buy')
plt.scatter(df[masignal == 0].index, df[masignal == 0]['Close'], marker='v', color='red', label='Sell')
plt.legend()
plt.show()

In [None]:
# Heads and Shoulders
# Neckline, approximated here as the midpoint of the two shoulder levels
df["Neckline"] = (df["left_shoulder"] + df["right_shoulder"]) / 2

# Above neckline you sell (0), below neckline you buy (1).
# The marker is taken from the close relative to the neckline; comparing the
# next close with the current close would only reproduce the "label" column
# and leave the neckline unused.
# NOTE(review): the same-row close is used here - confirm this is the intended rule.
hssignal = np.where(df['Close'] > df['Neckline'], 0, 1)
features = ["left_shoulder", "right_shoulder"]

# Drop incomplete rows and apply the SAME row mask to the marker array, so the
# markers stay aligned with df wherever the dropped rows are located.
complete_rows = df.notna().all(axis=1).to_numpy()
df = df.dropna()
hssignal = hssignal[complete_rows]

# Features (shoulder levels) and target (next-row direction of Close).
# NOTE(review): right_shoulder is built from rows after the current one, so
# this feature contains future information - confirm that is acceptable.
HnS = df[features]
label = df[['label']]

# Chronological split: first 80% of rows for training, remaining rows for testing
train_index = int(len(df) * 0.8)
test_index = len(df) - train_index  # number of rows in the test split

# Train Set
X_train = HnS.iloc[:train_index]
y_train = label.iloc[:train_index]

# Test Set
X_test = HnS.iloc[train_index:]
y_test = label.iloc[train_index:]

# Fit logistic regression model. The target is flattened to 1-D because
# scikit-learn emits a DataConversionWarning for a column-vector y.
model = LogisticRegression()
model.fit(X_train, y_train.values.ravel())

# Predict labels for testing set
y_pred = model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Create a plot of the closing price with the Head and Shoulders buy/sell markers
plt.figure(figsize=(18, 6))
plt.plot(df['Close'], label='Price')
plt.scatter(df[hssignal == 1].index, df[hssignal == 1]['Close'], marker='^', color='blue', label='Buy')
plt.scatter(df[hssignal == 0].index, df[hssignal == 0]['Close'], marker='v', color='red', label='Sell')
plt.legend()
plt.show()


In [None]:
# RSI
# Plot markers (1: buy, 0: sell, -1: neither), taken from the NEXT row's RSI:
# above 70 -> sell, below 30 -> buy. No rows are dropped in this cell, so the
# array already has one entry per row of df and needs no trimming.
next_rsi = df["RSI"].shift(-1)
rsisignal = np.where(next_rsi > 70, 0, np.where(next_rsi < 30, 1, -1))

# Feature (RSI) and target (next-row direction of Close)
rsi = df[['RSI']]
label = df[['label']]

# Chronological split: first 80% of rows for training, remaining rows for testing
train_index = int(len(df) * 0.8)
test_index = len(df) - train_index  # number of rows in the test split

# Train Set
X_train = rsi.iloc[:train_index]
y_train = label.iloc[:train_index]

# Test Set
X_test = rsi.iloc[train_index:]
y_test = label.iloc[train_index:]

# Fit logistic regression model. The target is flattened to 1-D because
# scikit-learn emits a DataConversionWarning for a column-vector y.
model = LogisticRegression()
model.fit(X_train.values, y_train.values.ravel())

# Predict labels for testing set
y_pred = model.predict(X_test.values)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Create a plot of the closing price with the RSI buy/sell markers
plt.figure(figsize=(18, 6))
plt.plot(df['Close'], label='Price')
plt.scatter(df[rsisignal == 1].index, df[rsisignal == 1]['Close'], marker='^', color='blue', label='Buy')
plt.scatter(df[rsisignal == 0].index, df[rsisignal == 0]['Close'], marker='v', color='red', label='Sell')
plt.legend()
plt.show()