<a href="https://colab.research.google.com/github/GreenStreetQuant/Alpaca-Algo-Trading-Bot/blob/master/alpaca_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install yfinance

In [12]:
import os
import alpaca_trade_api as tradeapi
import pandas as pd
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from feature_selector import FeatureSelector
import yfinance as yf
import numpy as np
from sklearn.ensemble import RandomForestClassifier

  from numpy.core.umath_tests import inner1d


In [2]:
tsla = yf.download("TSLA",period='2y')
spy = yf.download("SPY",period='2y')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [13]:
def exp_smooth_data(data):

  smooth_list = []

  #first value y for s 
  smooth_list.append(data.iloc[0])

  for v in data[1:]:
    st = 0.2 * v + (1 - 0.2) * smooth_list[-1]
    smooth_list.append(st)

  return smooth_list 

def computeRSI(data):
    diff = data.diff(1).dropna()        # diff in one field(one day)

    #this preservers dimensions off diff values
    up_chg = 0 * diff
    down_chg = 0 * diff
    
    # up change is equal to the positive difference, otherwise equal to zero
    up_chg[diff > 0] = diff[ diff > 0]
    
    # down change is equal to negative deifference, otherwise equal to zero
    down_chg[diff < 0] = diff[ diff < 0 ]
    
    # check pandas documentation for ewm
    # we set com=time_window-1 so we get decay alpha=1/time_window

    up_chg_avg   = up_chg.ewm(com=14-1 , min_periods=14).mean()
    down_chg_avg = down_chg.ewm(com=14-1 , min_periods=14).mean()
    
    
    rs = abs(up_chg_avg/down_chg_avg)
    rsi = 100 - 100/(1+rs)
    return rsi

def get_features(tsla,spy):

  df_smoothed = pd.DataFrame()

  close_smooth = exp_smooth_data(tsla['Close'])

  df_smoothed['close_smooth'] = close_smooth

  open_smooth = exp_smooth_data(tsla['Open'])

  df_smoothed['Open_smooth'] = open_smooth

  high_smooth = exp_smooth_data(tsla['High'])

  df_smoothed['high_smooth'] = high_smooth

  low_smooth = exp_smooth_data(tsla['Low'])

  df_smoothed['low_smooth'] = low_smooth

  volume_smooth = exp_smooth_data(tsla['Volume'])

  df_smoothed['volume_smooth'] = volume_smooth

  df_sp_smoothed = pd.DataFrame()

  close_smooth_sp = exp_smooth_data(spy['Close'])

  df_sp_smoothed['close_smooth'] = close_smooth_sp

  df_smoothed['rsi'] = computeRSI(df_smoothed['close_smooth'])
  df_smoothed['william'] = (df_smoothed['high_smooth'].rolling(14).max() - df_smoothed['close_smooth'])/(df_smoothed['high_smooth'].rolling(14).max() - df_smoothed['low_smooth'].rolling(14).min()) * -100
  df_smoothed['stch_osc'] = 100 * (df_smoothed['close_smooth'] - df_smoothed['low_smooth'].rolling(14).min())/(df_smoothed['high_smooth'].rolling(14).max() - df_smoothed['low_smooth'].rolling(14).min())
  df_smoothed['price_rate_change'] = (df_smoothed['close_smooth'] - df_smoothed['close_smooth'].shift(14))/df_smoothed['close_smooth'].shift(14)
  df_smoothed['log_price'] = np.log(df_smoothed['close_smooth'])
  df_smoothed['log_mov'] = df_smoothed['log_price'].rolling(6).mean()
  df_smoothed['log_diff'] = df_smoothed['log_price'] - df_smoothed['log_mov']
  df_smoothed['fast_mov'] = df_smoothed['close_smooth'].rolling(3).mean()
  df_smoothed['slow_mov'] = df_smoothed['close_smooth'].rolling(7).mean()
  df_smoothed['mov_diff'] = df_smoothed['fast_mov'] - df_smoothed['slow_mov']
  df_smoothed['mac_fast'] = df_smoothed['close_smooth'].rolling(7).mean()
  df_smoothed['mac_slow'] = df_smoothed['close_smooth'].rolling(20).mean()
  df_smoothed['mac_diff'] = df_smoothed['mac_fast'] - df_smoothed['mac_slow']
  df_smoothed['volume_log'] = np.log(df_smoothed['volume_smooth'])
  df_smoothed['pct_change'] = df_smoothed['close_smooth'].pct_change()
  df_smoothed['z_score'] = (df_smoothed['close_smooth'] - df_smoothed['close_smooth'].rolling(7).mean())/df_smoothed['close_smooth'].std()
  df_smoothed['sp_return'] = df_sp_smoothed['close_smooth'].pct_change(14)
  df_smoothed['return_two_week'] = df_smoothed['close_smooth'].pct_change(14)
  df_smoothed['return_day'] = df_smoothed['close_smooth'].pct_change(1)
  df_smoothed['return_month'] = df_smoothed['close_smooth'].pct_change(5)
  df_smoothed['return_two_day'] = df_smoothed['close_smooth'].pct_change(2)
  df_smoothed['return_week'] = df_smoothed['close_smooth'].pct_change(5)
  df_smoothed['return_diff_sp'] = df_smoothed['return_day'] - df_smoothed['sp_return']
  df_smoothed['return_sp_std'] = df_smoothed['sp_return'].rolling(14).std()
  df_smoothed['return_std'] = df_smoothed['return_week'].rolling(14).std()
  df_smoothed['last_close'] = df_smoothed['close_smooth'].shift(14)
  df_smoothed['last_open'] = df_smoothed['Open_smooth'].shift(14)
  df_smoothed['last_high'] = df_smoothed['high_smooth'].shift(14)
  df_smoothed['last_low'] = df_smoothed['low_smooth'].shift(14)
  df_smoothed['high_low'] = df_smoothed['last_high'] - df_smoothed['last_low']
  df_smoothed.head()

  df_smoothed = df_smoothed.dropna()

  df_complete = df_smoothed

  df_complete['win'] = np.where((df_complete['close_smooth'].shift(-5) > df_complete['close_smooth']), 1, 0)
  return df_complete

def get_bearish_or_bullish_signal(df_complete):
  train = df_complete.drop(columns=['win'])
  train_labels = df_complete['win']
  fs = FeatureSelector(data=train, labels=train_labels)
  fs.identify_collinear(correlation_threshold=0.975)
  fs.identify_zero_importance(task='regression',eval_metric='auc',n_iterations=10,early_stopping=True)
  fs.identify_low_importance(cumulative_importance = 0.99)
  all_to_remove = fs.check_removal()
  clean_removed_db = train.drop(columns = all_to_remove)
  split = len(clean_removed_db)-1
  X_train, X_test = clean_removed_db, clean_removed_db[split:]
  y_train = train_labels
  classifier = RandomForestClassifier(n_estimators=3,max_depth=30,max_features='sqrt',min_samples_leaf=27,min_samples_split=2,random_state=42)
  classifier.fit(X_train,y_train)
  y_pred = classifier.predict(X_test)
  return y_pred

  
def buy_or_sell_signal():
  data = get_features(tsla,spy)
  signal = get_bearish_or_bullish_signal(data)

  prediction = signal.tolist()
  return prediction

In [26]:
def rf_trading_algo():
    
    
    os.environ["APCA_API_BASE_URL"] = "https://paper-api.alpaca.markets"

    api = tradeapi.REST('asdf','asdf',api_version='v2') # or use ENV Vars shown below
    account = api.get_account()

    sender_address = 'greenstreetquantitative@gmail.com'
    sender_pass = 'asdf!'
    receiver_address = 'sentive.landry@gmail.com'

    message = MIMEMultipart()
    message['From'] = 'Algo Bot'
    message['To'] = receiver_address
    message['Subject'] = 'Random Forest Algo'

    portfolio = api.list_positions()
    clock = api.get_clock()
    cash = float(account.buying_power)

    symbols = "TSLA"
    
    number_of_shares = cash * 0.50

    signal = buy_or_sell_signal()

    if clock.is_open == True:
      if bool(portfolio) == False:
        if signal == 1:
          api.submit_order(symbol=symbols, qty=number_of_shares,side='buy',type='market',time_in_force='day')
          mail_content = "Bought shares of TSLA"
      else:
        if signal == 0:
          api.close_position(symbols)
          mail_content = "Closed positions of TSLA"
    else:
      mail_content = "The Market Is Closed"

    message.attach(MIMEText(mail_content, 'plain'))

    session = smtplib.SMTP('smtp.gmail.com', 587)
    session.starttls() 
    session.login(sender_address, sender_pass) 
    text = message.as_string()
    session.sendmail(sender_address, receiver_address, text)
    session.quit()
    
    done = 'Mail Sent'
    return done

In [27]:
rf_trading_algo()

17 features with a correlation magnitude greater than 0.97.

Training Gradient Boosting Model

Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[233]	valid_0's auc: 0.969156
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[478]	valid_0's auc: 0.973684
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[539]	valid_0's auc: 0.977273
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[330]	valid_0's auc: 0.979708
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[186]	valid_0's auc: 0.977778
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[316]	valid_0's auc: 0.972493
Training until validation scores don't improve for 100 rounds.
Early stopping, best iteration is:
[350]	valid_0's auc: 0.994387
Training 

'Mail Sent'