In [29]:
%pip install pandas_ta



In [30]:
import yfinance as yf
import pandas as pd
import os
import pandas_ta as ta
import numpy as np
from sklearn.preprocessing import StandardScaler

The strategy here is identical to the one in final.py .

In [57]:
ticker="ITC.NS"      #Set any, with available data on yfinance, to test.

In [32]:
def data(ticker):
  df = yf.Ticker(ticker)
  df = df.history(period="max")
  df.index = pd.to_datetime(df.index)
  del df["Dividends"]
  del df["Stock Splits"]
  df = df.loc["2014-01-01":].copy()
  df["Target"] = (df["Close"] > df["Open"]).astype(int).shift(-1)   ##Binary target...tomorrow increase or decrease direction as an output to be decided by todays statistics
  # frequency = df['Target'].value_counts()
  # print(frequency)
  df.dropna(inplace=True)
  return df

In [33]:
# Calculating technical indicators using pandas_ta
def add_features(df):
    df['EMA_2'] = ta.ema(df['Close'], length=2)
    df['EMA_5'] = ta.ema(df['Close'], length=5)
    df['EMA_20'] = ta.ema(df['Close'], length=20)
    df['EMA_100'] = ta.ema(df['Close'], length=100)

    df['RSI_14'] = ta.rsi(df['Close'], length=14)
    macd = ta.macd(df['Close'])
    df['MACD_signal'] = macd['MACDs_12_26_9']
    df['ATR'] = ta.atr(df['High'], df['Low'], df['Close'], length=14)
    df['OBV'] = ta.obv(df['Close'], df['Volume'])

    # Lagged features (kind of last few rows to predict the next row)
    for lag in range(1, 10):
    # lag-=1
        df[f'Close_t-{lag}'] = df['Close'].shift(lag)
        df[f'Open_t-{lag}'] = df['Open'].shift(lag)
        df[f'High_t-{lag}'] = df['High'].shift(lag)
        df[f'Low_t-{lag}'] = df['Low'].shift(lag)
        df[f'Volume_t-{lag}'] = df['Volume'].shift(lag)

    df = df.dropna()
    return df

In [58]:
df=data(ticker)
df_added=add_features(df)
df_added.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Target,EMA_2,EMA_5,EMA_20,EMA_100,...,Close_t-8,Open_t-8,High_t-8,Low_t-8,Volume_t-8,Close_t-9,Open_t-9,High_t-9,Low_t-9,Volume_t-9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-05-28 00:00:00+05:30,170.364329,170.463376,168.680498,169.7948,6250384,0.0,169.978528,170.36344,171.878512,166.84613,...,176.777771,183.141682,191.907525,174.573935,38847039.0,182.324524,181.086407,183.092148,180.343539,9562248.0
2014-05-29 00:00:00+05:30,170.859577,172.345312,168.58145,169.62146,15314620,0.0,169.740483,170.116113,171.663554,166.901087,...,167.442413,178.288289,178.783538,166.650022,24233856.0,176.777771,183.141682,191.907525,174.573935,38847039.0
2014-05-30 00:00:00+05:30,170.76058,170.76058,166.476711,169.250076,11046877,0.0,169.413545,169.827434,171.433699,166.947602,...,168.804321,169.37385,175.812041,165.684279,9241525.0,167.442413,178.288289,178.783538,166.650022,24233856.0
2014-06-02 00:00:00+05:30,169.373865,169.918638,166.105251,166.724304,10862329,1.0,167.620718,168.793057,170.985186,166.94318,...,169.200546,168.160536,169.81961,167.739578,10660248.0,168.804321,169.37385,175.812041,165.684279,9241525.0
2014-06-03 00:00:00+05:30,168.060524,169.169841,166.900797,168.110947,10778008,0.0,167.947537,168.565687,170.711448,166.966304,...,171.255798,169.86911,172.097714,168.556702,10527001.0,169.200546,168.160536,169.81961,167.739578,10660248.0


In [59]:
# Define lagged features and technical indicators
lagged_features = [f'Close_t-{i}' for i in range(1, 10)] + [f'Open_t-{i}' for i in range(1, 10)] + [f'Volume_t-{i}' for i in range(1, 10)]
featuresq = ['EMA_2', 'EMA_5','EMA_20','EMA_100', 'RSI_14', 'MACD_signal', 'ATR', 'OBV', 'Close', 'Volume']

# Combine all features
all_features = featuresq + lagged_features
# Feature normalization
scaler = StandardScaler()
df[all_features] = scaler.fit_transform(df[all_features])

# Preparing the features and target
X = df_added[all_features]
y = df_added['Target']

The strategy is simple. The model will be trained for the last 350 days of each day, and predict the direction of price for that day. It will buy accordingly at the opening price of the day and clear the position at the day when prediction is 0. It will always use atmost a certain percentage of cash to place the orders. An order log will be created and each order will be displayed. If cash is not enough to place an order, or clear a position, emergency account balance will be used accordingly and the trading process may get terminated.

In [60]:
#Defining the model, as per in final.py
from sklearn.ensemble import RandomForestClassifier
model3=RandomForestClassifier(n_estimators=500, min_samples_split=19, random_state=6,min_samples_leaf=1, max_features=25,n_jobs=-1)

In [61]:
print("Number of days in the data:", len(df))

Number of days in the data: 2602


In [69]:
#Setting any two indices, both between 351(350 days are required for prediction) and len(df)
#Trading will take place, as per model predictions, between the days defined by these indices.
start=351
end=500
df[start:end]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Target,EMA_2,EMA_5,EMA_20,EMA_100,...,Close_t-8,Open_t-8,High_t-8,Low_t-8,Volume_t-8,Close_t-9,Open_t-9,High_t-9,Low_t-9,Volume_t-9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-06-10 00:00:00+05:30,154.604527,156.044624,153.575891,-0.908569,-0.453569,0.0,-0.907668,-0.896670,-0.848129,-0.737999,...,-0.789862,-0.822347,166.371322,160.673507,0.334419,-0.823763,-0.845687,162.589597,159.589422,0.349281
2015-06-11 00:00:00+05:30,155.324591,155.324591,151.827222,-0.933108,-0.331831,1.0,-0.925025,-0.909485,-0.856905,-0.742487,...,-0.760129,-0.801344,168.287460,164.127546,-0.408483,-0.789784,-0.822259,166.371322,160.673507,0.334459
2015-06-12 00:00:00+05:30,152.650118,154.373093,151.724345,-0.930079,-0.542216,1.0,-0.928789,-0.917014,-0.864552,-0.746823,...,-0.840618,-0.780942,166.043598,160.144082,-0.348373,-0.760014,-0.801232,168.287460,164.127546,-0.408309
2015-06-15 00:00:00+05:30,152.752980,155.710315,151.210021,-0.924323,-0.540207,1.0,-0.926202,-0.920107,-0.870914,-0.750950,...,-0.889633,-0.833880,161.419301,154.604554,0.401008,-0.840602,-0.780808,166.043598,160.144082,-0.348210
2015-06-16 00:00:00+05:30,153.267303,154.373091,151.852916,-0.921596,-0.573168,1.0,-0.923520,-0.921257,-0.876407,-0.754938,...,-0.906788,-0.887742,157.716167,153.370167,-0.336433,-0.889676,-0.833804,161.419301,154.604554,0.401037
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-08 00:00:00+05:30,160.056343,163.039400,159.310585,-0.832526,-0.427310,1.0,-0.835227,-0.816803,-0.782879,-0.759324,...,-0.764339,-0.756453,168.696925,166.305333,-0.837525,-0.755948,-0.779269,168.208332,165.842449,-0.482904
2016-01-11 00:00:00+05:30,159.439138,162.936506,159.310554,-0.817984,-0.653102,0.0,-0.824046,-0.817718,-0.786742,-0.760950,...,-0.753924,-0.756147,168.439744,166.768212,-0.809734,-0.764229,-0.756291,168.696925,166.305333,-0.837274
2016-01-12 00:00:00+05:30,162.139337,162.987958,160.724950,-0.823437,-0.647156,0.0,-0.823957,-0.820152,-0.790763,-0.762659,...,-0.746265,-0.762267,169.725553,166.305326,-0.563125,-0.753801,-0.755985,168.439744,166.768212,-0.809488
2016-01-13 00:00:00+05:30,163.219395,164.788076,161.830729,-0.819802,-0.654179,1.0,-0.821502,-0.820558,-0.794050,-0.764258,...,-0.748103,-0.749414,169.211223,167.488260,-1.062972,-0.746133,-0.762112,169.725553,166.305326,-0.562923


In [63]:
def predict(model,i):
    model.fit(X.iloc[i-350:i], y.iloc[i-350:i])
    pred=model.predict(X.iloc[[i]])
    if(pred>0.5):
        return 1
    else:
        return 0


In [68]:
#Lets now set the cash available for trading, account balance, initial portfolio(same as initial cash)
account_balance=50000
cash=50000
portfolio=cash
position=0
buy_price=0
predictions = []
log_file = 'trading_log.txt'
with open(log_file, 'w') as file:
    file.write('Date,Action,Price,Amount,Portfolio Value,Cash,Account Balance\n')
transaction_cost=0.001

In [70]:
#Lets do the backtesting
for i in range(start,end):
  #Updating the account balance after each 100 days
  if((i-start)%100==0):
    account_balance+=10000
  pred=predict(model3,i)      #Can use your own model
  print(f"Date: {i}, Prediction: {pred}")
  predictions.append(pred)
  date = df.index[i]
  open_price = df['Open'].iloc[i]
  value=open_price
  withdraw=0
  if (pred == 1):  # Buy signal
      amount_to_buy = (0.8*cash) // open_price
      req=amount_to_buy * open_price * (1 + transaction_cost)
      if (position == 0):  # No positions open
          # Buy
          if(cash<req):
            action = 'H0LD'
            withdraw=1
          else:
            cash-=req
            withdraw=0
            position = amount_to_buy
            value = open_price
            action = 'BUY'
      else:
          action = 'HOLD'
  else:  # Sell signal
      if position > 0:  # Position open
          # Sell
          cash += position * open_price * (1 + transaction_cost)
          value=open_price
          position = 0
          action = 'SELL'
      else:
          action = 'HOLD'

  # Print order details and portfolio value
  portfolio = cash + position * value
  print(f"Date: {date}, Action: {action}, Price: {value:.2f}, Position: {position:.2f}, Portfolio Value: ${portfolio:.2f}, Cash: ${cash:.2f}, Account Balance: ${account_balance:.2f}")

  # Log the order details
  with open(log_file, 'a') as file:
      file.write(f'{date},{action},{value:.2f},{position:.2f},{portfolio:.2f},{cash:.2f},{account_balance:.2f}\n')

  # Calculate current portfolio value(at end of day)
  close_price = df['Close'].iloc[i]
  value=close_price
  if(portfolio+15000<0):       #Kind of stop loss
        # Sell
        cash += position * close_price * (1 + transaction_cost)
        position = 0
        action = 'SELL'
        portfolio = cash + position * value
        print(f"Date: {date}, Action: {action}, Price: {value:.2f}, Position: {position:.2f}, Portfolio Value: ${portfolio:.2f}, Cash: ${cash:.2f}, Account Balance: ${account_balance:.2f}")

        # Log the order details
        with open(log_file, 'a') as file:
          file.write(f'{date},{action},{value:.2f},{position:.2f},{portfolio:.2f}\n')


  if(withdraw==1):
    req=amount_to_buy * open_price * (1 + transaction_cost)+10000
    if(account_balance<req+5000):
      continue
    else:
      cash+=req
      account_balance-=req





Date: 351, Prediction: 0
Date: 2015-06-10 00:00:00+05:30, Action: HOLD, Price: 0.00, Position: 0.00, Portfolio Value: $40000.00, Cash: $40000.00, Account Balance: $130000.00
Date: 352, Prediction: 0
Date: 2015-06-11 00:00:00+05:30, Action: HOLD, Price: 0.00, Position: 0.00, Portfolio Value: $40000.00, Cash: $40000.00, Account Balance: $130000.00
Date: 353, Prediction: 0
Date: 2015-06-12 00:00:00+05:30, Action: HOLD, Price: 0.00, Position: 0.00, Portfolio Value: $40000.00, Cash: $40000.00, Account Balance: $130000.00
Date: 354, Prediction: 0
Date: 2015-06-15 00:00:00+05:30, Action: HOLD, Price: 0.00, Position: 0.00, Portfolio Value: $40000.00, Cash: $40000.00, Account Balance: $130000.00
Date: 355, Prediction: 0
Date: 2015-06-16 00:00:00+05:30, Action: HOLD, Price: 0.00, Position: 0.00, Portfolio Value: $40000.00, Cash: $40000.00, Account Balance: $130000.00
Date: 356, Prediction: 0
Date: 2015-06-17 00:00:00+05:30, Action: HOLD, Price: 0.00, Position: 0.00, Portfolio Value: $40000.00, C