In [1]:
!pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=497a3036f605922df985c790ad9ea1fc99914bc4aa767508ea63fb4b243537b6
  Stored in directory: /root/.cache/pip/wheels/5c/a1/5f/c6b85a7d9452057be4ce68a8e45d77ba34234a6d46581777c6
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta

# Yahoo Finance symbols to include in the dataset (".NS" suffix).
tickers = [
    'HDFCBANK.NS', 'ICICIBANK.NS', 'INFY.NS', 'KOTAKBANK.NS',
    'CUB.NS', 'UNIONBANK.NS', 'BANKINDIA.NS',
    'TCS.NS', 'LT.NS', 'ITC.NS', 'SBIN.NS', 'AXISBANK.NS',
    'PNB.NS', 'CANBK.NS', 'BANKBARODA.NS', 'INDUSINDBK.NS',
]

# Download window passed to yfinance as start/end.
START_DATE = "2005-01-01"
END_DATE = "2019-01-01"

# auto_adjust is spelled out so the price basis (split/dividend-adjusted) does
# not silently depend on the installed yfinance version's default.
# NOTE(review): True is assumed to match the default in effect when this
# notebook was last run - confirm against the installed yfinance version.
data = yf.download(
    tickers,
    start=START_DATE,
    end=END_DATE,
    group_by="ticker",
    auto_adjust=True,
)

def create_features(df):
    """Build technical-indicator features for a single ticker's price history.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Close" and "Volume" columns. "High" and "Low" are used
        for the intraday range when both are present. Rows are assumed to be
        in chronological order (``shift(1)`` is treated as "previous day").
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the feature columns appended and the raw
        "Close"/"Volume" columns dropped (their values survive as "Close_t"
        and "Volume_t"). Rows inside the warm-up period of the rolling/EWM
        indicators contain NaN; dropping them is left to the caller.
    """
    df = df.copy()

    close = df["Close"]
    volume = df["Volume"]
    # Daily simple returns, computed once and reused by every volatility feature.
    returns = close.pct_change()

    # Basic close & volume
    df["Close_t"] = close
    df["Volume_t"] = volume

    # EMA (lagged by one row)
    df["EMA_5"] = close.ewm(span=5, adjust=False).mean().shift(1)
    df["EMA_20"] = close.ewm(span=20, adjust=False).mean().shift(1)

    # RSI
    df["RSI_10"] = ta.momentum.RSIIndicator(close, window=10).rsi()

    # MACD
    macd = ta.trend.MACD(close, window_slow=26, window_fast=12, window_sign=9)
    df["MACD"] = macd.macd()

    # Bollinger Bandwidth: band width relative to the middle band
    bb = ta.volatility.BollingerBands(close, window=20, window_dev=2)
    df["Bollinger_bandwidth"] = (bb.bollinger_hband() - bb.bollinger_lband()) / bb.bollinger_mavg()

    # OBV
    df["OBV"] = ta.volume.OnBalanceVolumeIndicator(close, volume).on_balance_volume()

    # Rolling volatility of returns (lagged by one row)
    for window in (5, 20, 60):
        df[f"Volatility_{window}"] = returns.rolling(window).std().shift(1)

    # Momentum: absolute price change over 3 and 5 rows
    df["Momentum_3"] = close - close.shift(3)
    df["Momentum_5"] = close - close.shift(5)

    # Volume features
    volume_sma_5 = volume.rolling(5).mean().shift(1)
    df["Volume_SMA_5"] = volume_sma_5
    df["Volume_SMA_20"] = volume.rolling(20).mean().shift(1)
    # A zero trailing mean would yield +/-inf, which dropna() does not remove;
    # turn those into NaN so they are filtered with the other incomplete rows.
    df["Volume_Ratio"] = (volume / volume_sma_5).replace([np.inf, -np.inf], np.nan)

    # Intraday high-low range relative to the close. When High/Low are not
    # available, fall back to the close-to-close return used previously.
    if {"High", "Low"}.issubset(df.columns):
        df["High_Low_Range"] = (df["High"] - df["Low"]) / close
    else:
        df["High_Low_Range"] = (close - close.shift(1)) / close.shift(1)

    # Drawdown of the close from its rolling 30/60-row peak (lagged by one row)
    df["Rolling_MaxDrawdown_30"] = (close / close.rolling(30).max() - 1).shift(1)
    df["Rolling_MaxDrawdown_60"] = (close / close.rolling(60).max() - 1).shift(1)

    # Volatility Regime Feature: 1 when the lagged 20-row volatility is above
    # its median. Warm-up rows (NaN volatility) compare False and get 0.
    # NOTE(review): the median is taken over the whole series, so each row's
    # flag depends on later observations; use an expanding or training-window
    # median if this dataset feeds a backtest.
    rolling_vol = df["Volatility_20"]
    threshold = rolling_vol.median()
    df["High_Volatility_Flag"] = (rolling_vol > threshold).astype(int)

    return df.drop(columns=["Close", "Volume"])
# Build one feature frame per ticker, attach the next-day close as the target,
# then stack everything into a single dataset and persist it as CSV.
all_features = []

for ticker in tickers:
    stock_df = data[ticker].dropna()
    feat_df = create_features(stock_df).assign(Ticker=ticker)

    # Target is the following row's close for the same ticker.
    feat_df["Target_Close_t+1"] = feat_df["Close_t"].shift(-1)

    # Keep only rows with a non-zero close and non-zero volume, then drop the
    # incomplete rows (indicator warm-up and the final row without a target).
    has_trade = feat_df["Close_t"].ne(0) & feat_df["Volume_t"].ne(0)
    feat_df = (
        feat_df.loc[has_trade]
        .dropna()
        .reset_index()
        .rename(columns={"index": "Date"})
    )
    all_features.append(feat_df)

final_df = (
    pd.concat(all_features)
    .dropna()
    .reset_index(drop=True)
)
final_df.to_csv("stock_features_dataset.csv", index=False)

print("Dataset created with shape:", final_df.shape)
print(final_df.head(10))


  data = yf.download(tickers, start="2005-01-01", end="2019-01-01", group_by="ticker")
[*********************100%***********************]  16 of 16 completed


Dataset created with shape: (54142, 26)
Price       Date       Open       High        Low    Close_t  Volume_t  \
0     2005-04-01  21.855559  22.295873  21.655416  22.077717   3359660   
1     2005-04-04  22.095727  22.171781  21.625393  22.103733   2532680   
2     2005-04-05  22.295875  22.496017  21.497307  21.613388   3840540   
3     2005-04-06  21.465280  22.295872  21.465280  22.149767   4253720   
4     2005-04-07  22.415960  22.694158  22.355917  22.542049   5089240   
5     2005-04-08  22.574065  22.616095  21.535326  21.881573   4854300   
6     2005-04-11  22.728174  22.728174  21.817527  22.297869   4174340   
7     2005-04-12  22.375927  22.413953  22.015670  22.355911   2452720   
8     2005-04-13  22.616096  22.956338  22.295869  22.473995  10028860   
9     2005-04-15  22.455982  22.484001  21.335184  21.557343   5603660   

Price      EMA_5     EMA_20     RSI_10      MACD  ...  Momentum_5  \
0      21.600228  22.379404  46.146581 -0.378468  ...    1.284914   
1      