In [None]:
import yfinance as yf

# Symbol whose price history is analysed throughout the notebook.
ticker = "TATAMOTORS.NS"

# Fetch the price history for the fixed date window below.
data = yf.download(ticker, start="2020-01-01", end="2025-01-01")

# Fail fast when nothing came back. NOTE(review): yfinance is understood to
# report a failed download by returning an empty frame instead of raising --
# confirm against the installed version. Without this check the notebook only
# breaks later, in a cell far from the real cause.
if data is None or data.empty:
    raise ValueError(
        f"No price data returned for {ticker!r}; "
        "check the symbol, the date range and the network connection."
    )



In [None]:
import numpy as np
import pandas as pd


# Feature engineering: cleans the downloaded frame, derives the technical
# indicators used as model inputs, and builds the binary prediction target.
#
# This cell overwrites `data` with a trimmed copy, so it is not idempotent:
# re-run the download cell before re-running this one, otherwise the rolling
# warm-up rows are dropped a second time.

# Number of rows to look ahead when labelling a row as "up" (1) or "not up" (0).
TARGET_HORIZON = 5

# --- Column normalisation ----------------------------------------------------
# Flatten MultiIndex columns by keeping only the first level of each entry
# (presumably the price-field name as produced by yfinance -- confirm).
if isinstance(data.columns, pd.MultiIndex):
    data.columns = [col[0] for col in data.columns]

# Fall back to the adjusted close when no plain "Close" column exists.
if "Close" not in data.columns and "Adj Close" in data.columns:
    data["Close"] = data["Adj Close"]

if "Volume" not in data.columns:
    raise KeyError("No 'Volume' column found in data. Make sure you downloaded OHLCV data.")

# Force every price/volume column that is present to numeric; unparseable
# values become NaN and are removed by the final dropna().
for c in ["Open", "High", "Low", "Close", "Adj Close", "Volume"]:
    if c in data.columns:
        data[c] = pd.to_numeric(data[c], errors="coerce")

# --- Returns and moving averages ---------------------------------------------
data["returns"] = data["Close"].pct_change()

data["MA5"] = data["Close"].rolling(window=5).mean()
data["MA20"] = data["Close"].rolling(window=20).mean()
data["MA50"] = data["Close"].rolling(window=50).mean()

# --- RSI (14-row simple-average variant) -------------------------------------
delta = data["Close"].diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)

avg_gain = gain.rolling(14).mean()
avg_loss = loss.rolling(14).mean()

# A window with zero average loss yields NaN here (not RSI = 100), so those
# rows are discarded by the final dropna().
rs = avg_gain / avg_loss.replace(0, np.nan)
data["RSI"] = 100 - (100 / (1 + rs))

# --- Volume ------------------------------------------------------------------
# pct_change() produces +/-inf when the previous row's volume is 0; those
# values are converted to NaN just before the final dropna().
data["Volume_Change"] = data["Volume"].pct_change()

# --- Target ------------------------------------------------------------------
# 1 when the close TARGET_HORIZON rows ahead is higher than the current close.
# The last TARGET_HORIZON rows have no future close; a plain comparison would
# label them 0 (NaN > x is False) and the int cast would hide that from
# dropna(). Keep them as NaN so they are dropped instead of mislabelled.
future_close = data["Close"].shift(-TARGET_HORIZON)
data["Target"] = (future_close > data["Close"]).astype(float).where(future_close.notna())

# --- Lagged returns ----------------------------------------------------------
data["Return_lag1"] = data["returns"].shift(1)
data["Return_lag2"] = data["returns"].shift(2)
data["Return_lag5"] = data["returns"].shift(5)

# --- Rolling volatility of returns -------------------------------------------
data["Volatility_5"]  = data["returns"].rolling(window=5).std()
data["Volatility_10"] = data["returns"].rolling(window=10).std()
data["Volatility_20"] = data["returns"].rolling(window=20).std()

# --- Bollinger bands (20-row mean +/- 2 standard deviations) -----------------
ma20 = data["MA20"]  # same 20-row rolling mean as computed above
std20 = data["Close"].rolling(20).std()
data["Bollinger_Upper"] = ma20 + 2 * std20
data["Bollinger_Lower"] = ma20 - 2 * std20
data["Bollinger_Width"] = (data["Bollinger_Upper"] - data["Bollinger_Lower"]) / ma20.replace(0, np.nan)

# --- MACD (12/26 EMA difference, 9-span signal line) -------------------------
ema12 = data["Close"].ewm(span=12, adjust=False).mean()
ema26 = data["Close"].ewm(span=26, adjust=False).mean()
data["MACD"] = ema12 - ema26
data["MACD_Signal"] = data["MACD"].ewm(span=9, adjust=False).mean()

# --- Final clean-up ----------------------------------------------------------
# Treat infinities as missing, then drop every incomplete row: rolling warm-up
# rows at the start and unlabeled rows at the end.
data = data.replace([np.inf, -np.inf], np.nan).dropna().copy()

# Every remaining label is 0.0 or 1.0; restore the integer dtype.
data["Target"] = data["Target"].astype(int)


-- DATA PROCESSING --

In [None]:
from tabulate import tabulate

# Show the first rows of the processed frame as a psql-style text table,
# using the column names as headers.
preview_rows = data.head()
print(tabulate(preview_rows, headers="keys", tablefmt="psql"))

-- X AND Y SPLITTING --

In [None]:
import matplotlib.pyplot as plt


# Columns used as model inputs. Only Bollinger_Width is kept from the
# Bollinger columns, and only MACD from the MACD pair; the upper/lower bands
# and MACD_Signal are left out.
feature_columns = [
    "returns",
    "MA5", "MA20", "MA50",
    "RSI",
    "Volume_Change",
    "Return_lag1", "Return_lag2", "Return_lag5",
    "Volatility_5", "Volatility_10", "Volatility_20",
    "Bollinger_Width",
    "MACD",
]

# Independent copies so later edits to X / y do not touch `data`.
X = data[feature_columns].copy()
y = data["Target"].copy()

# Pairwise correlation between the features, drawn as a labelled heatmap.
corr = X.corr()
cols = corr.columns.tolist()
tick_positions = range(len(cols))

fig, ax = plt.subplots(figsize=(10, 8))
im = ax.imshow(corr.values, aspect='auto')
fig.colorbar(im, ax=ax)

ax.set_xticks(tick_positions)
ax.set_xticklabels(cols, rotation=90)
ax.set_yticks(tick_positions)
ax.set_yticklabels(cols)
ax.set_title("Feature Correlation Heatmap")

fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt


# Stack one time-series panel per feature, with the target in the last panel.
features = X.columns.tolist()
n_panels = len(features) + 1  # one extra row for Target (y)

# squeeze=False keeps `axes` two-dimensional whatever the panel count.
fig, axes = plt.subplots(n_panels, 1, figsize=(14, 10), squeeze=False)
panels = axes[:, 0]

# Feature panels, in column order.
for ax, col in zip(panels, features):
    ax.plot(data.index, X[col], label=col)
    ax.set_title(col, fontsize=10)
    ax.grid(True)

# Final panel: the binary target, drawn in red.
target_ax = panels[-1]
target_ax.plot(data.index, y, label="Target (Up=1, Down=0)", color="red")
target_ax.set_title("Target", fontsize=10)
target_ax.grid(True)

fig.tight_layout()
plt.show()


-- ML ALGORITHM --

In [None]:
from sklearn.model_selection import train_test_split

# Split by position rather than at random: with shuffle=False the first 80%
# of rows become the training set and the remaining 20% the test set.
split_parts = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_test, y_train, y_test = split_parts

# Report the (rows, columns) shape of each feature matrix.
for caption, frame in (("Train size:", X_train), ("Test size:", X_test)):
    print(caption, frame.shape)


In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Create model: a small gradient-boosted tree ensemble.
# `use_label_encoder` is intentionally not passed: it is deprecated, and recent
# XGBoost releases no longer take it as a named parameter and only warn that
# it is unused. The 0/1 integer labels in `y_train` need no encoding.
xgb = XGBClassifier(
    n_estimators=100,
    max_depth=3,           # shallow trees
    learning_rate=0.1,
    subsample=0.8,         # each tree is fitted on 80% of the rows
    colsample_bytree=0.8,  # each tree is fitted on 80% of the columns
    random_state=42,       # fixed seed so the row/column sampling is repeatable
    eval_metric="logloss",
)

# Train on the training portion only.
xgb.fit(X_train, y_train)

# Predict class labels for the held-out rows.
y_pred = xgb.predict(X_test)

# Accuracy: share of held-out rows whose predicted label matches the true one.
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))
