<a href="https://colab.research.google.com/github/Praveen-ctrl-tech/Stock-Price-Prediction-using-Machine-Learning-in-Python/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np

# ML imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Metrics
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report,
)


# ------------------------------------------------------------
# 1. TECHNICAL INDICATORS (ERROR-FREE)
# ------------------------------------------------------------
def add_indicators(df):
    """Append technical-indicator feature columns to a price frame.

    ``df`` must provide ``Close``, ``High`` and ``Low`` columns. The frame is
    modified in place: the columns listed below are added, every row that
    contains a NaN in any column (which includes the indicator warm-up rows)
    is dropped, and the very same object is returned.

    Added columns, in order: ``Return``, ``RSI``, ``MACD``, ``Signal``,
    ``MA20``, ``BB_std``, ``BB_up``, ``BB_low``, ``BB_width``, ``ATR``,
    ``Return_lag1``, ``Return_lag3``, ``Return_lag5``.
    """
    close = df["Close"]

    # One-row percentage change of the close.
    df["Return"] = close.pct_change()

    # RSI over 14 rows, built from simple rolling means of up- and down-moves.
    change = close.diff()
    mean_up = change.clip(lower=0).rolling(14).mean()
    mean_down = (-change.clip(upper=0)).rolling(14).mean()
    # The epsilon keeps the ratio finite when the window has no down-moves.
    strength = mean_up / (mean_down + 1e-9)
    df["RSI"] = 100 - (100 / (1 + strength))

    # MACD line (12-span EMA minus 26-span EMA) and its 9-span signal line.
    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    df["MACD"] = macd_line
    df["Signal"] = macd_line.ewm(span=9, adjust=False).mean()

    # Bollinger Bands: 20-row mean +/- two rolling standard deviations.
    window20 = close.rolling(20)
    mid_band = window20.mean()
    band_std = window20.std()
    df["MA20"] = mid_band
    df["BB_std"] = band_std
    df["BB_up"] = mid_band + 2 * band_std
    df["BB_low"] = mid_band - 2 * band_std
    df["BB_width"] = df["BB_up"] - df["BB_low"]

    # ATR: 14-row mean of the true range, i.e. the row-wise maximum of the
    # high-low span and the distances of high/low from the previous close.
    prev_close = close.shift()
    range_candidates = [
        df["High"] - df["Low"],
        (df["High"] - prev_close).abs(),
        (df["Low"] - prev_close).abs(),
    ]
    true_range = pd.concat(range_candidates, axis=1).max(axis=1)
    df["ATR"] = true_range.rolling(14).mean()

    # Lagged copies of the return series.
    for lag in (1, 3, 5):
        df[f"Return_lag{lag}"] = df["Return"].shift(lag)

    df.dropna(inplace=True)
    return df


# ------------------------------------------------------------
# 2. IMPROVED LABEL
# ------------------------------------------------------------
def create_labels(df, up=0.002, down=-0.002):
    """Attach a binary next-row direction label and drop unlabeled rows.

    ``Next_Return`` is the percentage change of ``Close`` from each row to
    the following one. ``Target`` is 1 when that change is above ``up`` and 0
    when it is below ``down``; if both conditions hold, 0 takes precedence.

    The frame is modified in place and returned. Rows left with a NaN in any
    column are dropped, which removes rows whose next change lies between the
    two thresholds as well as the final row (it has no following row).
    """
    next_ret = df["Close"].pct_change().shift(-1)
    df["Next_Return"] = next_ret

    rises = (next_ret > up).to_numpy()
    falls = (next_ret < down).to_numpy()
    # np.select takes the first matching condition, so "falls" is listed
    # first to keep 0 as the winner when both thresholds are crossed.
    df["Target"] = np.select([falls, rises], [0.0, 1.0], default=np.nan)

    df.dropna(inplace=True)
    df["Target"] = df["Target"].astype(int)

    return df


# ------------------------------------------------------------
# 3. LOAD + PREPARE DATA
# ------------------------------------------------------------
# Fixed seed so the estimators that use randomness (SVC's probability
# estimates, XGBoost's row/column subsampling) give repeatable metrics.
RANDOM_STATE = 42

df = yf.download("ASIANPAINT.NS", period="25y")

# yfinance can return MultiIndex columns even for a single ticker, which turns
# df["Close"] into a DataFrame and breaks the single-column arithmetic in
# add_indicators(). Keep only the first level in that case.
# NOTE(review): assumes level 0 holds the field names (Close, High, ...), as
# with yfinance's default column grouping - confirm if group_by is changed.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

df.dropna(inplace=True)

# Fail early with a clear message instead of an obscure scaler/estimator
# error further down when the download returned nothing.
if df.empty:
    raise RuntimeError("No price data was downloaded for ASIANPAINT.NS")

df = add_indicators(df)

features = [
    "Return", "RSI", "MACD", "Signal",
    "MA20", "BB_width", "ATR",
    "Return_lag1", "Return_lag3", "Return_lag5"
]

# create_labels() drops the newest row (its next return is not known yet) and
# every row whose next return falls between the thresholds. Snapshot the
# newest feature row now so the forecast in section 7 really describes the
# upcoming session rather than the last *labelled* one.
latest_features = df[features].tail(1).copy()

df = create_labels(df)

X = df[features]
y = df["Target"]

# Train-test split (time-based): the first 80% of rows train, the rest test,
# so the models are never evaluated on rows that precede their training data.
split = int(len(df) * 0.8)
X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y.iloc[:split], y.iloc[split:]

# Scaling: statistics are fitted on the training rows only and reused as-is
# for the test rows and the forecast row.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# ------------------------------------------------------------
# 4. MODELS
# ------------------------------------------------------------

# Logistic Regression
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)
pred_lr = lr.predict(X_test_scaled)
proba_lr = lr.predict_proba(X_test_scaled)[:, 1]

# SVM (probability=True is needed for predict_proba; seeded for repeatability)
svm = SVC(kernel="rbf", probability=True, random_state=RANDOM_STATE)
svm.fit(X_train_scaled, y_train)
pred_svm = svm.predict(X_test_scaled)
proba_svm = svm.predict_proba(X_test_scaled)[:, 1]

# XGBoost (trained on the unscaled features)
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="logloss",
    random_state=RANDOM_STATE,
)
xgb.fit(X_train, y_train)
pred_xgb = xgb.predict(X_test)
proba_xgb = xgb.predict_proba(X_test)[:, 1]


# ------------------------------------------------------------
# 5. EVALUATION FUNCTION
# ------------------------------------------------------------
def evaluate(name, y_true, y_pred, y_proba):
    """Print classification metrics for one model.

    Args:
        name: Label shown in the report header.
        y_true: True class labels.
        y_pred: Predicted class labels, used for accuracy, precision,
            recall, F1, the confusion matrix and the classification report.
        y_proba: Scores for the positive class, used for ROC-AUC only.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\n=========== {name} ===========")
    print("Accuracy     :", accuracy_score(y_true, y_pred))
    print("Precision    :", precision_score(y_true, y_pred))
    print("Recall       :", recall_score(y_true, y_pred))
    print("F1 Score     :", f1_score(y_true, y_pred))
    print("ROC-AUC      :", roc_auc_score(y_true, y_proba))
    print("\nConfusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))


# ------------------------------------------------------------
# 6. PRINT METRICS FOR ALL MODELS
# ------------------------------------------------------------
evaluate("Logistic Regression", y_test, pred_lr, proba_lr)
evaluate("SVM", y_test, pred_svm, proba_svm)
evaluate("XGBoost", y_test, pred_xgb, proba_xgb)


# ------------------------------------------------------------
# 7. NEXT-DAY PREDICTION
# ------------------------------------------------------------
# Use the newest feature row captured before labelling; X.tail(1) would be the
# last labelled row, whose next-day outcome is already known.
latest = latest_features
latest_scaled = scaler.transform(latest)  # scale once, shared by LR and SVM

print("\nNEXT DAY PREDICTION:")
print("Logistic Regression:", "UP" if lr.predict(latest_scaled)[0] == 1 else "DOWN")
print("SVM:", "UP" if svm.predict(latest_scaled)[0] == 1 else "DOWN")
print("XGBoost:", "UP" if xgb.predict(latest)[0] == 1 else "DOWN")


[*********************100%***********************]  1 of 1 completed



Accuracy     : 0.48723186925434114
Precision    : 0.4917293233082707
Recall       : 0.6659877800407332
F1 Score     : 0.5657439446366782
ROC-AUC      : 0.48329771960869417

Confusion Matrix:
 [[150 338]
 [164 327]]

Classification Report:
               precision    recall  f1-score   support

           0       0.48      0.31      0.37       488
           1       0.49      0.67      0.57       491

    accuracy                           0.49       979
   macro avg       0.48      0.49      0.47       979
weighted avg       0.48      0.49      0.47       979


Accuracy     : 0.501532175689479
Precision    : 0.5025906735751295
Recall       : 0.5926680244399185
F1 Score     : 0.5439252336448598
ROC-AUC      : 0.4993447631130847

Confusion Matrix:
 [[200 288]
 [200 291]]

Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.41      0.45       488
           1       0.50      0.59      0.54       491

    accuracy                  