# Indian Stock Market Analysis & Prediction System
## Stock: 717503.BO (2014-2020)
### Mini Project 7: Market Trends & Future Price Prediction

In [1]:
# Install required packages
!pip install pandas numpy matplotlib seaborn scikit-learn tensorflow gradio plotly



In [2]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import gradio as gr
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

print("All libraries imported successfully!")

All libraries imported successfully!


In [3]:
# Load the dataset
df = pd.read_csv('717503.BO.csv')
print("Dataset Shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())

Dataset Shape: (1565, 7)

First 5 rows:
         Date  Open  High  Low  Close  Adj Close    Volume
0  2014-03-19   8.1   8.1  8.1    8.1   5.443645      3080
1  2014-03-20   6.5   7.8  6.5    7.5   5.040412  39413222
2  2014-03-21   7.5   7.6  7.4    7.5   5.040412   1352554
3  2014-03-24   7.5   7.6  7.1    7.3   4.906001  10788655
4  2014-03-25   7.4   7.8  7.3    7.4   4.973207   4483967


In [4]:
# Data Preprocessing
def preprocess_data(df):
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values('Date').reset_index(drop=True)

    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Day'] = df['Date'].dt.day
    df['DayOfWeek'] = df['Date'].dt.dayofweek
    df['Quarter'] = df['Date'].dt.quarter

    df['Daily_Return'] = df['Close'].pct_change()
    df['Price_Range'] = df['High'] - df['Low']
    df['Price_Change'] = df['Close'] - df['Open']

    df['MA_7'] = df['Close'].rolling(window=7).mean()
    df['MA_21'] = df['Close'].rolling(window=21).mean()
    df['MA_50'] = df['Close'].rolling(window=50).mean()
    df['MA_200'] = df['Close'].rolling(window=200).mean()

    df['EMA_12'] = df['Close'].ewm(span=12).mean()
    df['EMA_26'] = df['Close'].ewm(span=26).mean()

    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['Signal_Line'] = df['MACD'].ewm(span=9).mean()

    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    df['BB_Middle'] = df['Close'].rolling(window=20).mean()
    df['BB_Upper'] = df['BB_Middle'] + 2 * df['Close'].rolling(window=20).std()
    df['BB_Lower'] = df['BB_Middle'] - 2 * df['Close'].rolling(window=20).std()

    df['Volatility'] = df['Daily_Return'].rolling(window=21).std()

    return df

df = preprocess_data(df)
print("Data preprocessing completed!")

Data preprocessing completed!


In [5]:
# Detect Market Trends
def detect_market_trends(df):
    df['Trend'] = 'Neutral'
    bullish_mask = (df['Close'] > df['MA_50']) & (df['MA_50'] > df['MA_200'])
    df.loc[bullish_mask, 'Trend'] = 'Bullish'
    bearish_mask = (df['Close'] < df['MA_50']) & (df['MA_50'] < df['MA_200'])
    df.loc[bearish_mask, 'Trend'] = 'Bearish'
    return df

df = detect_market_trends(df)
print("Trend detection completed!")

Trend detection completed!


In [6]:
# Prepare ML Data
def prepare_ml_data(df):
    df_clean = df.dropna().copy()
    features = ['Open', 'High', 'Low', 'Volume', 'MA_7', 'MA_21', 'MA_50', 'RSI', 'MACD', 'Volatility', 'Price_Range']
    X = df_clean[features].values
    y = df_clean['Close'].values

    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

    return X_scaled, y_scaled, scaler_X, scaler_y, df_clean

X, y, scaler_X, scaler_y, df_clean = prepare_ml_data(df)
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")

Training samples: 1092, Testing samples: 274


In [7]:
# Train Random Forest
print("Training Random Forest...")
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train.ravel())

y_pred_test_rf = rf_model.predict(X_test)
y_pred_test_rf = scaler_y.inverse_transform(y_pred_test_rf.reshape(-1, 1))
y_test_actual = scaler_y.inverse_transform(y_test)

rf_test_rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred_test_rf))
rf_test_mae = mean_absolute_error(y_test_actual, y_pred_test_rf)
rf_test_r2 = r2_score(y_test_actual, y_pred_test_rf)
print(f"RF RMSE: {rf_test_rmse:.4f}, MAE: {rf_test_mae:.4f}, R²: {rf_test_r2:.4f}")

Training Random Forest...
RF RMSE: 0.9955, MAE: 0.5487, R²: -0.0986


In [8]:
# Prepare LSTM Data
def create_lstm_dataset(X, y, time_steps=60):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:(i + time_steps)])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

time_steps = 60
X_lstm, y_lstm = create_lstm_dataset(X, y, time_steps)
split_idx = int(len(X_lstm) * 0.8)
X_train_lstm, X_test_lstm = X_lstm[:split_idx], X_lstm[split_idx:]
y_train_lstm, y_test_lstm = y_lstm[:split_idx], y_lstm[split_idx:]
print(f"LSTM Training: {len(X_train_lstm)}, Testing: {len(X_test_lstm)}")

LSTM Training: 1044, Testing: 262


In [9]:
# Build and Train LSTM
print("Building LSTM Model...")
lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(time_steps, X.shape[1])),
    Dropout(0.2),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

print("Training LSTM...")
history = lstm_model.fit(X_train_lstm, y_train_lstm, validation_data=(X_test_lstm, y_test_lstm), epochs=50, batch_size=32, callbacks=[early_stop], verbose=1)

Building LSTM Model...
Training LSTM...
Epoch 1/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 102ms/step - loss: 0.1329 - mae: 0.2695 - val_loss: 0.0319 - val_mae: 0.1677
Epoch 2/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 86ms/step - loss: 0.0149 - mae: 0.0970 - val_loss: 0.0158 - val_mae: 0.1153
Epoch 3/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 115ms/step - loss: 0.0100 - mae: 0.0790 - val_loss: 0.0055 - val_mae: 0.0435
Epoch 4/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 81ms/step - loss: 0.0080 - mae: 0.0699 - val_loss: 0.0045 - val_mae: 0.0412
Epoch 5/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 86ms/step - loss: 0.0084 - mae: 0.0742 - val_loss: 0.0039 - val_mae: 0.0424
Epoch 6/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 83ms/step - loss: 0.0071 - mae: 0.0659 - val_loss: 0.0030 - val_mae: 0.0325
Epoch 7/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [10]:
# Evaluate LSTM
y_pred_test_lstm = lstm_model.predict(X_test_lstm)
y_pred_test_lstm = scaler_y.inverse_transform(y_pred_test_lstm)
y_test_lstm_actual = scaler_y.inverse_transform(y_test_lstm)

lstm_test_rmse = np.sqrt(mean_squared_error(y_test_lstm_actual, y_pred_test_lstm))
lstm_test_mae = mean_absolute_error(y_test_lstm_actual, y_pred_test_lstm)
lstm_test_r2 = r2_score(y_test_lstm_actual, y_pred_test_lstm)
print(f"LSTM RMSE: {lstm_test_rmse:.4f}, MAE: {lstm_test_mae:.4f}, R²: {lstm_test_r2:.4f}")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step
LSTM RMSE: 0.3688, MAE: 0.2116, R²: 0.8545


In [11]:
# Prediction Functions
def predict_future_prices(model, last_sequence, scaler_y, days=30):
    future_predictions = []
    current_sequence = last_sequence.copy()

    for _ in range(days):
        pred = model.predict(current_sequence.reshape(1, time_steps, -1), verbose=0)
        future_predictions.append(pred[0, 0])
        current_sequence = np.roll(current_sequence, -1, axis=0)
        current_sequence[-1, 0] = pred[0, 0]

    return scaler_y.inverse_transform(np.array(future_predictions).reshape(-1, 1))

In [12]:
# Generate Insights Function
def generate_insights(df):
    insights = []
    start_price = df['Close'].iloc[0]
    end_price = df['Close'].iloc[-1]
    total_return = ((end_price - start_price) / start_price) * 100

    insights.append("## 📊 Overall Performance\n")
    insights.append(f"- **Total Return:** {total_return:.2f}%")
    insights.append(f"- **Start Price (2014):** ₹{start_price:.2f}")
    insights.append(f"- **Current Price (2020):** ₹{end_price:.2f}\n")

    avg_volatility = df['Volatility'].mean()
    risk_level = 'High' if avg_volatility > 0.02 else 'Moderate' if avg_volatility > 0.01 else 'Low'
    insights.append("## 📉 Risk Assessment\n")
    insights.append(f"- **Average Volatility:** {avg_volatility:.4f}")
    insights.append(f"- **Risk Level:** {risk_level}\n")

    current_trend = df['Trend'].iloc[-1]
    current_rsi = df['RSI'].iloc[-1]
    insights.append("## 📈 Current Market Status\n")
    insights.append(f"- **Current Trend:** {current_trend}")
    insights.append(f"- **RSI:** {current_rsi:.2f}")

    if current_rsi > 70:
        insights.append("- **Signal:** ⚠️ Overbought")
    elif current_rsi < 30:
        insights.append("- **Signal:** ⚠️ Oversold")
    else:
        insights.append("- **Signal:** ✅ Neutral\n")

    insights.append("## 🤖 Model Accuracy\n")
    insights.append(f"- **LSTM RMSE:** {lstm_test_rmse:.4f}")
    insights.append(f"- **LSTM R² Score:** {lstm_test_r2:.4f}\n")
    insights.append("⚠️ **Disclaimer:** Educational purposes only.")

    return "\n".join(insights)

In [13]:
# Gradio Interface Functions
def gradio_predict_price(days):
    last_sequence = X_lstm[-1]
    predictions = predict_future_prices(lstm_model, last_sequence, scaler_y, days=int(days))

    last_date = df_clean['Date'].iloc[-1]
    future_dates = pd.date_range(start=last_date + timedelta(days=1), periods=int(days), freq='D')

    fig = go.Figure()
    historical = df_clean.tail(60)
    fig.add_trace(go.Scatter(x=historical['Date'], y=historical['Close'], name='Historical', line=dict(color='blue', width=2)))
    fig.add_trace(go.Scatter(x=future_dates, y=predictions.flatten(), name='Predicted', line=dict(color='red', width=2, dash='dash')))
    fig.update_layout(title=f'Stock Price Prediction - Next {int(days)} Days', xaxis_title='Date', yaxis_title='Price', height=500)

    current_price = df_clean['Close'].iloc[-1]
    predicted_final = predictions[-1][0]
    price_change = predicted_final - current_price
    percent_change = (price_change / current_price) * 100

    summary = f"""## 📊 Prediction Summary\n\n- **Current Price:** ₹{current_price:.2f}\n- **Predicted Price (Day {int(days)}):** ₹{predicted_final:.2f}\n- **Expected Change:** ₹{price_change:.2f} ({percent_change:+.2f}%)\n- **Trend:** {'📈 Bullish' if price_change > 0 else '📉 Bearish'}"""

    return fig, summary

def gradio_technical_analysis():
    latest = df.iloc[-1]
    analysis = f"""## 📈 Current Technical Indicators\n\n- **RSI:** {latest['RSI']:.2f}\n- **MACD:** {latest['MACD']:.4f}\n- **Volatility:** {latest['Volatility']:.4f}\n- **Trend:** {latest['Trend']}"""
    return analysis

def gradio_show_insights():
    return generate_insights(df)

In [14]:
# Create Gradio Interface
with gr.Blocks(title="Stock Market Analysis", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📈 Indian Stock Market Analysis & Prediction System\n## Stock: 717503.BO (2014-2020)")

    with gr.Tabs():
        with gr.Tab("🔮 Price Prediction"):
            gr.Markdown("### Predict Future Stock Prices")
            with gr.Row():
                days_input = gr.Slider(minimum=7, maximum=90, value=30, step=1, label="Prediction Days")
                predict_btn = gr.Button("Predict", variant="primary")
            prediction_plot = gr.Plot(label="Price Prediction Chart")
            prediction_summary = gr.Markdown()
            predict_btn.click(fn=gradio_predict_price, inputs=[days_input], outputs=[prediction_plot, prediction_summary])

        with gr.Tab("📊 Technical Analysis"):
            gr.Markdown("### Technical Indicators")
            analyze_btn = gr.Button("Analyze", variant="primary")
            technical_summary = gr.Markdown()
            analyze_btn.click(fn=gradio_technical_analysis, inputs=[], outputs=[technical_summary])

        with gr.Tab("💡 Key Insights & Recommendations"):
            gr.Markdown("### Comprehensive Market Analysis")
            insights_btn = gr.Button("Generate Insights", variant="primary")
            insights_output = gr.Markdown()
            insights_btn.click(fn=gradio_show_insights, inputs=[], outputs=[insights_output])

    gr.Markdown("---\n⚠️ **Disclaimer:** For educational purposes only. Not financial advice.")

demo.launch(share=True, debug=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6dc75a093fc868c261.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


