In [22]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import xgboost as xgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import plotly.graph_objects as go
import plotly.express as px
from datetime import timedelta

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [23]:
# Load company info (Equity_L.csv)
company_df = pd.read_csv('EQUITY_L.csv')
company_df.columns = company_df.columns.str.replace(' ', '')  # Clean column names

# Load price data (all_stocks_data.csv)
price_df = pd.read_csv('all_stocks_data.csv')
price_df.columns = price_df.columns.str.replace(' ', '')  # Clean column names

# Convert Date to datetime
price_df['Date'] = pd.to_datetime(price_df['Date'])

# Display samples
print("Company Data (Equity_L) Sample:")
print(company_df.head())
print("\nPrice Data (all_stocks_data) Sample:")
print(price_df.head())
print(f"\nUnique Symbols in Price Data: {price_df['Symbol'].unique()}")

Company Data (Equity_L) Sample:
            CompanyName                Industry   Symbol Series      ISINCode
0      360 ONE WAM Ltd.      Financial Services   360ONE     EQ  INE466L01038
1         3M India Ltd.             Diversified  3MINDIA     EQ  INE470A01017
2        ABB India Ltd.           Capital Goods      ABB     EQ  INE117A01022
3              ACC Ltd.  Construction Materials      ACC     EQ  INE012A01025
4  AIA Engineering Ltd.           Capital Goods   AIAENG     EQ  INE212H01026

Price Data (all_stocks_data) Sample:
   Symbol       Date        Open        High         Low       Close   Volume
0  360ONE 2022-09-16  401.712709  442.821761  398.905255  429.173798  1167304
1  360ONE 2022-09-19  431.320691  436.451951  425.139599  429.327179   222264
2  360ONE 2022-09-20  435.980118  458.864355  429.586692  446.018524   593832
3  360ONE 2022-09-21  456.505157  457.165744  437.879286  442.939758   239496
4  360ONE 2022-09-22  444.603019  445.157425  432.323372  435.803192   1

In [24]:
# Add features to price data
def add_features(df):
    df['DailyReturn'] = df['Close'].pct_change()
    df['MA5'] = df['Close'].rolling(window=5).mean()
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['Volatility'] = df['Close'].rolling(window=20).std()
    df['Target'] = (df['Close'].shift(-1) / df['Close'] >= 1.01).astype(int)
    return df

# Apply features per stock
price_df = price_df.groupby('Symbol').apply(add_features).reset_index(drop=True)
price_df = price_df.dropna()

print("Price Data with Features:")
print(price_df.head())

Price Data with Features:
    Symbol       Date        Open        High         Low       Close  Volume  \
19  360ONE 2022-10-14  418.073805  430.282665  418.073805  426.566925  260412   
20  360ONE 2022-10-17  428.159369  447.292473  423.323009  444.154755  434780   
21  360ONE 2022-10-18  444.154764  448.247970  437.950057  444.555817  198928   
22  360ONE 2022-10-19  445.499508  448.247982  440.002590  447.021210   60228   
23  360ONE 2022-10-20  448.247967  448.247967  422.249596  426.673096  304160   

    DailyReturn         MA5        MA20  Volatility  Target  
19     0.026251  422.969128  427.226895   10.627681       1  
20     0.041231  427.696979  427.975943   11.280036       0  
21     0.000903  431.207471  428.737375   11.874377       0  
22     0.005546  435.590863  428.787509   11.953035       0  
23    -0.045519  437.794360  427.974176   11.483582       0  


In [25]:
# Prepare LSTM data for all stocks in price dataset
def prepare_lstm_data(df, lookback=20):
    X, y = [], []
    scalers = {}
    
    for symbol in df['Symbol'].unique():
        stock_data = df[df['Symbol'] == symbol][['Close']].values
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(stock_data)
        scalers[symbol] = scaler
        
        for i in range(lookback, len(scaled_data) - 7):
            X.append(scaled_data[i-lookback:i])
            y.append(scaled_data[i:i+7])
    
    X, y = np.array(X), np.array(y)
    return X, y, scalers

X_lstm, y_lstm, scalers = prepare_lstm_data(price_df)
print(f"LSTM Data Shape: X={X_lstm.shape}, y={y_lstm.shape}")

LSTM Data Shape: X=(107386, 20, 1), y=(107386, 7, 1)


In [26]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2, shuffle=False)

# Build LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(50))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(7))  # Predict 7 days
lstm_model.compile(optimizer='adam', loss='mse')

# Train
lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Save model
lstm_model.save('lstm_model_all.h5')

Epoch 1/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 19ms/step - loss: 0.0134 - val_loss: 0.0033
Epoch 2/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 25ms/step - loss: 0.0040 - val_loss: 0.0031
Epoch 3/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 20ms/step - loss: 0.0036 - val_loss: 0.0029
Epoch 4/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 20ms/step - loss: 0.0036 - val_loss: 0.0029
Epoch 5/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 28ms/step - loss: 0.0035 - val_loss: 0.0029
Epoch 6/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 20ms/step - loss: 0.0035 - val_loss: 0.0028
Epoch 7/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 22ms/step - loss: 0.0035 - val_loss: 0.0029
Epoch 8/20
[1m2685/2685[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 26ms/step - loss: 0.0035 - val_loss: 0.0029
Epoch 9/



In [27]:
# Features for XGBoost
features = ['Close', 'Volume', 'MA5', 'MA20', 'Volatility', 'DailyReturn']
X_xgb = price_df[features]
y_xgb = price_df['Target']

# Scale features
scaler_xgb = MinMaxScaler()
X_xgb_scaled = scaler_xgb.fit_transform(X_xgb)

# Split data
X_train_xgb, X_test_xgb, y_train_xgb, y_test_xgb = train_test_split(X_xgb_scaled, y_xgb, test_size=0.2, random_state=42)

print(f"XGBoost Data Shape: X={X_train_xgb.shape}, y={y_train_xgb.shape}")

XGBoost Data Shape: X=(90120, 6), y=(90120,)


In [28]:
# Train XGBoost
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train_xgb, y_train_xgb)

# Evaluate
y_pred = xgb_model.predict(X_test_xgb)
from sklearn.metrics import accuracy_score
print(f"XGBoost Accuracy: {accuracy_score(y_test_xgb, y_pred):.2f}")

# Save model
xgb_model.save_model('xgb_model_all.json')

XGBoost Accuracy: 0.74


In [29]:
def hybrid_prediction(symbol, price_df, lstm_model, xgb_model, scalers, scaler_xgb, lookback=20):
    stock_data = price_df[price_df['Symbol'] == symbol].sort_values('Date')
    
    if len(stock_data) < lookback or symbol not in scalers:
        return None, None
    
    # LSTM Prediction
    scaler = scalers[symbol]
    recent_data = stock_data['Close'].values[-lookback:]
    scaled_recent = scaler.transform(recent_data.reshape(-1, 1))
    X_pred = np.array([scaled_recent])
    lstm_pred = lstm_model.predict(X_pred)
    week_prices = scaler.inverse_transform(lstm_pred).flatten()
    
    # XGBoost Prediction
    latest_features = stock_data[features].iloc[-1].values.reshape(1, -1)
    scaled_features = scaler_xgb.transform(latest_features)
    profit_prob = xgb_model.predict_proba(scaled_features)[0, 1]
    
    return week_prices, profit_prob

# Test prediction
symbol = 'HCLTECH'
week_prices, profit_prob = hybrid_prediction(symbol, price_df, lstm_model, xgb_model, scalers, scaler_xgb)
if week_prices is not None:
    print(f"1-Week Prediction for {symbol}: {week_prices}")
    print(f"Probability of 1% Profit Today: {profit_prob:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step
1-Week Prediction for HCLTECH: [1585.5009 1588.0616 1591.196  1596.9835 1597.7451 1597.1271 1597.5212]
Probability of 1% Profit Today: 0.20


In [30]:
def plot_stock_charts(symbol, price_df, week_prices):
    stock_data = price_df[price_df['Symbol'] == symbol].tail(100)
    
    if stock_data.empty:
        print(f"No price data for {symbol} to plot.")
        return
    
    # Candlestick
    fig1 = go.Figure(data=[go.Candlestick(x=stock_data['Date'],
                                          open=stock_data['Open'],
                                          high=stock_data['High'],
                                          low=stock_data['Low'],
                                          close=stock_data['Close'])])
    fig1.update_layout(title=f'{symbol} Candlestick Chart', xaxis_title='Date', yaxis_title='Price')
    
    # Moving Averages with Prediction
    future_dates = pd.date_range(start=stock_data['Date'].iloc[-1] + timedelta(days=1), periods=7)
    pred_df = pd.DataFrame({'Date': future_dates, 'Predicted': week_prices})
    fig2 = go.Figure()
    fig2.add_trace(go.Scatter(x=stock_data['Date'], y=stock_data['MA5'], name='MA5'))
    fig2.add_trace(go.Scatter(x=stock_data['Date'], y=stock_data['MA20'], name='MA20'))
    fig2.add_trace(go.Scatter(x=stock_data['Date'], y=stock_data['Close'], name='Close'))
    fig2.add_trace(go.Scatter(x=pred_df['Date'], y=pred_df['Predicted'], name='Prediction', line=dict(dash='dash')))
    fig2.update_layout(title=f'{symbol} Price with Moving Averages & Prediction', xaxis_title='Date', yaxis_title='Price')
    
    # Volatility
    fig3 = px.line(stock_data, x='Date', y='Volatility', title=f'{symbol} 20-Day Volatility')
    
    fig1.show()
    fig2.show()
    fig3.show()

# Generate plots
if week_prices is not None:
    plot_stock_charts('HCLTECH', price_df, week_prices)

In [31]:
def trading_bot(price_df, capital=100000):
    daily_profit_target = capital * 0.01
    stocks = price_df['Symbol'].unique()
    
    for symbol in stocks:
        week_prices, profit_prob = hybrid_prediction(symbol, price_df, lstm_model, xgb_model, scalers, scaler_xgb)
        if week_prices is None:
            continue
        latest_close = price_df[price_df['Symbol'] == symbol]['Close'].iloc[-1]
        
        if profit_prob > 0.7:
            shares = daily_profit_target / (week_prices[0] - latest_close)
            if shares > 0 and (shares * latest_close) <= capital:
                print(f"Buy {symbol}: {shares:.2f} shares at {latest_close:.2f}, Target Sell at {week_prices[0]:.2f}")
                capital -= shares * latest_close
                capital += shares * week_prices[0]
                print(f"New Capital: {capital:.2f}")
    
    return capital

new_capital = trading_bot(price_df)
print(f"Final Capital: {new_capital:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47

In [34]:
# User input using company list
company_name = input("Enter Company Name (e.g., 'HCL Technologies Ltd.'): ")
symbol = company_df[company_df['CompanyName'] == company_name]['Symbol'].iloc[0] if company_name in company_df['CompanyName'].values else None

if symbol:
    week_prices, profit_prob = hybrid_prediction(symbol, price_df, lstm_model, xgb_model, scalers, scaler_xgb)
    if week_prices is not None:
        print(f"1-Week Prediction for {company_name} ({symbol}): {week_prices}")
        print(f"Probability of 1% Profit Today: {profit_prob:.2f}")
        plot_stock_charts(symbol, price_df, week_prices)
    else:
        print(f"No price data available for {company_name} ({symbol}). Prediction not possible.")
else:
    print("Company not found in the list.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
1-Week Prediction for HCL Technologies Ltd. (HCLTECH): [1585.5009 1588.0616 1591.196  1596.9835 1597.7451 1597.1271 1597.5212]
Probability of 1% Profit Today: 0.20


In [35]:
import joblib

# Save LSTM model
lstm_model.save('lstm_model_all.h5')

# Save XGBoost model
joblib.dump(xgb_model, 'xgb_model_all.pkl')

# Save scalers
joblib.dump(scalers, 'scalers.pkl')
joblib.dump(scaler_xgb, 'scaler_xgb.pkl')

print("Hybrid model and scalers have been saved.")



Hybrid model and scalers have been saved.


In [36]:
from sklearn.metrics import accuracy_score

def hybrid_model_accuracy(price_df, lstm_model, xgb_model, scalers, scaler_xgb, lookback=20):
    y_true = []
    y_pred = []
    
    for symbol in price_df['Symbol'].unique():
        stock_data = price_df[price_df['Symbol'] == symbol].sort_values('Date')
        
        if len(stock_data) < lookback or symbol not in scalers:
            continue
        
        # LSTM Prediction
        scaler = scalers[symbol]
        recent_data = stock_data['Close'].values[-lookback:]
        scaled_recent = scaler.transform(recent_data.reshape(-1, 1))
        X_pred = np.array([scaled_recent])
        lstm_pred = lstm_model.predict(X_pred)
        week_prices = scaler.inverse_transform(lstm_pred).flatten()
        
        # XGBoost Prediction
        latest_features = stock_data[features].iloc[-1].values.reshape(1, -1)
        scaled_features = scaler_xgb.transform(latest_features)
        profit_prob = xgb_model.predict_proba(scaled_features)[0, 1]
        
        # Actual value
        actual = stock_data['Target'].iloc[-1]
        
        # Predicted value
        predicted = 1 if profit_prob > 0.5 else 0
        
        y_true.append(actual)
        y_pred.append(predicted)
    
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

accuracy = hybrid_model_accuracy(price_df, lstm_model, xgb_model, scalers, scaler_xgb)
print(f"Hybrid Model Accuracy: {accuracy:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 633ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4