In [1]:
import datetime
from src import CompanyDetails, FindValues, Analyse,PredictValues, SharePricePrediction
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Input, Dropout
from keras.regularizers import l2
kernel_regularizer=l2(0.01)
from keras.optimizers import Adam

In [3]:
# Tickers whose price history is pooled into one training set.
c_list = ['BAJFINANCE', 'MARUTI']
list_of_df = []
# Number of consecutive rows fed to the model per sample (used by later cells).
windowsize = 5
for c in c_list:
    company_details = CompanyDetails(c)
    # Parse the 'YYYY-MM-DD' strings in one vectorised call, promote the dates
    # to the index and sort the *frame* chronologically. Previously
    # .sort_index() was applied to the temporary parsed Series, which only
    # ordered it by its existing row labels and was re-aligned on assignment,
    # so the frame itself was never sorted by date.
    share_price_df = (
        company_details.sharePriceRange(period='2y')
        .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d'))
        .set_index('Date')
        .sort_index()
    )
    list_of_df.append(share_price_df)
    print(f'{c} added to list_of_df')

print(list_of_df)


BAJFINANCE added to list_of_df
MARUTI added to list_of_df
[              Close       ema100       ema200
Date                                         
2023-03-28  5483.50  5483.500000  5483.500000
2023-03-29  5573.42  5485.280594  5484.394726
2023-03-31  5565.05  5486.860186  5485.197266
2023-04-03  5662.44  5490.337014  5486.960876
2023-04-05  5708.27  5494.652519  5489.162957
...             ...          ...          ...
2025-03-24  9090.05  7812.052926  7473.221845
2025-03-25  9067.25  7836.908313  7489.082822
2025-03-26  8866.05  7857.287357  7502.783988
2025-03-27  9003.85  7879.991567  7517.719968
2025-03-28  8945.60  7901.092724  7531.927730

[494 rows x 3 columns],                Close        ema100        ema200
Date                                            
2023-03-28   8056.18   8056.180000   8056.180000
2023-03-29   8141.07   8057.860990   8057.024677
2023-03-31   8137.98   8059.447505   8057.830202
2023-04-03   8344.96   8065.101218   8060.687215
2023-04-05   8290.00   8

In [4]:
def create_windows_for_company(df, window_size=5):
    """
    Build sliding-window samples from one company's price frame.

    The frame is sorted by its index; every run of `window_size`
    consecutive rows becomes one input sample, and the 'Close' value of
    the row immediately after that run becomes the sample's target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Close' and 'ema200'. The index is only
        used for ordering (assumed to be the trading date).
    window_size : int, default 5
        Number of consecutive rows per input sample; must be >= 1.

    Returns
    -------
    X : numpy.ndarray, shape (num_samples, window_size, 2)
        The last axis holds [Close, ema200] for each row of the window.
    y : numpy.ndarray, shape (num_samples,)
        'Close' of the row that follows each window.

    num_samples is max(len(df) - window_size, 0), so a frame that is too
    short yields empty arrays that still carry the trailing dimensions.

    Raises
    ------
    ValueError
        If window_size is smaller than 1.
    KeyError
        If 'Close' or 'ema200' is missing from df.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    missing = [col for col in ('Close', 'ema200') if col not in df.columns]
    if missing:
        raise KeyError(f"DataFrame is missing required column(s): {missing}")

    # Sort by date if not already (assumes date is the index).
    df = df.sort_index()
    close_vals = df['Close'].to_numpy()
    # One row per day, columns = [Close, ema200].
    features = np.column_stack((close_vals, df['ema200'].to_numpy()))

    num_samples = max(len(df) - window_size, 0)
    # row_idx[i, j] == i + j: the j-th row of the i-th window. Fancy indexing
    # with it gathers all windows at once instead of two nested Python loops.
    row_idx = np.arange(num_samples)[:, None] + np.arange(window_size)[None, :]
    X = features[row_idx]

    # Target for window i is the Close at position i + window_size.
    y = close_vals[window_size:].copy()

    return X, y

In [5]:
def load_and_combine_data(list_of_dataframes, window_size=5):
    """
    Window each company's DataFrame and stack the results.

    Every frame in `list_of_dataframes` is passed to
    create_windows_for_company(); frames that produce no samples are
    dropped, and the remaining inputs/targets are concatenated along the
    sample axis in the order the frames were given.

    Returns the tuple (X_all, y_all).
    Raises ValueError when no frame yields any sample.
    """
    windowed = [
        create_windows_for_company(frame, window_size)
        for frame in list_of_dataframes
    ]
    # Keep only the companies that produced at least one window.
    usable = [(inputs, targets) for inputs, targets in windowed if len(inputs) > 0]

    if not usable:
        raise ValueError("No data found after window creation.")

    input_parts, target_parts = zip(*usable)
    return np.concatenate(input_parts, axis=0), np.concatenate(target_parts, axis=0)


In [6]:
# Train / validation / test partitions from the most recent train_model() call.
# They stay None until train_model() has run.
pub_x_train, pub_y_train = None, None
pub_x_valid, pub_y_valid = None, None
pub_x_test, pub_y_test = None, None


def train_model(x_data, y_data, epochs=10, batch_size=32):
    """
    Build, compile, and train a stacked-LSTM regressor, then evaluate it.

    The samples are split by position into train (first 80%), validation
    (next 10%) and test (last 10%); the split itself does no shuffling.
    The three partitions are also published through the module-level
    ``pub_x_*`` / ``pub_y_*`` variables so later cells can inspect them.

    Args:
        x_data: array of shape (num_samples, timesteps, num_features).
        y_data: array with one target per sample (same length as x_data).
        epochs: number of training epochs (default 10).
        batch_size: mini-batch size used for training (default 32).

    Returns:
        The trained keras ``Sequential`` model.

    Raises:
        ValueError: if x_data and y_data have different lengths.

    Side effects:
        Overwrites the ``pub_*`` globals and prints the test MSE / MAE.
    """
    # Without this declaration the assignments below would only create
    # function locals and the module-level pub_* names would stay None.
    global pub_x_train, pub_y_train
    global pub_x_valid, pub_y_valid
    global pub_x_test, pub_y_test

    if len(x_data) != len(y_data):
        raise ValueError(
            f"x_data and y_data must have the same length, "
            f"got {len(x_data)} and {len(y_data)}"
        )

    # Split into train (80%), val (10%), test (10%) by position.
    # NOTE(review): when x_data is the concatenation of several companies (as
    # load_and_combine_data produces), the validation and test slices come
    # only from the tail of that concatenation - confirm this is intended.
    num_samples = len(x_data)
    train_end = int(num_samples * 0.8)
    val_end   = int(num_samples * 0.9)

    x_train, y_train = x_data[:train_end], y_data[:train_end]
    x_val,   y_val   = x_data[train_end:val_end], y_data[train_end:val_end]
    x_test,  y_test  = x_data[val_end:], y_data[val_end:]

    pub_x_train, pub_y_train = x_train, y_train
    pub_x_valid, pub_y_valid = x_val, y_val
    pub_x_test, pub_y_test = x_test, y_test

    # NOTE(review): inputs and targets are used as-is; no scaling is applied here.
    model = Sequential([
        # An explicit Input layer replaces input_shape= on the first LSTM,
        # which recent Keras versions warn about.
        Input(shape=(x_data.shape[1], x_data.shape[2])),

        LSTM(units=120, activation='relu', return_sequences=True,
             kernel_regularizer=l2(0.01), recurrent_dropout=0.2),
        LSTM(units=64, activation='relu', return_sequences=True,
             kernel_regularizer=l2(0.01), recurrent_dropout=0.2),
        # Last recurrent layer emits only its final step for the dense head.
        LSTM(units=32, activation='relu', return_sequences=False,
             kernel_regularizer=l2(0.01), recurrent_dropout=0.2),

        Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.2),

        Dense(16, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.2),

        # final output layer: a single unbounded value per sample
        Dense(1, activation='linear'),
    ])

    model.compile(
        loss='mse',
        optimizer=Adam(learning_rate=0.001),
        metrics=['mae']
    )

    model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=epochs,
        batch_size=batch_size
    )

    test_loss, test_mae = model.evaluate(x_test, y_test)
    print(f"Test MSE: {test_loss:.4f}, Test MAE: {test_mae:.4f}")

    return model
    

In [7]:
# Window every company's frame and stack the results into one dataset.
x_all, y_all = load_and_combine_data(list_of_df, window_size=windowsize)

# Report the shape of the stacked inputs, then of the targets (one per line).
print(x_all.shape, y_all.shape, sep='\n')

(978, 5, 2)
(978,)


In [8]:
# Fit the network on the pooled windows, then persist it to disk.
model = train_model(x_data=x_all, y_data=y_all)
model.save("model_2.keras")

Epoch 1/10


  super().__init__(**kwargs)


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 42ms/step - loss: 73076280.0000 - mae: 8356.3564 - val_loss: 135803408.0000 - val_mae: 11646.3750
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 57026992.0000 - mae: 7321.2661 - val_loss: 28934580.0000 - val_mae: 4972.1494
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 18150200.0000 - mae: 3567.4089 - val_loss: 5998383.0000 - val_mae: 2121.6860
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 10781783.0000 - mae: 2571.3611 - val_loss: 281812.7812 - val_mae: 424.3774
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 9407202.0000 - mae: 2422.8523 - val_loss: 2624779.2500 - val_mae: 1570.3090
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 8256580.0000 - mae: 2272.3225 - val_loss: 774169.0625 - val_mae: 83

In [9]:
# Print the trained model object's string representation.
print(model)

<Sequential name=sequential, built=True>
