In [None]:
import os

# show where you are
cwd = os.getcwd()
print("Current directory:", cwd)
print("\nFiles here:", os.listdir())

# show what's inside sample_data (if it exists)
sample_dir = "sample_data"
if os.path.exists(sample_dir):
    print("\nFiles inside sample_data:", os.listdir(sample_dir))


In [None]:
import pandas as pd

# STEP 0: LOAD STOCK DATA

# Read the four hourly CSV files, in the same order as the names they are bound to.
df_icici, df_ntpc, df_ambuja, df_wipro = (
    pd.read_csv(csv_path)
    for csv_path in (
        "ICICI_Bank_hourly.csv",
        "NTPC_hourly.csv",
        "Ambuja_Cement_hourly.csv",
        "Wipro_hourly.csv",
    )
)
loaded_frames = {
    "ICICI": df_icici,
    "NTPC": df_ntpc,
    "Ambuja": df_ambuja,
    "Wipro": df_wipro,
}

# Optional cleanup - make column names consistent (trimmed, first letter upper-case)
for df in loaded_frames.values():
    df.columns = [col.strip().capitalize() for col in df.columns]

print("âœ… Datasets loaded successfully.")
for name, df in loaded_frames.items():
    print(f"{name}: {df.shape[0]} rows | columns: {list(df.columns)}")


In [None]:
# Install pandas-ta; the next cell imports it as `pandas_ta`.
!pip install pandas-ta


In [None]:
# Add the indicator function we built earlier
import pandas_ta as ta
import pandas as pd

def add_indicators(df):
    """Return a copy of ``df`` with EMA, RSI, ROC and momentum columns added.

    Column names are lower-cased and stripped. Columns that contain numbers
    are coerced to numeric dtype (unparseable entries become NaN); columns
    that contain no parseable number at all (for example timestamp strings)
    are left untouched. Coercing those as well would turn them into all-NaN
    columns and make the final ``dropna()`` delete every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with a ``close`` column, or a ``price`` column that is
        used as the close when ``close`` is absent (case-insensitive).

    Returns
    -------
    pandas.DataFrame
        New frame with ``ema_10``, ``ema_30``, ``rsi``, ``roc`` and
        ``momentum`` added, rows containing NaN removed, and a fresh
        0..n-1 index. The input frame is not modified.

    Raises
    ------
    KeyError
        If neither a ``close`` nor a ``price`` column is present.
    """
    df = df.copy()
    df.columns = [c.lower().strip() for c in df.columns]

    # Convert numeric columns, but keep purely textual columns as they are.
    for col in df.columns:
        converted = pd.to_numeric(df[col], errors='coerce')
        if converted.notna().any() or df[col].isna().all():
            df[col] = converted

    if 'close' not in df.columns:
        if 'price' in df.columns:
            # The indicators need numbers, so the fallback is always coerced.
            df['close'] = pd.to_numeric(df['price'], errors='coerce')
        else:
            raise KeyError("No 'close' or 'price' column found!")

    close = df['close']
    df['ema_10'] = ta.ema(close, length=10)
    df['ema_30'] = ta.ema(close, length=30)
    df['rsi'] = ta.rsi(close, length=14)
    df['roc'] = ta.roc(close, length=10)
    df['momentum'] = ta.mom(close, length=10)

    # Drop indicator warm-up rows and any row with an unparseable value.
    df = df.dropna().reset_index(drop=True)
    return df

# Apply to all 4 (same order as the names on the left-hand side)
df_icici_ind, df_ntpc_ind, df_ambuja_ind, df_wipro_ind = map(
    add_indicators,
    (df_icici, df_ntpc, df_ambuja, df_wipro),
)

print("âœ… Indicators added successfully!")
# Last expression of the cell: preview the first rows of the ICICI result.
df_icici_ind.head()


In [None]:
# (rows, columns) of the ICICI frame returned by add_indicators.
df_icici_ind.shape

In [None]:
import pandas_ta as ta
import pandas as pd

def add_indicators(df):
    """Return a copy of ``df`` with EMA, RSI, ROC and momentum columns added.

    Column names are lower-cased and stripped. Columns that contain numbers
    are coerced to numeric dtype (unparseable entries become NaN); columns
    with no parseable number at all (for example timestamp strings) are left
    untouched instead of being wiped to NaN. Remaining gaps, including the
    indicator warm-up rows, are forward-filled and then back-filled, so no
    row is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with a ``close`` column, or a ``price`` column that is
        used as the close when ``close`` is absent (case-insensitive).

    Returns
    -------
    pandas.DataFrame
        New frame with ``ema_10``, ``ema_30``, ``rsi``, ``roc`` and
        ``momentum`` added and a fresh 0..n-1 index. The input frame is not
        modified.

    Raises
    ------
    KeyError
        If neither a ``close`` nor a ``price`` column is present.
    """
    df = df.copy()
    df.columns = [c.lower().strip() for c in df.columns]

    # Convert numeric columns, but keep purely textual columns as they are.
    for col in df.columns:
        converted = pd.to_numeric(df[col], errors='coerce')
        if converted.notna().any() or df[col].isna().all():
            df[col] = converted

    if 'close' not in df.columns:
        if 'price' in df.columns:
            # The indicators need numbers, so the fallback is always coerced.
            df['close'] = pd.to_numeric(df['price'], errors='coerce')
        else:
            raise KeyError("No 'close' or 'price' column found!")

    # Add indicators
    close = df['close']
    df['ema_10'] = ta.ema(close, length=10)
    df['ema_30'] = ta.ema(close, length=30)
    df['rsi'] = ta.rsi(close, length=14)
    df['roc'] = ta.roc(close, length=10)
    df['momentum'] = ta.mom(close, length=10)

    # Fill missing values instead of dropping all.
    # .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
    df = df.ffill().bfill()
    df = df.reset_index(drop=True)
    return df

In [None]:
# Re-run the indicator pipeline on the ICICI frame and check how many rows come back.
df_icici_ind = add_indicators(df_icici)
print(df_icici_ind.shape)
# Last expression of the cell: preview the first rows.
df_icici_ind.head()

In [None]:
# Inspect the raw ICICI frame: size, first rows, and per-column dtypes / non-null counts.
print(df_icici.shape)
# A bare `df_icici.head()` in the middle of a cell is evaluated and thrown away;
# print it so the preview actually appears in the cell output.
print(df_icici.head())
df_icici.info()

In [None]:
# Show the first 10 raw lines of the CSV under sample_data/.
# NOTE(review): the pd.read_csv calls in this notebook open "ICICI_Bank_hourly.csv"
# from the working directory, not sample_data/ - confirm both refer to the same file.
!head -n 10 sample_data/ICICI_Bank_hourly.csv

In [None]:
# Colab-only helper: prompt for a file upload and keep the returned value in `uploaded`.
from google.colab import files
uploaded = files.upload()


In [None]:
import pandas as pd

# Re-read the ICICI CSV with default parsing and look at its size and first 10 rows.
df_icici = pd.read_csv("ICICI_Bank_hourly.csv")
print("Shape:", df_icici.shape)
print(df_icici.head(10))

In [None]:
# List the column labels exactly as read from the CSV.
print(df_icici.columns.tolist())

In [None]:
import pandas as pd

# Re-read the CSV, skipping the file line at index 1 (the line right after the header row).
df_icici = pd.read_csv("ICICI_Bank_hourly.csv", skiprows=[1])

# Clean up column names
df_icici.columns = [c.strip().lower() for c in df_icici.columns]

# Convert numeric columns first: unparseable entries become NaN here, so the
# dropna() below removes them too. Dropping before converting would leave
# those freshly created NaNs in the frame.
num_cols = ['close', 'high', 'low', 'open', 'volume']
for col in num_cols:
    df_icici[col] = pd.to_numeric(df_icici[col], errors='coerce')

# Drop missing and invalid rows
df_icici = df_icici.dropna().reset_index(drop=True)

print(df_icici.shape)
print(df_icici.head())


In [None]:
import numpy as np

def add_indicators(df):
    """Return a copy of ``df`` with SMA, EMA and RSI columns derived from ``Close``.

    Added columns:
      * ``SMA_10`` / ``SMA_50`` - simple moving averages over 10 / 50 rows.
      * ``EMA_20`` - exponential moving average (span 20, ``adjust=False``).
      * ``RSI`` - 100 - 100 / (1 + RS), where RS is the 14-row simple mean of
        gains divided by the 14-row simple mean of losses.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a numeric ``Close`` column. Any index is accepted.

    Returns
    -------
    pandas.DataFrame
        New frame with the indicator columns, rows containing NaN removed
        (this includes the moving-average warm-up rows) and a fresh 0..n-1
        index. The caller's frame is left unmodified.

    Raises
    ------
    KeyError
        If ``df`` has no ``Close`` column.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    close = df['Close']

    df['SMA_10'] = close.rolling(window=10).mean()
    df['SMA_50'] = close.rolling(window=50).mean()
    df['EMA_20'] = close.ewm(span=20, adjust=False).mean()

    delta = close.diff()
    # Series.where keeps df's own index. Wrapping a bare NumPy array in
    # pd.Series would create a 0..n-1 index and misalign RSI on any frame
    # whose index is not the default RangeIndex.
    gain = delta.where(delta > 0, 0.0)
    loss = (-delta).where(delta < 0, 0.0)
    avg_gain = gain.rolling(14).mean()
    avg_loss = loss.rolling(14).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))

    return df.dropna().reset_index(drop=True)

In [None]:
# Check the current column labels before renaming them.
print(df_icici.columns)

In [None]:
# Capitalise the lower-case labels in place (price -> Price, close -> Close, ...)
# so that add_indicators can find the 'Close' column.
df_icici.rename(
    columns={
        label: label.capitalize()
        for label in ('price', 'close', 'high', 'low', 'open', 'volume')
    },
    inplace=True,
)

In [None]:
# Build the indicator frame for ICICI and preview its first rows.
df_icici_ind = add_indicators(df_icici)
print("Indicators added successfully!")
print(df_icici_ind.head())

In [None]:
# Forward-fill, then back-fill, any remaining NaN values.
# NOTE(review): the add_indicators defined just above already ends with dropna(),
# so this is presumably a no-op - confirm before relying on it.
df_icici_ind = df_icici_ind.ffill().bfill()

In [None]:
# List the columns available for feature selection.
print(df_icici_ind.columns)

In [None]:
# Feature matrix for the model. 'Close' is deliberately first: create_sequences
# reads its target from column 0.
data = df_icici_ind[['Close', 'EMA_20', 'SMA_10', 'SMA_50', 'RSI']].copy()

In [None]:
# Function to create sequences for LSTM
def create_sequences(data, seq_length=60):
    """Cut a 2-D array into overlapping windows and their next-step targets.

    Window ``k`` holds rows ``k .. k + seq_length - 1`` of ``data``; its target
    is column 0 of the row right after the window (the 'Close' price).

    Returns a pair of NumPy arrays ``(windows, targets)``.
    """
    windows, targets = [], []
    stop = seq_length
    while stop < len(data):
        windows.append(data[stop - seq_length:stop])
        targets.append(data[stop, 0])  # Predict the 'Close' price
        stop += 1
    return np.array(windows), np.array(targets)

In [None]:
# MinMaxScaler is used here before the notebook's only other import of it,
# so import it in this cell to keep Restart & Run All from failing with NameError.
from sklearn.preprocessing import MinMaxScaler

# Scale every feature column of `data`, then cut the result into 60-step windows.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
scaled_data = np.array(scaled_data)

X, y = create_sequences(scaled_data, seq_length=60)
print("âœ… Sequence shape:", X.shape, y.shape)

In [None]:
# Lower-case the feature column labels of `data`.
data.columns = [col.lower() for col in data.columns]

In [None]:
# MinMaxScaler is used here before the notebook's only other import of it,
# so import it in this cell to keep Restart & Run All from failing with NameError.
from sklearn.preprocessing import MinMaxScaler

# Scale every feature column of `data`.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
scaled_data = np.array(scaled_data)

def create_sequences(data, seq_length=60):
    """Build sliding windows of ``seq_length`` rows plus the value that follows each.

    For every row position ``stop`` from ``seq_length`` to the end of ``data``,
    the window is ``data[stop - seq_length:stop]`` and the target is
    ``data[stop, 0]`` (the close price column).

    Returns ``(X, y)`` as NumPy arrays.
    """
    stops = range(seq_length, len(data))
    X = np.array([data[stop - seq_length:stop] for stop in stops])
    y = np.array([data[stop, 0] for stop in stops])  # Predict close price
    return X, y

# Window length, in rows, fed to the model for each prediction.
seq_length = 60
X, y = create_sequences(scaled_data, seq_length)
print(" Sequence shape:", X.shape, y.shape)

In [None]:
# Extra features derived from the close price.
close = df_icici_ind['Close']
df_icici_ind['EMA_30'] = close.ewm(span=30, adjust=False).mean()
df_icici_ind['ROC'] = close.pct_change(periods=5) * 100   # 5-row percentage change
df_icici_ind['Momentum'] = close - close.shift(5)         # 5-row absolute change
# The 5-row lookback leaves NaN in the first rows; remove them in place.
df_icici_ind.dropna(inplace=True)

In [None]:
# Feature matrix with 'Close' first (column 0 is the prediction target in create_sequences).
data = df_icici_ind[['Close', 'EMA_20', 'EMA_30', 'RSI', 'ROC', 'Momentum']].copy()

In [None]:
# Extra features derived from the close price.
close = df_icici_ind['Close']
for span in (10, 30):
    df_icici_ind[f'EMA_{span}'] = close.ewm(span=span, adjust=False).mean()
df_icici_ind['ROC'] = close.pct_change(periods=5) * 100   # 5-row percentage change
df_icici_ind['Momentum'] = close - close.shift(5)         # 5-row absolute change
# The 5-row lookback leaves NaN in the first rows; remove them in place.
df_icici_ind.dropna(inplace=True)

In [None]:
# Final feature matrix; 'Close' stays first because column 0 is the prediction target.
data = df_icici_ind[['Close', 'EMA_10', 'EMA_30', 'RSI', 'ROC', 'Momentum']].copy()

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

seq_length = 60
# Fraction of the sequences used for training. Keep this in sync with the
# chronological 80/20 split (`int(0.8 * len(X))`) used when the models are trained.
train_fraction = 0.8

# Rows seen by the training sequences: sequence k reads rows k .. k+seq_length-1
# and targets row k+seq_length, so the first `n_train_sequences` sequences touch
# rows 0 .. seq_length + n_train_sequences - 1.
n_train_sequences = int(train_fraction * max(len(data) - seq_length, 0))
n_train_rows = seq_length + n_train_sequences

# Fit the scaler on the training rows only. Fitting on the whole series leaks
# the min/max of the held-out period into the features the model is trained on.
# Held-out values may therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
scaler.fit(data[:n_train_rows])
scaled_data = np.asarray(scaler.transform(data))

def create_sequences(data, seq_length=60):
    """Cut a 2-D array into overlapping windows and their next-step targets.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array of shape (rows, features); column 0 is the value to predict.
    seq_length : int
        Number of consecutive rows in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with one window per sample and ``y`` with column 0 of the row
        that immediately follows each window.
    """
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i])
        y.append(data[i, 0])  # predict 'close' price
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_data, seq_length)
print("âœ… Sequence shape:", X.shape, y.shape)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Each sample is a (timesteps, features) window taken from X.
window_shape = (X.shape[1], X.shape[2])

# Two stacked LSTM layers, each followed by dropout, then a small dense head
# ending in a single regression output.
stacked_layers = [
    LSTM(64, return_sequences=True, input_shape=window_shape),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1),
]
model = Sequential(stacked_layers)

model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

In [None]:
# Training settings: 50 epochs, mini-batches of 32, 20% of the samples held out
# for validation, and no shuffling of the time-ordered samples.
fit_settings = {
    "epochs": 50,
    "batch_size": 32,
    "validation_split": 0.2,
    "shuffle": False,
}
history = model.fit(X, y, **fit_settings)

In [None]:
import matplotlib.pyplot as plt

# Draw the training and validation loss curves recorded during fit().
for metric_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()
plt.title('LSTM Training Performance')
plt.show()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# Split data into train and test: first 80% of the sequences train, the rest test
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Early stopping to prevent overfitting: stop after 10 epochs without a better
# val_loss and roll back to the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Both networks consume (timesteps, features) windows taken from X.
window_dims = (X.shape[1], X.shape[2])

# --- Model 1: Regular LSTM ---
lstm_layers = [
    LSTM(64, return_sequences=True, input_shape=window_dims),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1),
]
lstm_model = Sequential(lstm_layers)
lstm_model.compile(loss='mse', optimizer='adam')

# --- Model 2: Bidirectional LSTM ---
# Same layout, with each LSTM wrapped in Bidirectional.
bilstm_layers = [
    Bidirectional(LSTM(64, return_sequences=True), input_shape=window_dims),
    Dropout(0.2),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dense(32, activation='relu'),
    Dense(1),
]
bilstm_model = Sequential(bilstm_layers)
bilstm_model.compile(loss='mse', optimizer='adam')

# --- Train both ---
# Validate on the tail of the training data instead of on (X_test, y_test).
# early_stop monitors val_loss and restores the best weights, so validating on
# the test set would pick the model on the very data later used to report
# RMSE/MAE/R2 and make those numbers optimistic. Keras takes the validation
# split from the last samples of X_train/y_train, i.e. the most recent
# training windows.
val_fraction = 0.1

print("Training LSTM...")
history_lstm = lstm_model.fit(X_train, y_train, epochs=50, batch_size=32,
                              validation_split=val_fraction, callbacks=[early_stop], verbose=1)

print("\nTraining Bi-LSTM...")
history_bilstm = bilstm_model.fit(X_train, y_train, epochs=50, batch_size=32,
                                  validation_split=val_fraction, callbacks=[early_stop], verbose=1)

# --- Plot training performance ---
# One validation-loss curve per model, on a shared axis.
plt.figure(figsize=(10,6))
for run_history, curve_label in ((history_lstm, 'LSTM Val Loss'), (history_bilstm, 'Bi-LSTM Val Loss')):
    plt.plot(run_history.history['val_loss'], label=curve_label)
plt.title("LSTM vs Bi-LSTM Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("MSE Loss")
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# --- Predict on test set ---
y_pred_lstm, y_pred_bilstm = (net.predict(X_test) for net in (lstm_model, bilstm_model))

# --- Invert scaling to get real price values ---
# The scaler was fitted on every feature column, so each 1-D series is placed in
# column 0 of a zero matrix with the full feature width before inverse_transform.
def _pad_to_feature_width(values):
    """Return a (len(values), data.shape[1]) zero matrix with `values` in column 0."""
    padded = np.zeros((len(values), data.shape[1]))
    padded[:, 0] = values
    return padded

y_test_full = _pad_to_feature_width(y_test)
y_pred_full_lstm = _pad_to_feature_width(y_pred_lstm.flatten())
y_pred_full_bilstm = _pad_to_feature_width(y_pred_bilstm.flatten())

y_test_real, y_pred_real_lstm, y_pred_real_bilstm = (
    scaler.inverse_transform(padded)[:, 0]
    for padded in (y_test_full, y_pred_full_lstm, y_pred_full_bilstm)
)

# --- Compute metrics ---
# RMSE and MAE of each model's predictions against the actual prices.
rmse_lstm, rmse_bilstm = (
    np.sqrt(mean_squared_error(y_test_real, predicted))
    for predicted in (y_pred_real_lstm, y_pred_real_bilstm)
)
mae_lstm, mae_bilstm = (
    mean_absolute_error(y_test_real, predicted)
    for predicted in (y_pred_real_lstm, y_pred_real_bilstm)
)

print(f"LSTM  â†’ RMSE: {rmse_lstm:.4f}, MAE: {mae_lstm:.4f}")
print(f"BiLSTM â†’ RMSE: {rmse_bilstm:.4f}, MAE: {mae_bilstm:.4f}")

# --- Plot predictions ---
# Actual prices in black, with each model's predictions overlaid semi-transparently.
plt.figure(figsize=(12,6))
plt.plot(y_test_real, label='Actual Price', color='black')
for predicted, curve_label in ((y_pred_real_lstm, 'LSTM Predicted'), (y_pred_real_bilstm, 'Bi-LSTM Predicted')):
    plt.plot(predicted, label=curve_label, alpha=0.7)
plt.title("Actual vs Predicted Stock Price (LSTM vs Bi-LSTM)")
plt.xlabel("Time")
plt.ylabel("Price")
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# --- Metrics ---
def _regression_scores(actual, predicted):
    """Return (MSE, RMSE, MAE, R2) of `predicted` against `actual`."""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    return mse, rmse, mae, r2

mse_lstm, rmse_lstm, mae_lstm, r2_lstm = _regression_scores(y_test_real, y_pred_real_lstm)
mse_bilstm, rmse_bilstm, mae_bilstm, r2_bilstm = _regression_scores(y_test_real, y_pred_real_bilstm)

# --- Print results ---
print("ðŸ“Š Model Performance Metrics:")
print(f"LSTM   â†’ MSE: {mse_lstm:.4f}, RMSE: {rmse_lstm:.4f}, MAE: {mae_lstm:.4f}, RÂ²: {r2_lstm:.4f}")
print(f"Bi-LSTM â†’ MSE: {mse_bilstm:.4f}, RMSE: {rmse_bilstm:.4f}, MAE: {mae_bilstm:.4f}, RÂ²: {r2_bilstm:.4f}")


In [None]:
from tensorflow.keras.models import save_model
import numpy as np
import matplotlib.pyplot as plt

# --- Save models ---
# Write each trained network to its own .h5 file in the working directory.
for trained_model, target_path in (
    (lstm_model, "lstm_model_icici.h5"),
    (bilstm_model, "bilstm_model_icici.h5"),
):
    trained_model.save(target_path)
print(" Models saved successfully!")

# --- Predict next 10 future prices using the Bi-LSTM model ---
# Window length and feature count come from X, not from a hard-coded 60.
window_length, n_features = X.shape[1], X.shape[2]

# Start from the most recent `window_length` scaled rows. X[-1] stops one row
# short of the end (the final row is its *target*), so forecasting from X[-1]
# would re-predict an already observed price instead of the next unseen one.
last_sequence = scaled_data[-window_length:]
future_predictions = []

num_future_steps = 10
current_sequence = last_sequence.copy()

for _ in range(num_future_steps):
    next_pred = bilstm_model.predict(current_sequence.reshape(1, window_length, n_features))[0][0]

    # Append prediction and roll the window forward
    future_predictions.append(next_pred)
    # Build the new row from the latest row: predicted close in column 0, other
    # features carried forward unchanged. Filling them with zeros would feed the
    # model each indicator's scaled minimum, which is not a plausible market state.
    # NOTE(review): carrying indicators forward is an approximation; recomputing
    # them from the predicted closes would be more faithful.
    next_row = current_sequence[-1].copy()
    next_row[0] = next_pred
    current_sequence = np.vstack([current_sequence[1:], next_row])

# Inverse scale predictions to real price range
# (predictions go in column 0 of a zero matrix with the scaler's feature width)
feature_count = data.shape[1]
scaled_future = np.zeros((num_future_steps, feature_count))
scaled_future[:, 0] = future_predictions
future_prices = scaler.inverse_transform(scaled_future)[:, 0]

# --- Plot future predictions ---
# The forecast is drawn immediately after the last test-set position on the x-axis.
history_positions = range(len(y_test))
forecast_positions = range(len(y_test), len(y_test) + num_future_steps)

plt.figure(figsize=(10,5))
plt.plot(history_positions, y_test_real, label='Actual Price')
plt.plot(forecast_positions, future_prices, label='Future Forecast', color='red')
plt.title("Future Price Forecast (Next 10 Steps) - Bi-LSTM")
plt.legend()
plt.show()