In [None]:
import yfinance as yf
import pandas as pd

In [None]:
# Example: download data for NTPC (NTPC.NS) for the past 10 years at 1-day intervals
ticker = "NTPC.NS"
data = yf.download(ticker, period="10y", interval="1d")

# A failed download can come back as an empty frame rather than an exception;
# stop here instead of writing an empty CSV that the cells below would choke on.
if data.empty:
    raise RuntimeError(f"No data downloaded for {ticker}")

# Save to CSV (dots in the ticker are replaced with underscores in the file name)
csv_filename = f"{ticker.replace('.', '_')}_stock_data.csv"
data.to_csv(csv_filename)

print(f"Data saved to {csv_filename}")

In [None]:
# Read the exported CSV back from disk into `data`.
data = pd.read_csv(filepath_or_buffer=csv_filename)

In [None]:
# Preview only the first rows instead of rendering the whole frame.
data.head()

In [None]:
# Remove the non-data rows at the top of the CSV (the original code dropped rows 0 and 1).
# The index written by to_csv() -- presumably the dates -- is the first column, so keep
# only the rows whose first column parses as a date. Unlike a positional drop, re-running
# this cell after the index has been reset cannot delete real price rows.
is_data_row = pd.to_datetime(data.iloc[:, 0], errors='coerce').notna()
data = data[is_data_row]

In [None]:
# Renumber the remaining rows from 0; drop=True discards the old index labels
# instead of keeping them as a new column.
data = data.reset_index(drop=True)   # Reset Index After Deleting Rows

In [None]:
# Preview only the first rows instead of rendering the whole frame.
data.head()

In [None]:
# Relabel the 'Price' column as 'Date' (rebinding `data` rather than mutating in place)
data = data.rename(columns={'Price': 'Date'})

# Confirm the new column labels
print(data.columns)

In [None]:
# Persist the cleaned frame; index=False keeps the row numbers out of the file.
data.to_csv(path_or_buf="cleaned_stock_data.csv", index=False)

In [None]:
# Inspect the column dtypes as loaded from the CSV, before the numeric conversion below.
data.dtypes

In [None]:
# Every column except 'Date' is coerced to a numeric dtype;
# values that cannot be parsed become NaN (errors='coerce').
cols_to_convert = data.columns.difference(['Date'])
for column_name in cols_to_convert:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')

In [None]:
# Re-check the dtypes after the pd.to_numeric conversion above.
data.dtypes

In [None]:
import matplotlib.pyplot as plt

# Parse 'Date' into datetimes; unparseable entries become NaT (errors='coerce')
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

# Draw the closing price against the date on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['Date'], data['Close'], color='blue', label='Close Price')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.set_title('Stock Close Price Over Time')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Display a copy rounded to 4 decimals; the result is not assigned, so `data` keeps
# its full precision.
data.round(decimals=4)

In [None]:
# Preview only the first rows instead of rendering the whole frame.
data.head()

**Feature Enrichment: Adding Technical Indicators**

In [None]:
# !pip install pandas_ta
# import pandas_ta as ta

In [None]:

# # --- 1. Calculate Technical Indicators ---

# # Simple Moving Averages (Trend Indicator)
# data.ta.sma(length=20, append=True) # Adds a 'SMA_20' column
# data.ta.sma(length=50, append=True) # Adds a 'SMA_50' column

# # Relative Strength Index (Momentum Indicator)
# data.ta.rsi(length=14, append=True) # Adds a 'RSI_14' column

# # Moving Average Convergence Divergence (MACD - Momentum Indicator)
# data.ta.macd(fast=12, slow=26, signal=9, append=True) # Adds MACD, MACDh, MACDs columns



In [None]:
# data

In [None]:
# # --- 2. Select Relevant Features ---

# # Drop original 'Open', 'High', 'Low', 'Adj Close' as 'Close' and the TIs capture this info.
# # We keep 'Close' as the target and 'Volume' as a direct feature.
# features_to_keep = ['Close', 'Volume', 'SMA_20', 'SMA_50', 'RSI_14', 'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9']
# data1 = data[features_to_keep]



In [None]:
# data1

In [None]:
# # --- 3. Handle NaN Values ---
# # Indicators like SMA_50 and RSI need 50 days of data, creating NaNs at the start.
# print(f"Original rows: {len(data1)}")
# data1.dropna(inplace=True)
# print(f"Rows after dropping NaNs: {len(data1)} (Data is ready for scaling)")

In [None]:
# data1

In [None]:
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.model_selection import train_test_split

# # Assume 'data1' is the DataFrame from the previous step (after dropping NaNs)
# df_values = data1.values
# target_column_index = 0 # Assuming 'Close' is the first column

# df_values

In [None]:
# # --- 1. Split Data into Train and Test Sets ---
# # Use a time-series split (e.g., first 80% for train, last 20% for test)
# train_size = int(len(df_values) * 0.80)
# train_data = df_values[:train_size]
# test_data = df_values[train_size:]

# train_size

In [None]:
# # --- 2. Initialize and Fit Scaler on TRAIN DATA ONLY ---
# # The scaler will be used to scale all features (i.e., all columns)
# scaler = MinMaxScaler(feature_range=(0, 1))
# train_scaled = scaler.fit_transform(train_data)

# print(train_scaled)

# # --- 3. Transform Test Data ---
# test_scaled = scaler.transform(test_data)

In [None]:
# import numpy as np

# # **IMPORTANT:** The 'scaler' object is now fitted and SAVED.
# # You will pass this scaler object to your evaluation step later.

# # --- 4. Sequence Creation ---
# # (You already have this step, but here's the structure for completeness)
# def create_sequences(data, lookback):
#     X, Y = [], []
#     for i in range(len(data) - lookback):
#         X.append(data[i:(i + lookback), :]) # All features in the lookback window
#         Y.append(data[i + lookback, target_column_index]) # Only the 'Close' price (index 0)
#     return np.array(X), np.array(Y)

# lookback = 60 # Set your desired lookback window (e.g., 60 days)
# X_train, y_train = create_sequences(train_scaled, lookback)
# X_test, y_test = create_sequences(test_scaled, lookback)

# # X_train, y_train, X_test, y_test are now ready for model training.

In [None]:
# # --- ASSUME:
# # 1. 'scaler' is the fitted MinMaxScaler object saved in Step 2.1
# # 2. 'predicted_scaled' is the output of model.predict(X_test), with shape (N, 1)
# # 3. 'y_test' is the actual scaled target (Close price).

# # Step 1: Create a dummy array for predictions
# # The scaler was fitted on ALL features, so we need a temporary array
# # with the shape of the original data (N, number_of_features)

# num_features = data1.shape[1] # the scaler was fitted on data1's columns, e.g., 8 features (Close, Volume, TIs...)
# dummy_test_array = np.zeros((len(y_test), num_features))

# # Step 2: Place the SCALED predictions into the first column (where 'Close' was)
# dummy_test_array[:, target_column_index] = predicted_scaled.flatten()

# # Step 3: INVERT the predictions back to the original price scale
# predicted_price = scaler.inverse_transform(dummy_test_array)[:, target_column_index]

# # Step 4: INVERT the actual values for correct comparison
# # This requires a similar process for y_test
# dummy_actual_array = np.zeros((len(y_test), num_features))
# dummy_actual_array[:, target_column_index] = y_test.flatten()
# actual_price = scaler.inverse_transform(dummy_actual_array)[:, target_column_index]

# # Step 5: Calculate Metrics on UN-SCALED Data
# from sklearn.metrics import mean_squared_error, mean_absolute_error
# import math

# mse = mean_squared_error(actual_price, predicted_price)
# rmse = math.sqrt(mse)
# mae = mean_absolute_error(actual_price, predicted_price)

# print(f"MAE (Unscaled): {mae:.4f}")
# print(f"RMSE (Unscaled): {rmse:.4f}")

**LSTM**

In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
# Use the 'Close' price only
close_data = data[['Close']].dropna()

# Each sample is the previous `window_size` closes; the target is the close that follows.
window_size = 60
if len(close_data) <= window_size:
    raise ValueError(
        f"Need more than {window_size} rows of Close data, got {len(close_data)}"
    )

# Fit the scaler on the training portion only. The split cell below sends the first
# 80% of the sequences to training, and those sequences cover exactly the first
# `window_size + n_train_sequences` rows. Fitting on every row would leak the
# test-period min/max into training. Test-period values may therefore scale outside [0, 1].
n_sequences = len(close_data) - window_size
n_train_sequences = int(n_sequences * 0.8)
scaler = MinMaxScaler()
scaler.fit(close_data.iloc[:window_size + n_train_sequences])
scaled_close = scaler.transform(close_data)

# Sliding windows: X[k] holds rows k .. k+window_size-1, y[k] is row k+window_size
X = np.array([scaled_close[end - window_size:end]
              for end in range(window_size, len(scaled_close))])
y = scaled_close[window_size:].copy()

# Reshape for LSTM [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

In [None]:
# Chronological 80/20 split: the earliest sequences train, the latest ones test.
train_size = int(0.8 * len(X))
print(train_size)
train_part, test_part = slice(None, train_size), slice(train_size, None)
X_train, y_train = X[train_part], y[train_part]
X_test, y_test = X[test_part], y[test_part]

In [None]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam

class SAMOptimizer(tf.keras.optimizers.Optimizer):
    """Two-step wrapper around another optimizer (default name "SAM").

    ``first_step`` shifts every variable by ``rho * g / (global_norm(g) + 1e-12)``
    and remembers the shifts; ``second_step`` undoes those shifts and then hands
    the gradients it was given to the wrapped optimizer.

    NOTE(review): nothing in the visible part of this notebook instantiates this
    class; the models below are compiled with ``optimizer='adam'``.
    NOTE(review): ``name`` is passed to the base constructor positionally; whether
    the first positional parameter of ``Optimizer.__init__`` is the name depends on
    the installed Keras version -- confirm before use.
    """

    def __init__(self, base_optimizer, rho=0.05, name="SAM", **kwargs):
        """Store the wrapped optimizer and the perturbation radius.

        Args:
            base_optimizer: object whose ``apply_gradients`` performs the real update.
            rho: numerator of the perturbation scale used in ``first_step``.
            name: forwarded to the base-class constructor.
            **kwargs: forwarded to the base-class constructor.
        """
        super().__init__(name, **kwargs)
        self._optimizer = base_optimizer
        self.rho = rho

    @tf.function
    def _compute_gradients(self, loss, var_list, tape):
        """Return the gradients of ``loss`` w.r.t. ``var_list`` from ``tape``."""
        return tape.gradient(loss, var_list)

    def apply_gradients(self, grads_and_vars, **kwargs):
        """Delegate the update to the wrapped optimizer and return its result."""
        return self._optimizer.apply_gradients(grads_and_vars, **kwargs)

    def first_step(self, gradients, variables):
        """Perturb ``variables`` in place along ``gradients`` and record the shifts.

        NOTE(review): ``g * scale`` assumes no entry of ``gradients`` is None.
        """
        grad_norm = tf.linalg.global_norm(gradients)
        # 1e-12 keeps the division finite when the gradient norm is zero
        scale = self.rho / (grad_norm + 1e-12)

        # perturb weights, keeping each shift so second_step can undo it
        self.e_ws = []
        for v, g in zip(variables, gradients):
            e_w = g * scale
            v.assign_add(e_w)
            self.e_ws.append(e_w)

    def second_step(self, gradients, variables):
        """Undo the shifts from ``first_step``, then apply ``gradients``.

        Reads ``self.e_ws``, which only exists after ``first_step`` has run.
        """
        # restore weights
        for v, e_w in zip(variables, self.e_ws):
            v.assign_sub(e_w)

        # apply gradients from second step
        self.apply_gradients(zip(gradients, variables))

def train_with_sam(model, optimizer, x, y):
    """Run one two-pass training step on a batch and return both losses.

    The first forward/backward pass yields the gradients handed to
    ``optimizer.first_step``; the second pass is evaluated after that call and
    its gradients are handed to ``optimizer.second_step``.

    Args:
        model: object callable as ``model(x, training=True)`` that exposes
            ``compiled_loss`` and ``trainable_variables``.
        optimizer: object providing ``first_step`` and ``second_step``.
        x: batch of inputs.
        y: batch of targets.

    Returns:
        Tuple ``(loss, loss2)``: the loss from the first pass and from the second pass.
    """

    def _forward_backward():
        # One recorded forward pass plus the gradients of its loss.
        with tf.GradientTape() as recorder:
            predictions = model(x, training=True)
            batch_loss = model.compiled_loss(y, predictions)
        return batch_loss, recorder.gradient(batch_loss, model.trainable_variables)

    # Pass 1: gradients at the current weights, then perturb
    base_loss, base_grads = _forward_backward()
    optimizer.first_step(base_grads, model.trainable_variables)

    # Pass 2: gradients after first_step, then restore + update
    perturbed_loss, perturbed_grads = _forward_backward()
    optimizer.second_step(perturbed_grads, model.trainable_variables)

    return base_loss, perturbed_loss


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Single LSTM layer -> dropout -> one linear output unit, trained with Adam on MSE.
model = Sequential([
    LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Second axis of X_train: the number of timesteps in each input window.
print(X_train.shape[1])

In [None]:
# Train the LSTM for 25 epochs; the held-out sequences supply the per-epoch validation loss.
model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=32,
    validation_data=(X_test, y_test),
)

In [None]:
# Forecast the held-out windows, then map targets and forecasts back to the price scale.
predicted_scaled = model.predict(X_test)
actual = scaler.inverse_transform(y_test.reshape(-1, 1))
predicted = scaler.inverse_transform(predicted_scaled)

In [None]:
import matplotlib.pyplot as plt

# Actual vs. predicted closing price over the test period
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actual, label='Actual Closing Price')
ax.plot(predicted, label='Predicted Closing Price')
ax.set_title('NTPC Stock Price Prediction using LSTM')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Error metrics on the inverse-scaled prices
mse = mean_squared_error(actual, predicted)
rmse = math.sqrt(mse)
mae = mean_absolute_error(actual, predicted)

# Four decimals, matching the LSTM + DNN and BiLSTM + DNN sections, so the three
# models are reported at the same precision.
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

**LSTM + DNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# LSTM encoder followed by a small fully connected head.
# NOTE: the variable is called `Model` (capitalised) and later cells refer to it by that name.
Model = Sequential([
    # Recurrent layer: only the final hidden state is passed on
    LSTM(units=64, return_sequences=False, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    # Dense (DNN) head
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=32, activation='relu'),
    Dropout(0.2),
    # Single linear unit: the predicted (scaled) price
    Dense(units=1),
])

# Adam optimiser, mean-squared-error loss
Model.compile(optimizer='adam', loss='mean_squared_error')
Model.summary()

In [None]:
# Train the LSTM + DNN model and keep the returned training history.
history = Model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=32,
    validation_data=(X_test, y_test),
)


In [None]:
# Forecast the held-out windows and undo the scaling on forecasts and targets
predicted_scaled = Model.predict(X_test)
actual = scaler.inverse_transform(y_test.reshape(-1, 1))
predicted = scaler.inverse_transform(predicted_scaled)

# Actual vs. predicted price over the test period
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actual, label='Actual Price')
ax.plot(predicted, label='Predicted Price')
ax.set_title('NTPC Stock Price Prediction (LSTM + DNN)')
ax.set_xlabel('Time')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Error metrics on the inverse-scaled prices (RMSE is the square root of the MSE)
mae = mean_absolute_error(actual, predicted)
mse = mean_squared_error(actual, predicted)
rmse = math.sqrt(mse)

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

**BiLSTM + DNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM

# Bidirectional LSTM encoder followed by the same dense head as the LSTM + DNN model.
mod = Sequential([
    # BiLSTM layer: the wrapped LSTM returns only its final state
    Bidirectional(LSTM(64, return_sequences=False), input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    # Dense (DNN) head
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    # Output: next day's (scaled) price
    Dense(1),
])

# Adam optimiser, mean-squared-error loss
mod.compile(optimizer='adam', loss='mean_squared_error')
mod.summary()

In [None]:
# Train the BiLSTM + DNN model and keep the returned training history.
hist = mod.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=32,
    validation_data=(X_test, y_test),
)


In [None]:
# Forecast the held-out windows, then map targets and forecasts back to the price scale.
predicted_scaled = mod.predict(X_test)
actual = scaler.inverse_transform(y_test.reshape(-1, 1))
predicted = scaler.inverse_transform(predicted_scaled)


In [None]:
# Actual vs. predicted price over the test period
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actual, label='Actual')
ax.plot(predicted, label='Predicted')
ax.set_title('NTPC Stock Price Prediction (BiLSTM + DNN)')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price (INR)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Error metrics on the inverse-scaled prices
mse = mean_squared_error(actual, predicted)
mae = mean_absolute_error(actual, predicted)
rmse = math.sqrt(mse)  # root of the mean squared error

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
