In [1]:
# Install the notebook's dependencies into the *kernel's* environment.
# `%pip` (rather than `!pip`) guarantees the packages land in the interpreter
# this notebook is actually running on; `-q` keeps the install log short.
%pip install -q numpy pandas matplotlib scikit-learn tensorflow



In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

# Load and preprocess the data
# 'Close' must stay first: the windowing code predicts column 0 and the
# inverse transform later in this notebook reads column 0 back out.
FEATURE_COLUMNS = ['Close', 'Open', 'High', 'Low', 'Volume']
# Share of the earliest rows used to fit the scaler. It complements the
# test_size=0.2 of the chronological train/test split further down, so the
# rows used for fitting never include a test-set target.
TRAIN_FRACTION = 0.8

df = pd.read_csv('bajaj_stock_data.csv')  # Replace with the path to your CSV file

# Fail with a readable message instead of a KeyError deep inside pandas/sklearn.
missing_columns = [col for col in ['Date', *FEATURE_COLUMNS] if col not in df.columns]
if missing_columns:
    raise KeyError(f"CSV is missing required columns: {missing_columns}")

# Parse dates, index by them, and force chronological order: both the sliding
# windows and the shuffle=False split assume rows are sorted oldest -> newest.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date').sort_index().dropna()

# Fit the scaler on the training period only. Fitting on the full series would
# leak the test period's min/max into the training inputs (look-ahead bias).
# Values from the later period may therefore fall outside [0, 1]; that is expected.
n_fit_rows = int(len(df) * TRAIN_FRACTION)
if n_fit_rows == 0:
    raise ValueError("Not enough rows left after dropping NaNs to fit the scaler.")

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df[FEATURE_COLUMNS].iloc[:n_fit_rows])
scaled_data = scaler.transform(df[FEATURE_COLUMNS])

# Create datasets with multiple features (using 'Close', 'Open', 'High', 'Low', 'Volume')
def create_dataset_multifeature(dataset, look_back=120, target_col=0):
    """Build sliding-window samples from a 2-D (rows x features) array.

    For every row index ``i >= look_back`` one sample is produced: the input is
    the ``look_back`` rows immediately before ``i`` (all features) and the
    target is the value of column ``target_col`` at row ``i``.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_features).
        look_back: Number of consecutive rows in each input window (>= 1).
        target_col: Column index of the value to predict. Defaults to 0,
            which the calling code uses for the 'Close' price.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        (n_rows - look_back, look_back, n_features) and ``y`` has shape
        (n_rows - look_back,). When ``n_rows <= look_back`` both arrays are
        empty but still correctly shaped, so ``X.shape[1]`` / ``X.shape[2]``
        remain valid for downstream code.

    Raises:
        ValueError: If ``dataset`` is not 2-D or ``look_back`` is < 1.
    """
    dataset = np.asarray(dataset)
    if dataset.ndim != 2:
        raise ValueError(f"dataset must be 2-D (rows x features), got {dataset.ndim}-D")
    if look_back < 1:
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    n_rows, n_features = dataset.shape
    if n_rows <= look_back:
        # Not enough history for even one window: return empty, well-shaped arrays.
        return (
            np.empty((0, look_back, n_features), dtype=dataset.dtype),
            np.empty((0,), dtype=dataset.dtype),
        )

    # Each window covers rows [i - look_back, i); its target is row i.
    X = np.stack([dataset[i - look_back:i] for i in range(look_back, n_rows)])
    # Copy so the targets do not alias (and cannot be mutated through) the input.
    y = dataset[look_back:, target_col].copy()
    return X, y

# Number of past rows fed to the model for each prediction; a long window is
# used to capture longer trends.
look_back = 120

# Fail early with a clear message: with too few rows no window can be built
# and the model/shape code below would fail with an opaque error.
if len(scaled_data) <= look_back:
    raise ValueError(
        f"Need more than {look_back} rows to build a window, got {len(scaled_data)}."
    )

# X already comes back as [samples, time steps, features], so no reshape is needed.
X, y = create_dataset_multifeature(scaled_data, look_back)

# Chronological split (shuffle=False): the last 20% of windows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling are repeatable from run to run.
# NOTE(review): some GPU kernels are still non-deterministic; confirm if exact
# reproducibility is required.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Build the LSTM model: two stacked 100-unit LSTM layers, each followed by
# dropout for regularisation, then a small dense head that emits one value.
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.3))  # Adjust dropout for regularization
model.add(LSTM(units=100, return_sequences=False))  # Only the last time step feeds the dense head
model.add(Dropout(0.3))
model.add(Dense(units=50))  # Extra Dense layer for more complexity
model.add(Dense(units=1))  # Output layer: the scaled target value

# Compile with Adam at its default learning rate (the string 'adam' does not
# lower it; pass an optimizer instance instead if a smaller rate is wanted).
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# validation_split holds out the final 20% of the training samples, i.e. the
# most recent windows, which keeps validation chronologically after training.
history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.2, callbacks=[early_stopping])

def _to_price_units(scaled_close):
    """Map scaled target values back to the original units of feature column 0.

    The scaler was fitted on all feature columns, so the single column is
    padded with zeros for the remaining features before inverse_transform is
    called; only column 0 of the result is kept.
    """
    column = np.asarray(scaled_close).reshape(-1, 1)
    padding = np.zeros((column.shape[0], X_test.shape[2] - 1))
    restored = scaler.inverse_transform(np.concatenate((column, padding), axis=1))
    return restored[:, 0]


# Predict on the held-out windows and undo the scaling.
predictions = _to_price_units(model.predict(X_test))

# Despite its name, y_test_scaled holds the test targets *after* the inverse
# transform, i.e. in the same units as `predictions`.
y_test_scaled = _to_price_units(y_test)

# Error metrics, computed in original (unscaled) units.
mse = mean_squared_error(y_test_scaled, predictions)
mae = mean_absolute_error(y_test_scaled, predictions)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")

# Plot the results with enhanced visibility
# The first test target sits `look_back` rows after the first test window
# starts, so the frame is cut at len(X_train) + look_back.
split_row = len(X_train) + look_back
train = df.iloc[:split_row]
# .assign returns a new frame, so the predictions are not written onto a slice
# of `df` (which would raise SettingWithCopyWarning and may not stick).
valid = df.iloc[split_row:].assign(Predictions=predictions)

fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('LSTM Model with Multiple Features for Stock Market Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price')
ax.plot(train['Close'], label='Training Data', color='blue')
ax.plot(valid['Close'], label='Actual Price', color='orange')
ax.plot(valid['Predictions'], label='Predicted Price', color='green', linewidth=2)
ax.legend()
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: '/content/BAJFINANCE.NS (5).csv'