<a href="https://colab.research.google.com/github/SnithinPR/Boma/blob/main/Copy_of_Untitled18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential

# --- 1. Prepare Data and Features (Assuming 'data' DataFrame exists) ---

# Define original features and target
features = ['Date_Ordinal', 'Open', 'High', 'Low', 'Close', 'Volume']
target = 'Next_Day_Close'

# Check if 'Next_Day_Close' exists, if not, create it
if target not in data.columns:
    # The target is 'Close' shifted up by one row, so the final row has no
    # next-day value and becomes NaN. dropna() then removes every row that
    # contains a NaN in any column of 'data' (in place).
    data[target] = data['Close'].shift(-1)
    data.dropna(inplace=True)

# Select the required data subset (copy so later work never aliases 'data')
data_subset = data[features + [target]].copy()

# --- 2. Scaling ---
# The chronological 80/20 boundary is computed BEFORE scaling so that the
# scaler can be fitted on the training rows only. Fitting it on every row
# would leak the test period's min/max into the training inputs and make the
# reported test metrics optimistic.
num_features = len(features)
train_size = int(len(data_subset) * 0.8)

# Scale all features and the target. MinMaxScaler is common for LSTMs.
# Test-period values may fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler(feature_range=(0, 1))
raw_values = data_subset.values
scaler.fit(raw_values[:train_size])
scaled_data = scaler.transform(raw_values)

# Separate scaled X and y (the target is the last column of data_subset)
scaled_X = scaled_data[:, :num_features]
scaled_y = scaled_data[:, num_features]

# --- 3. Time Series Data Restructuring (The LSTM requirement) ---
# LSTMs require data in the format: (samples, timesteps, features)
# look_back = 1 means the model uses a single day's data to predict the next.
# NOTE: this plain reshape is only valid for look_back == 1; a longer window
# needs explicit sliding-window construction instead.
look_back = 1
X_3D = np.reshape(scaled_X, (scaled_X.shape[0], look_back, scaled_X.shape[1]))

# --- 4. Split Data ---
# Standard 80/20 time-series split (no shuffling is critical for time series).
# Uses the same boundary the scaler was fitted with.
X_train, X_test = X_3D[:train_size], X_3D[train_size:]
y_train, y_test = scaled_y[:train_size], scaled_y[train_size:]

# --- 5. Build the LSTM Model ---
model = Sequential()
# Declare the input shape with an explicit Input layer; passing input_shape=
# to the first layer is what makes Keras emit a UserWarning.
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
# LSTM layer with 50 units, returning only the final timestep's output
model.add(LSTM(units=50, return_sequences=False))
# Output layer for regression (single price prediction)
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# --- 6. Train the Model ---
# Training with more epochs (e.g., 50-100) is often necessary for LSTMs.
# validation_split=0.1 holds out a slice of the training arrays for
# validation; the test rows are never seen during fitting.
print("Training LSTM Model...")
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=0)
print("Training Complete.")

# --- 7. Make Predictions ---
# Predict scaled prices for the held-out test rows
scaled_predictions = model.predict(X_test)

# Inverse transform predictions and actuals to original dollar values
# We must use a dummy array to inverse-transform only the target column (-1 index)
def inverse_transform_data(scaled_data_arr, num_features, scaler):
    """Map scaled target values back to their original units.

    The values are placed in the last column of a zero-filled array that is
    ``num_features + 1`` columns wide, passed through
    ``scaler.inverse_transform``, and the last column of the result is
    returned.

    Args:
        scaled_data_arr: Array of scaled target values. If it has more than
            one dimension, only its first column is used.
        num_features: Number of feature columns that precede the target;
            the padded array is one column wider than this.
        scaler: Object exposing ``inverse_transform``. Presumably fitted on
            ``num_features + 1`` columns with the target last -- confirm
            against the caller.

    Returns:
        The last column of the inverse-transformed padded array.
    """
    # Reduce the input to a flat vector of target values.
    if scaled_data_arr.ndim > 1:
        target_values = scaled_data_arr[:, 0]
    else:
        target_values = scaled_data_arr

    # Feature columns stay zero; only the target column is ever read back.
    row_count = scaled_data_arr.shape[0]
    padded = np.zeros((row_count, num_features + 1))
    padded[:, -1] = target_values

    restored = scaler.inverse_transform(padded)
    return restored[:, -1]

# Convert the scaled model outputs and the scaled test targets back to the
# original price scale so the metrics below are expressed in dollars.
predictions = inverse_transform_data(scaled_predictions, num_features, scaler)
y_test_original = inverse_transform_data(y_test, num_features, scaler)

# --- 8. Evaluate Metrics ---
# RMSE is the square root of the mean squared error, so it shares the
# target's units; MAE and R² are computed on the same unscaled values.
rmse = np.sqrt(mean_squared_error(y_test_original, predictions))
mae = mean_absolute_error(y_test_original, predictions)
r2 = r2_score(y_test_original, predictions)

print("\n--- LSTM Model Evaluation (Raw Features Only) ---")
print(f"Root Mean Squared Error (RMSE): ${rmse:.4f}")
print(f"Mean Absolute Error (MAE): ${mae:.4f}")
# Format spec must be '.4f'; the previous corrupted spec raised
# "ValueError: Invalid format specifier" when this line executed.
print(f"R-squared (R²): {r2:.4f}")

# --- 9. Display Results ---
# Side-by-side table of actual vs. predicted next-day closes, in test order
results_df = pd.DataFrame({
    'Actual Next Day Close': y_test_original,
    'Predicted Next Day Close': predictions
})
print("\nActual vs. Predicted Prices (Test Set Head):")
print(results_df.head())


  super().__init__(**kwargs)


Training LSTM Model...
Training Complete.
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step

--- LSTM Model Evaluation (Raw Features Only) ---
Root Mean Squared Error (RMSE): $3.0423
Mean Absolute Error (MAE): $2.4215
R-squared (R²): 0.9215

Actual vs. Predicted Prices (Test Set Head):
   Actual Next Day Close  Predicted Next Day Close
0             151.092697                152.040838
1             153.917252                152.067736
2             153.077759                154.202232
3             155.448029                155.693657
4             157.304688                156.220574
