In [1]:
# --- 1. IMPORT LIBRARIES ---
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import math
from sklearn.metrics import mean_squared_error

# --- 2. LOAD & MERGE DATA (Assuming files are in the same directory or provide full path) ---
# It's better to provide full, absolute paths if the notebook is in a different folder.
try:
    tesla_data = pd.read_csv('C:/Users/hp/Downloads/Tesla.csv')
    nifty_data = pd.read_csv('C:/Users/hp/Downloads/NIFTY 50 - HistoricalPE_PBDIV_Data.csv')
    gold_data = pd.read_csv('C:/Users/hp/Downloads/Gold price INR.csv')
except FileNotFoundError:
    print("One or more data files not found. Please check paths.")
    # Exit or handle error appropriately
    
# --- 3. PREPROCESS DATA ---
# Convert 'Date' columns to datetime objects
tesla_data['Date'] = pd.to_datetime(tesla_data['Date'], format='%m/%d/%Y')
nifty_data['Date'] = pd.to_datetime(nifty_data['Date'], format='%d %b %Y')
gold_data['Date'] = pd.to_datetime(gold_data['Date'], format='%d/%m/%Y')

# Merge datasets
merged_data = pd.merge(tesla_data, nifty_data, on='Date', how='outer')
merged_data = pd.merge(merged_data, gold_data, on='Date', how='outer')
merged_data.sort_values('Date', inplace=True)
merged_data.fillna(method='ffill', inplace=True) # Forward fill to handle missing values

# --- 4. FEATURE ENGINEERING ---
# Using a simplified but effective set of features
merged_data['Daily Return'] = merged_data['Close'].pct_change()
merged_data['5-Day MA'] = merged_data['Close'].rolling(window=5).mean()
merged_data['20-Day MA'] = merged_data['Close'].rolling(window=20).mean()

# Define the features to be used for training the model
features_to_use = [
    'Close', 'Open', 'High', 'Low', 'Volume', 
    'P/E', 'P/B', 'INR', 'Daily Return', '5-Day MA', '20-Day MA'
]

# Create a clean DataFrame for training, ensure all are float
data_for_training = merged_data[features_to_use].astype(float)

# IMPORTANT: Drop any rows with NaN values created during feature engineering (like the first few MAs)
data_for_training.dropna(inplace=True)

print("Training data shape after feature engineering and dropping NaNs:", data_for_training.shape)
print("Features being used:", data_for_training.columns.tolist())


# --- 5. SCALE & PREPARE DATA FOR LSTM ---
# Scale all features to a range of 0 to 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data_for_training)

# Create a separate scaler ONLY for the 'Close' price (our target).
# This is essential for accurately converting predictions back to actual dollar values.
close_price_scaler = MinMaxScaler(feature_range=(0, 1))
close_price_scaler.fit(data_for_training[['Close']])

# Function to create time-series data
def create_dataset(dataset, time_step=1):
    X, Y = [], []
    for i in range(time_step, len(dataset)):
        X.append(dataset[i-time_step:i, :]) # All feature columns for X
        Y.append(dataset[i, 0])             # Just the 'Close' price (column 0) for Y
    return np.array(X), np.array(Y)

# Use a 60-day lookback period
time_step = 60
X, y = create_dataset(scaled_data, time_step)

# --- 6. SPLIT DATA INTO TRAINING AND TESTING SETS ---
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# --- 7. BUILD, TRAIN, AND SAVE THE LSTM MODEL ---
num_features = X_train.shape[2] # Get the number of features from the data

model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(time_step, num_features))) # Layer 1
model.add(Dropout(0.3))
model.add(LSTM(50, return_sequences=False)) # Layer 2
model.add(Dropout(0.3))
model.add(Dense(1)) # Output layer

# Use a slightly lower learning rate for more stable convergence
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error')

print("\n--- Starting Model Training ---")
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1)

# --- 8. EVALUATE THE MODEL ---
# Make predictions on the test set
test_predictions_scaled = model.predict(X_test)

# Inverse transform the scaled predictions to get actual prices
test_predictions = close_price_scaler.inverse_transform(test_predictions_scaled)

# Inverse transform the actual test values for comparison
y_test_actual = close_price_scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate Root Mean Squared Error (RMSE)
rmse = math.sqrt(mean_squared_error(y_test_actual, test_predictions))
print(f"\nModel Test RMSE: ${rmse:.2f}")

# --- 9. SAVE THE FINAL MODEL ---
# This is the file you will copy to your Django project
model.save('stock_model.keras')
print("\nModel successfully saved as 'stock_model.keras'")

One or more data files not found. Please check paths.


NameError: name 'tesla_data' is not defined