In [1]:
import pandas as pd

# Load the dataset
file_path = r'C:\Users\A\Desktop\The Investment Compass\TATAMOTORS.NS.csv'
tata_motors_data = pd.read_csv(file_path)

# Display the first few rows of the data
print(tata_motors_data.head())


         Date        Open        High         Low       Close   Adj Close  \
0  2018-01-01  430.950012  436.399994  422.250000  424.450012  423.131256   
1  2018-01-02  428.850006  440.850006  422.000000  439.299988  437.935089   
2  2018-01-03  440.399994  441.399994  431.950012  433.899994  432.551849   
3  2018-01-04  430.000000  433.299988  425.750000  429.950012  428.614166   
4  2018-01-05  431.250000  436.350006  429.799988  431.600006  430.259033   

     Volume  
0   6807536  
1  15331261  
2   9794953  
3   8395377  
4   7021611  


In [2]:
# Convert the 'Date' column to datetime format
tata_motors_data['Date'] = pd.to_datetime(tata_motors_data['Date'])

# Set the 'Date' column as the index
tata_motors_data.set_index('Date', inplace=True)

# Check for missing values
print(tata_motors_data.isnull().sum())

# Fill missing values if any
tata_motors_data.fillna(method='ffill', inplace=True)

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


In [3]:
# Select features and target variable
features = ['Open', 'High', 'Low', 'Volume']
target = 'Close'

# Split the data into training and testing sets
train_size = int(len(tata_motors_data) * 0.8)
train_data = tata_motors_data.iloc[:train_size]
test_data = tata_motors_data.iloc[train_size:]

In [4]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_data = scaler.fit_transform(train_data)

# Prepare the data for LSTM
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data)-time_step-1):
        X.append(data[i:(i+time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

time_step = 100
X_train, y_train = create_dataset(scaled_train_data, time_step)

# Reshape input to be [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1dd19a1a2e0>

In [5]:
# Prepare the test data
scaled_test_data = scaler.transform(test_data)
X_test, y_test = create_dataset(scaled_test_data, time_step)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Make predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

# Evaluate the model
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print(f'Root Mean Squared Error: {rmse}')



ValueError: non-broadcastable output operand with shape (147,1) doesn't match the broadcast shape (147,6)

In [8]:
from sklearn.metrics import mean_squared_error

# Normalize only the closing prices
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_data = scaler.fit_transform(train_data[target].values.reshape(-1, 1))

# Modify the create_dataset function to work with 1D arrays
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

# Prepare the data for LSTM
time_step = 100
X_train, y_train = create_dataset(scaled_train_data, time_step)

# Reshape input to be [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

# Train the model (same as before)

# Prepare the test data
scaled_test_data = scaler.transform(test_data[target].values.reshape(-1, 1))
X_test, y_test = create_dataset(scaled_test_data, time_step)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Make predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

# Evaluate the model
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print(f'Root Mean Squared Error: {rmse}')


Root Mean Squared Error: 434.1021762812477


In [9]:
# Assume you have the most recent 100 days of data for input
recent_data = scaled_train_data[-100:]
recent_data = recent_data.reshape(1, recent_data.shape[0], 1)

# Generate predictions for the next 3 days
next_3_days_predictions = []
for _ in range(3):
    # Predict the next day's price
    next_day_pred = model.predict(recent_data)
    next_3_days_predictions.append(next_day_pred[0][0])
    
    # Update recent_data to include the predicted value and remove the oldest value
    recent_data = np.append(recent_data[:, 1:, :], next_day_pred.reshape(1, 1, 1), axis=1)

# Inverse transform the predictions to get the actual price values
next_3_days_predictions = scaler.inverse_transform(np.array(next_3_days_predictions).reshape(-1, 1))
print("Predictions for the next 3 days:", next_3_days_predictions.flatten())


Predictions for the next 3 days: [477.94522 478.39774 478.98425]
