In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

In [2]:
# Read stock symbols from CSV
csv_file = "stock_symbol.csv"
symbols_df = pd.read_csv(csv_file)

# Validate CSV data
if 'Symbol' not in symbols_df.columns:
    raise ValueError("CSV file must contain a 'Symbol' column.")

# Clean the column before using it as a ticker list:
#   * blank cells are read by pandas as NaN and would be passed on as symbols,
#   * surrounding whitespace yields tickers that do not resolve,
#   * repeated rows would trigger the same download more than once.
# First-occurrence order is kept, so a clean file produces the same list as before.
symbol_series = symbols_df['Symbol'].dropna().astype(str).str.strip()
stock_symbols = symbol_series[symbol_series != ""].drop_duplicates().tolist()

# Fail here with a clear message rather than later inside the download call.
if not stock_symbols:
    raise ValueError("CSV file does not contain any stock symbols.")


In [3]:

# Date range of the price history. The end date is evaluated at run time, so
# the downloaded frame grows (and results change) each day the notebook is re-run.
start_date = "2010-01-01"
end_date = pd.Timestamp.today()

# auto_adjust is passed explicitly instead of relying on the library default,
# which has changed between yfinance versions; this keeps the OHLC columns
# adjusted no matter which version is installed.
data = yf.download(stock_symbols, start=start_date, end=end_date, auto_adjust=True)

# Turn the date index into a regular 'Date' column without mutating in place.
data = data.reset_index()
data

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  6 of 6 completed


Price,Date,Close,Close,Close,Close,Close,Close,High,High,High,...,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume,Volume
Ticker,Unnamed: 1_level_1,AAPL,AMZN,GOOG,MSFT,NFLX,TSLA,AAPL,AMZN,GOOG,...,GOOG,MSFT,NFLX,TSLA,AAPL,AMZN,GOOG,MSFT,NFLX,TSLA
0,2010-01-04,6.440332,6.695000,15.536651,23.254051,7.640000,,6.455078,6.830500,15.605068,...,15.541608,23.006108,7.931429,,493729600,151998000,78541293,38409100,17239600,
1,2010-01-05,6.451466,6.734500,15.468233,23.261564,7.358571,,6.487879,6.774000,15.563671,...,15.547310,23.178918,7.652857,,601904800,177038000,120638494,49749600,23753100,
2,2010-01-06,6.348846,6.612500,15.078297,23.118816,7.617143,,6.477045,6.736500,15.514587,...,15.514587,23.201463,7.361429,,552160000,143576000,159744526,58182400,23290400,
3,2010-01-07,6.337111,6.500000,14.727282,22.878378,7.485714,,6.379844,6.616000,15.121431,...,15.106557,23.013618,7.731429,,477131200,220604000,257533695,50559700,9955400,
4,2010-01-08,6.379240,6.676000,14.923613,23.036160,7.614286,,6.379842,6.684000,14.954103,...,14.675224,22.750650,7.498571,,447610800,196610000,189680313,51197400,8180900,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3833,2025-03-31,222.130005,190.259995,156.229996,375.390015,932.530029,259.160004,225.619995,191.330002,157.130005,...,154.809998,372.540009,920.530029,249.309998,65299300,63547600,33591600,35184700,4634100,134008900.0
3834,2025-04-01,223.190002,192.169998,158.880005,382.190002,928.380005,268.459991,223.679993,193.929993,160.080002,...,155.300003,374.649994,927.500000,263.799988,36412700,41267300,20111400,19689500,3520000,146486900.0
3835,2025-04-02,223.889999,196.009995,158.860001,382.140015,935.520020,282.760010,225.190002,198.339996,160.274994,...,156.960007,377.970001,923.000000,254.600006,35905900,53679200,17113300,16092600,3256900,212787800.0
3836,2025-04-03,203.190002,178.410004,152.630005,373.109985,917.049988,267.279999,207.490005,184.130005,154.686996,...,152.835007,374.790009,901.799988,265.290009,103419000,95553600,28416100,30198000,5864600,136174300.0


In [4]:
# Dictionary to store results for each stock
predictions_dict = {}

# (train, test) frames keyed by symbol. Without this, every iteration replaces
# data_train / data_test and only the last symbol's split is left after the loop.
train_test_splits = {}

for stock_symbol in stock_symbols:
    print(f"Processing stock: {stock_symbol}")
    # auto_adjust is explicit so the prices do not depend on the yfinance default.
    # NOTE: `data` is reassigned here, replacing any frame of the same name
    # created earlier in the notebook.
    data = yf.download(stock_symbol, start=start_date, end=end_date, auto_adjust=True)
    data.reset_index(inplace=True)

    # Determine the cutoff point (80% for training, 20% for testing).
    # The split is chronological: the first 80% of rows train, the rest test.
    cutoff = int(len(data) * 0.8)

    # Step 1: Prepare the training and testing data using iloc
    data_train = data.iloc[:cutoff][['Open', 'Close']].copy()
    data_test = data.iloc[cutoff:][['Open', 'Close']].copy()

    # Keep this symbol's split. data_train / data_test themselves still hold
    # only the most recent symbol once the loop has finished.
    train_test_splits[stock_symbol] = (data_train, data_test)

    # Display the shapes of both datasets
    print(f"Total data points: {len(data)}")
    print(f"Training Data shape: {data_train.shape}")
    print(f"Testing Data shape: {data_test.shape}")


Processing stock: GOOG


[*********************100%***********************]  1 of 1 completed


Total data points: 3838
Training Data shape: (3070, 2)
Testing Data shape: (768, 2)
Processing stock: AAPL


[*********************100%***********************]  1 of 1 completed


Total data points: 3838
Training Data shape: (3070, 2)
Testing Data shape: (768, 2)
Processing stock: MSFT


[*********************100%***********************]  1 of 1 completed


Total data points: 3838
Training Data shape: (3070, 2)
Testing Data shape: (768, 2)
Processing stock: AMZN


[*********************100%***********************]  1 of 1 completed


Total data points: 3838
Training Data shape: (3070, 2)
Testing Data shape: (768, 2)
Processing stock: TSLA


[*********************100%***********************]  1 of 1 completed


Total data points: 3716
Training Data shape: (2972, 2)
Testing Data shape: (744, 2)
Processing stock: NFLX


[*********************100%***********************]  1 of 1 completed

Total data points: 3838
Training Data shape: (3070, 2)
Testing Data shape: (768, 2)





In [5]:
# Step 2: Learn the per-column min/max on the training frame, then map the
# training prices into the [0, 1] range with that fitted scaler.
# NOTE(review): data_train is the frame left by the final iteration of the
# download loop, so only the last symbol is scaled here.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data_train)
data_train_scaled = scaler.transform(data_train)

In [6]:
# Step 3: Build supervised samples for the LSTM.
# Each sample pairs a window of `time_steps` consecutive scaled rows (x) with
# the scaled row that immediately follows that window (y).
time_steps = 60  # Number of time steps to look back

# Row positions that have a full look-back window in front of them.
target_rows = range(time_steps, data_train_scaled.shape[0])

# x: the `time_steps` rows preceding each target row
x = np.array([data_train_scaled[row - time_steps:row] for row in target_rows])
# y: the target row itself (scaled Open and Close)
y = np.array([data_train_scaled[row] for row in target_rows])

In [7]:
# Step 5: Build the LSTM model
# Four stacked LSTM layers of growing width, each followed by a progressively
# stronger dropout. All but the last LSTM return the full sequence so the next
# LSTM layer receives one vector per time step.
window_length, n_features = x.shape[1], x.shape[2]

model = Sequential([
    LSTM(units=50, activation='relu', return_sequences=True,
         input_shape=(window_length, n_features)),
    Dropout(0.2),
    LSTM(units=60, activation='relu', return_sequences=True),
    Dropout(0.3),
    LSTM(units=80, activation='relu', return_sequences=True),
    Dropout(0.4),
    LSTM(units=120, activation='relu'),
    Dropout(0.5),
    # Two outputs: one for the Open price, one for the Close price
    Dense(units=2),
])

# Adam optimiser minimising mean squared error on the scaled prices
model.compile(optimizer='adam', loss='mean_squared_error')

In [8]:
# Step 6: Fit the network on the training windows.
# Tune the epoch count and batch size here as needed.
training_options = {"epochs": 100, "batch_size": 32}
model.fit(x, y, **training_options)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x2300ef2d420>

In [9]:
# Step 7: Prepare the test data with the same windowing used for training.
# The scaler fitted on the training data is reused (transform only, no refit)
# so that no information from the test period leaks into the scaling.
data_test_scaled = scaler.transform(data_test)

# At least one row beyond the look-back window is needed to form a sample.
# Without this check an empty window list would surface as an unrelated
# IndexError when the array shape is inspected.
if data_test_scaled.shape[0] <= time_steps:
    raise ValueError(
        f"Test set has {data_test_scaled.shape[0]} rows; more than "
        f"{time_steps} are required to build at least one input window."
    )

# One window per test row that has `time_steps` rows before it. The first
# `time_steps` rows of the test set therefore receive no prediction.
# Stacking equally-shaped 2-D windows already yields a
# (samples, time_steps, features) array, so no reshape is required.
x_test = np.array([
    data_test_scaled[end - time_steps:end]
    for end in range(time_steps, data_test_scaled.shape[0])
])

In [10]:
# Step 8: Make predictions
# Runs the trained network on every test window. The model's output layer has
# two units and was trained against scaled targets, so each row of
# `predictions` is a pair of values in the scaler's output range, not prices.
predictions = model.predict(x_test)



In [11]:
# Step 9: Inverse transform the predictions to get actual prices
# Uses the scaler that was fitted on the training frame to map the model
# output from the scaled range back to the original price scale.
predictions_inverse = scaler.inverse_transform(predictions)

In [12]:
# Split the two prediction columns into separate 1-D arrays:
# column 0 -> predicted Open, column 1 -> predicted Close.
predicted_open_price, predicted_close_price = (
    predictions_inverse[:, 0],
    predictions_inverse[:, 1],
)

In [13]:
# Record the predicted price arrays under the current symbol.
# NOTE(review): stock_symbol is the loop variable left over from the download
# loop, i.e. the last symbol processed.
predictions_dict[stock_symbol] = dict(
    Predicted_Open_Prices=predicted_open_price,
    Predicted_Close_Prices=predicted_close_price,
)

In [14]:
# Save the trained model under the symbol it was actually trained on.
# Only one model exists at this point, fitted on the data of `stock_symbol`
# (the last symbol handled by the download loop). Writing that same model to a
# file per ticker would mislabel it as a model for stocks it never saw.
model.save(f'stock_price_model_{stock_symbol}.h5')

In [15]:
# Save predictions to one CSV file per symbol.
# The filename is derived from the dictionary key so each symbol's predictions
# land in their own file, instead of every entry being written to (and
# overwriting) the same fixed set of ticker-named files.
for symbol, prediction in predictions_dict.items():
    df = pd.DataFrame({
        'Predicted_Open': prediction['Predicted_Open_Prices'],
        'Predicted_Close': prediction['Predicted_Close_Prices']
    })
    df.to_csv(f'predictions_{symbol}.csv', index=False)

print("All models trained and predictions saved.")

All models trained and predictions saved.
