In [None]:
!pip install yfinance pandas matplotlib



In [None]:
# Apple Stock Price Prediction - Data Preparation
# This script downloads Apple stock data and prepares it for deep learning

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm

# Set random seed for reproducibility
np.random.seed(42)

# Download Apple's complete stock history
print("Downloading Apple stock data from Yahoo Finance...")
aapl_data = yf.download("AAPL", period="max")

# A download that yields no rows would only surface later as a confusing
# error in the feature-engineering code, so fail here with a clear message.
if aapl_data.empty:
    raise RuntimeError("No data returned for AAPL; check the network connection and retry.")

# The captured output of this cell shows two-level columns (Price, Ticker)
# even though only one ticker is requested. Keep just the price-field level so
# that df['Close'] is a Series and feature names are plain strings instead of
# tuples such as ('Close', 'AAPL').
if isinstance(aapl_data.columns, pd.MultiIndex):
    aapl_data.columns = aapl_data.columns.get_level_values(0)

# Display info about the dataset
print(f"Dataset shape: {aapl_data.shape}")
print(f"Date range: {aapl_data.index.min()} to {aapl_data.index.max()}")
print(f"Total days: {aapl_data.shape[0]}")
print(f"Total datapoints: {aapl_data.shape[0] * aapl_data.shape[1]}")

# Preview the data
print("\nData Preview:")
print(aapl_data.head())

# Check for missing values
print("\nMissing values:")
print(aapl_data.isnull().sum())

# Build the engineered feature table on a copy so the raw download stays intact
print("\nCalculating technical indicators...")
df = aapl_data.copy()

# 1. RSI (14-day), from plain rolling means of the daily gains and losses.
#    Rows that are not gains contribute 0 to `gain`, rows that are not losses
#    contribute 0 to `loss` (this includes the first row, whose diff is NaN).
delta = df['Close'].diff()
is_up_day = delta > 0
is_down_day = delta < 0
gain = delta.where(is_up_day, 0)
loss = -delta.where(is_down_day, 0)
avg_gain, avg_loss = (side.rolling(window=14).mean() for side in (gain, loss))
rs = avg_gain / avg_loss
df['RSI_14'] = 100 - (100 / (1 + rs))

# 2. Calendar features read from the date index
trading_dates = df.index
df['Day_of_Week'] = trading_dates.dayofweek
df['Month'] = trading_dates.month
df['Day_of_Month'] = trading_dates.day

# 3. Gap flags: 1 when the open is above / below the previous row's close, else 0
previous_close = df['Close'].shift(1)
df['Gap_Up'] = (df['Open'] > previous_close) * 1
df['Gap_Down'] = (df['Open'] < previous_close) * 1

# 4. Target variable: the opening price of the following row
df['Next_Day_Open'] = df['Open'].shift(-1)

# Drop every row that still holds a NaN, e.g. the rolling warm-up rows and the
# last row, which has no following open to use as a target.
df_clean = df.dropna()
print(f"Original features: {aapl_data.shape[1]}")
print(f"Expanded features: {df.shape[1]}")
print(f"Total datapoints after feature engineering: {df_clean.shape[0] * df_clean.shape[1]}")

# List every column (features and target) with a 1-based position
print("\nAvailable features in the dataset:")
for position, col in enumerate(df_clean.columns, start=1):
    print(f"{position}. {col}")

# Prepare data for model training
print("\nPreparing data for training...")

# Define features and target
X = df_clean.drop(['Next_Day_Open'], axis=1)
y = df_clean['Next_Day_Open']


# Create sequences for time series prediction
def create_sequences(X, y, time_steps=90, target_offset=0):
    """Slice row-aligned features and targets into fixed-length lookback windows.

    Window ``i`` covers rows ``i .. i + time_steps - 1`` of ``X`` and is paired
    with ``y[i + time_steps + target_offset]``.

    Args:
        X: Sliceable sequence of per-row features (e.g. a 2-D array).
        y: Indexable sequence of per-row targets with the same length as ``X``.
        time_steps: Number of consecutive rows in each window.
        target_offset: Position of the target relative to the first row after
            the window. ``0`` (the default) pairs a window with the row that
            follows it; ``-1`` pairs it with the window's own last row, which
            is what a target column that is already shifted forward needs.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays with one entry per window.
        Both are empty when there are too few rows for a single window.

    Raises:
        ValueError: If ``target_offset`` points before the start of the window.
    """
    if target_offset < -time_steps:
        raise ValueError("target_offset must not point before the start of the window")
    # A window must fit entirely inside X, and its target index must exist in y.
    n_windows = min(len(X) - time_steps + 1, len(X) - time_steps - target_offset)
    X_seq, y_seq = [], []
    for i in range(n_windows):
        X_seq.append(X[i:i + time_steps])
        y_seq.append(y[i + time_steps + target_offset])
    return np.array(X_seq), np.array(y_seq)


# Define sequence length (lookback period) and the chronological split
sequence_length = 90  # Using 90 days of data to predict the next day
train_fraction = 0.9  # Train-test split (90-10)

# Next_Day_Open on a row already holds the open of the row after it, so the
# correct target for a window is the value stored on the window's LAST row
# (target_offset=-1). Pairing it with the row after the window would skip a
# day and predict the open two rows ahead.
n_sequences = len(X) - sequence_length + 1
train_size = int(n_sequences * train_fraction)
if train_size < 1:
    raise ValueError(
        f"Need more rows than {len(X)} to build training windows of length {sequence_length}"
    )
# The last training window starts at row train_size - 1, so training windows
# touch exactly the first train_size + sequence_length - 1 rows.
n_train_rows = train_size + sequence_length - 1

# Feature scaling
# Fit the scalers on training rows only; fitting on the full history would leak
# the test period's min/max into training. Values outside the training range
# (e.g. later, higher prices) therefore scale outside [0, 1], which is expected.
print("Applying MinMax scaling to features...")
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

scaler_X.fit(X.iloc[:n_train_rows])
scaler_y.fit(y.iloc[:n_train_rows].values.reshape(-1, 1))

X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y.values.reshape(-1, 1)).flatten()

print(f"Creating sequences with lookback period of {sequence_length} days...")
X_seq, y_seq = create_sequences(X_scaled, y_scaled, sequence_length, target_offset=-1)
assert len(X_seq) == n_sequences, "window count does not match the planned split"

print(f"Sequence shape: {X_seq.shape}")
print(f"Target shape: {y_seq.shape}")

# Chronological split: earliest windows train, latest windows test
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]

print(f"Training set: {X_train.shape}")
print(f"Testing set: {X_test.shape}")

# Save processed data
np.save('X_train.npy', X_train)
np.save('y_train.npy', y_train)
np.save('X_test.npy', X_test)
np.save('y_test.npy', y_test)

# Save scalers for later use (needed to map predictions back to prices)
import pickle
with open('scaler_X.pkl', 'wb') as f:
    pickle.dump(scaler_X, f)
with open('scaler_y.pkl', 'wb') as f:
    pickle.dump(scaler_y, f)

# Save the feature list for reference
with open('feature_list.txt', 'w') as f:
    for feature in X.columns:
        f.write(f"{feature}\n")

print("Feature list saved to 'feature_list.txt'")

# Reported last so that it is only printed once every file has been written
print("\nData preparation complete! Files saved and ready for model training.")

Downloading Apple stock data from Yahoo Finance...


[*********************100%***********************]  1 of 1 completed

Dataset shape: (11181, 5)
Date range: 1980-12-12 00:00:00 to 2025-04-23 00:00:00
Total days: 11181
Total datapoints: 55905

Data Preview:
Price          Close      High       Low      Open     Volume
Ticker          AAPL      AAPL      AAPL      AAPL       AAPL
Date                                                         
1980-12-12  0.098726  0.099155  0.098726  0.098726  469033600
1980-12-15  0.093575  0.094005  0.093575  0.094005  175884800
1980-12-16  0.086707  0.087136  0.086707  0.087136  105728000
1980-12-17  0.088853  0.089282  0.088853  0.088853   86441600
1980-12-18  0.091429  0.091858  0.091429  0.091429   73449600

Missing values:
Price   Ticker
Close   AAPL      0
High    AAPL      0
Low     AAPL      0
Open    AAPL      0
Volume  AAPL      0
dtype: int64

Calculating technical indicators...
Original features: 5
Expanded features: 12
Total datapoints after feature engineering: 134004

Available features in the dataset:
1. ('Close', 'AAPL')
2. ('High', 'AAPL')
3. ('Low', 'A




In [None]:
# PyTorch implementation of the LSTM stock-price model

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Read back the four arrays written by the data-preparation cell
X_train, y_train, X_test, y_test = (
    np.load(path)
    for path in ('X_train.npy', 'y_train.npy', 'X_test.npy', 'y_test.npy')
)

# Dataset wrapper that serves (feature window, target) pairs
class StockDataset(Dataset):
    """Holds features and targets as float32 tensors and indexes them in step."""

    def __init__(self, X, y):
        # Convert once up front so per-item access is a plain tensor lookup.
        self.X, self.y = (torch.tensor(data, dtype=torch.float32) for data in (X, y))

    def __len__(self):
        # Number of entries along the first axis of the feature tensor
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        # Add a trailing axis so a scalar target comes back with shape (1,)
        target = self.y[idx][..., None]
        return features, target

# Wrap the arrays in datasets, then batch them (32 samples per batch)
train_dataset, test_dataset = StockDataset(X_train, y_train), StockDataset(X_test, y_test)
# Only the training loader shuffles; the test loader keeps the stored order.
# NOTE(review): a final training batch holding a single sample would make
# BatchNorm1d raise in train mode — consider drop_last=True if that ever occurs.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Define model architecture
class StockPredictor(nn.Module):
    """Stacked LSTM followed by batch norm, dropout and a one-unit linear head.

    Args:
        input_size: Number of features per time step. When omitted it is read
            from the module-level ``X_train`` array (``X_train.shape[2]``),
            which is what the original no-argument constructor did.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied before the linear head.
    """

    def __init__(self, input_size=None, hidden_size=50, num_layers=2, dropout=0.4):
        super().__init__()
        if input_size is None:
            # Backward-compatible default: infer the feature count from the loaded data.
            input_size = X_train.shape[2]
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.bn = nn.BatchNorm1d(hidden_size)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a batch of windows ``(batch, time, features)`` to ``(batch, 1)``."""
        # nn.LSTM starts from all-zero hidden and cell states when none are
        # passed, so no explicit (h0, c0) tensors are needed; this also keeps
        # the layer count and hidden size defined in one place.
        out, _ = self.lstm(x)
        # Only the output at the final time step feeds the prediction head.
        out = self.bn(out[:, -1, :])
        out = self.dropout(out)
        out = self.fc(out)
        return out

# Initialize model, optimizer, and loss function
# Seed PyTorch's global RNG first so that weight initialisation, dropout masks
# and the training loader's shuffle order are repeatable from run to run.
torch.manual_seed(42)
model = StockPredictor()
criterion = nn.MSELoss()
# NOTE(review): weight_decay=0.01 is large next to the ~1e-4 losses in the
# logged output, where the loss creeps up after the first few epochs — this
# value may be worth re-tuning.
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

# Train the model
for epoch in range(200):
    model.train()  # enable dropout and batch-statistics mode for batch norm
    total_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Reported value is the mean of the per-batch losses for this epoch
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

# Evaluate the model
def _evaluate_losses(model, loader, criterion):
    """Run ``model`` over ``loader`` without gradients and summarise the loss.

    Args:
        model: Module called on each batch of inputs.
        loader: DataLoader yielding ``(inputs, targets)`` batches.
        criterion: Loss function returning a batch-mean loss tensor.

    Returns:
        Tuple ``(per_sample_loss, batch_loss_sum)``: the batch losses weighted
        by batch size and divided by ``len(loader.dataset)``, and the plain sum
        of the per-batch losses.
    """
    weighted_sum, batch_loss_sum = 0.0, 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            # Evaluate the criterion once per batch and reuse the value
            batch_loss = criterion(model(X_batch), y_batch).item()
            weighted_sum += batch_loss * len(X_batch)
            batch_loss_sum += batch_loss
    return weighted_sum / len(loader.dataset), batch_loss_sum


model.eval()  # disable dropout and use batch norm's running statistics
train_mse, _ = _evaluate_losses(model, train_loader, criterion)
# test_loss keeps its original meaning: the sum of the per-batch test losses
test_mse, test_loss = _evaluate_losses(model, test_loader, criterion)

print(f"Train MSE: {train_mse:.4f}")
print(f"Test MSE: {test_mse:.4f}")
print(f"Test Loss: {test_loss / len(test_loader)}")


Epoch 1, Loss: 0.04868987925314846
Epoch 2, Loss: 0.0011635342291880918
Epoch 3, Loss: 0.0004117288548607179
Epoch 4, Loss: 0.00035937132452090736
Epoch 5, Loss: 0.00037963872188275
Epoch 6, Loss: 0.0004358791798156931
Epoch 7, Loss: 0.0005886077004992657
Epoch 8, Loss: 0.0006573829042071549
Epoch 9, Loss: 0.000653965096786963
Epoch 10, Loss: 0.0006347200696137196
Epoch 11, Loss: 0.0006572611780659197
Epoch 12, Loss: 0.0006952172251635019
