# In [1]:  (notebook cell marker, kept as a comment so the file parses as Python)
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
import random

# Ticker symbols processed by this script, in processing order.
# The original author describes these as the top 20 S&P 500 companies;
# that ranking is not verified anywhere in this file.
top_20_stocks = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "TSLA",
    "META", "NVDA", "BRK-B", "V", "UNH",
    "JPM", "HD", "DIS", "PYPL", "MA",
    "VZ", "NFLX", "CSCO", "PEP", "INTC",
]

# Function to build CNN model
def build_cnn_model(input_shape, units=50):
    """Assemble and compile a small 1-D convolutional regression network.

    The stack is: Conv1D (``units`` filters, kernel size 3, ReLU) ->
    MaxPooling1D (pool size 2) -> Flatten -> Dense with a single output.

    Args:
        input_shape: Shape of one input sample, forwarded to the Conv1D
            layer's ``input_shape`` argument.
        units: Number of convolution filters.

    Returns:
        The compiled Sequential model (Adam optimizer, mean-squared-error
        loss).
    """
    layer_stack = [
        Conv1D(filters=units, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(units=1),  # single regression output (the Close price)
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# Objective Function for SWO (evaluate model performance)
def objective(params, X_train, y_train, X_test, y_test, scaler):
    """Score one candidate by training a fresh CNN and measuring its error.

    Args:
        params: Candidate position; only ``params[0]`` (the number of
            convolution filters) is read.
        X_train: Training inputs; ``X_train.shape[1]`` is used as the
            sequence length of the model input.
        y_train: Training targets.
        X_test: Inputs the trained model is evaluated on.
        y_test: Targets compared against the model's predictions.
        scaler: Accepted for signature compatibility; not used here.

    Returns:
        The mean squared error between ``y_test`` and the predictions for
        ``X_test``.
    """
    n_filters = params[0]
    sample_shape = (X_train.shape[1], 1)

    # A new model is built and trained from scratch for every evaluation.
    candidate = build_cnn_model(input_shape=sample_shape, units=n_filters)
    candidate.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    predictions = candidate.predict(X_test)
    return mean_squared_error(y_test, predictions)

# Spider Wasp Optimization (SWO) Implementation
def swarm_optimization(num_wasps=5, iterations=10, X_train=None, y_train=None, X_test=None, y_test=None, scaler=None,
                       objective_fn=None, min_units=1):
    """Search for the filter count with the lowest objective value.

    This is a simplified swarm search: every wasp holds a one-element
    position ``[units]``. Each iteration scores all wasps, remembers the
    best position seen so far, and then moves each wasp (with probability
    0.5) to the best position plus a random offset in ``[-5, 5]``.

    Args:
        num_wasps: Number of candidate positions kept in the swarm.
        iterations: Number of score-then-move rounds.
        X_train, y_train, X_test, y_test, scaler: Passed through unchanged
            to the objective function.
        objective_fn: Callable invoked as
            ``objective_fn(position, X_train, y_train, X_test, y_test, scaler)``
            returning a score to minimise. Defaults to the module-level
            ``objective``.
        min_units: Lower bound applied to every position so the random
            walk can never propose a non-positive filter count.

    Returns:
        The best position found as a one-element list ``[units]``, or
        ``None`` if no candidate ever produced a score below infinity
        (for example when ``num_wasps`` or ``iterations`` is 0, or every
        score is NaN).
    """
    # Resolved at call time so a caller-supplied objective never needs the
    # module-level default to exist.
    evaluate = objective if objective_fn is None else objective_fn

    # Initialize the swarm (wasps) with random filter counts.
    swarm = [[max(min_units, random.randint(50, 200))] for _ in range(num_wasps)]

    best_position = None
    best_value = float('inf')

    # Optimization loop
    for _ in range(iterations):
        for wasp in swarm:
            value = evaluate(wasp, X_train, y_train, X_test, y_test, scaler)
            if value < best_value:
                best_value = value
                # Copy so later changes to the swarm cannot alter the record.
                best_position = list(wasp)

        if best_position is None:
            # Nothing comparable has been scored yet (e.g. all NaN), so
            # there is no best position to move towards.
            continue

        # Update positions based on exploration/exploitation rules (simplified)
        for index, _ in enumerate(swarm):
            if random.random() < 0.5:
                proposal = best_position[0] + random.randint(-5, 5)
                swarm[index] = [max(min_units, proposal)]  # Update only units

    return best_position

# Function to preprocess and train model on each stock
def process_stock(stock, seq_length=60, results_file="stock_results.txt"):
    """Tune, train and evaluate a CNN close-price forecaster for one ticker.

    Steps: download daily prices, scale the closing price to [0, 1], build
    sliding windows of ``seq_length`` closes that predict the next close,
    split chronologically 80/20 into train/test, pick the filter count with
    ``swarm_optimization`` on a validation slice cut from the training data,
    retrain on the full training set, then append the test MSE to
    ``results_file`` and save an actual-vs-predicted plot as
    ``{stock}_stock_price_prediction.png``.

    Args:
        stock: Ticker symbol passed to ``yf.download``.
        seq_length: Number of previous closes in each input window.
        results_file: Path of the text file the MSE line is appended to.

    Returns:
        None. The ticker is skipped (with a printed message) when there is
        too little price history to form train, validation and test sets.

    Note:
        The reported MSE is computed on the scaled (0-1) values, not on
        prices.
    """
    print(f"Processing stock: {stock}")

    # Download historical data for each stock from Yahoo Finance
    stock_data = yf.download(stock, start="2014-01-01", end="2024-12-31")

    # Keep only the closing price and drop rows where it is missing, so
    # NaNs cannot propagate into scaling and training.
    close = stock_data[['Close']].dropna()

    # Chronological split sizes: 80% train / 20% test, and the last 20% of
    # the training windows held out as a validation slice for tuning.
    n_samples = max(len(close) - seq_length, 0)
    train_size = int(n_samples * 0.8)
    fit_size = int(train_size * 0.8)
    if fit_size < 1 or train_size - fit_size < 1 or n_samples - train_size < 1:
        print(f"Skipping {stock}: not enough price history for seq_length={seq_length}")
        return

    # Normalize the 'Close' prices using MinMaxScaler
    # NOTE(review): the scaler is fitted on the whole series, including the
    # test period, which leaks the test range into training. Confirm whether
    # fitting on the training period only is wanted before changing it.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(close.to_numpy())

    # Sliding windows: each sample is the previous `seq_length` closes and
    # the target is the close that follows them.
    X = np.array([scaled_data[i - seq_length:i, 0] for i in range(seq_length, len(scaled_data))])
    y = scaled_data[seq_length:, 0]
    X = X.reshape(X.shape[0], X.shape[1], 1)  # Reshape for CNN (1D)

    # Split data into training and testing sets (80% train, 20% test)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Tune on a validation slice taken from the training data so the test
    # set is never seen during hyperparameter selection. The validation
    # arrays go in through the optimizer's X_test/y_test parameters.
    X_fit, X_val = X_train[:fit_size], X_train[fit_size:]
    y_fit, y_val = y_train[:fit_size], y_train[fit_size:]
    best_params = swarm_optimization(num_wasps=5, iterations=10, X_train=X_fit, y_train=y_fit,
                                     X_test=X_val, y_test=y_val, scaler=scaler)

    print(f"Best parameters for {stock}: {best_params}")

    # Build and train the CNN model with the best hyperparameters
    model = build_cnn_model(input_shape=(X_train.shape[1], 1), units=best_params[0])
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    # Predict stock prices for the test set
    y_pred = model.predict(X_test)

    # Calculate MSE for the stock (on scaled values)
    mse = mean_squared_error(y_test, y_pred)

    # Inverse transform to prices. The scaler was fitted on one feature, so
    # it is given single-column arrays.
    y_pred_actual = scaler.inverse_transform(y_pred.reshape(-1, 1)).ravel()
    y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()

    # The test targets are the last rows of the cleaned series, so their
    # dates are the tail of its index.
    dates = close.index[-len(y_test_actual):]

    # Save MSE result to the text file
    with open(results_file, 'a') as f:
        f.write(f"{stock} - MSE: {mse}\n")

    # Plot the actual vs predicted stock prices with SWO reference in the title
    plt.figure(figsize=(10, 6))
    plt.plot(dates, y_test_actual, color='blue', label=f'Actual {stock} Stock Price')
    plt.plot(dates, y_pred_actual, color='red', label=f'Predicted {stock} Stock Price (SWO Optimized)')
    plt.title(f'{stock} Stock Price Prediction with CNN (SWO Optimized)')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()

    # Save the plot as an image
    plt.savefig(f"{stock}_stock_price_prediction.png")
    plt.close()

# Run the process for each stock in the top 20 list and save results in a file
results_file = "stock_results.txt"

# Start a fresh results file; process_stock appends one line per ticker.
with open(results_file, 'w') as f:
    f.write("Stock Price Prediction Results:\n\n")

for stock in top_20_stocks:
    process_stock(stock, results_file=results_file)

# Offer the MSE results file for download when running inside Google Colab.
# The per-stock PNG plots are saved to the working directory by
# process_stock and are not downloaded here.
try:
    from google.colab import files
except ImportError:
    # Outside Colab there is nothing to trigger; the outputs are already on
    # disk, so report where they are instead of failing after training.
    print(f"Results written to {results_file}")
else:
    files.download(results_file)

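# Output recorded from the original notebook run (not part of the program):
#   ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
# This error typically means a compiled package was built against a different
# NumPy version than the one installed. Reinstalling or upgrading the affected
# packages so they match the installed NumPy, then restarting the runtime,
# usually resolves it.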