# Imports

In [17]:
import yfinance as yf
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings("ignore")


# Fix the warning about a highly fragmented DataFrame
pd.options.mode.chained_assignment = None  # Disable the warning about chained assignment

# Helper Functions

In [18]:
def getRelativeStockData(ticker, start, end="2024-2-2", interval="1d"):
    data = yf.download(ticker, start=start, end=end, interval=interval)

    # Making it relative to the previous data point with 1% = 0.01
    # All in a pandas dataframe

    # Only use the close price
    data = data["Close"]
    
    data = data.pct_change().dropna()
    data = data.reset_index()
    data = data.rename(columns={"index": "Date"})


    return data


def scrollData(dataInput, contextWindow=10, solutionWindow=3):
    # Scrolling through the data and returning the context window and the solution window
    # Columns: Date, Solution1, Solution2, ..., Context0, Context-1, Context-2, ...

    # To make this more fast and efficient and supress the warning about chained assignment
    data = dataInput.copy()

    # Create the columns for the context and solution windows
    for i in range(solutionWindow):
        data[f"Solution{i+1}"] = data["Close"].shift(-i)
    for i in range(contextWindow):
        data[f"Context{-i}"] = data["Close"].shift(i)

    # Drop the NaN values
    data = data.dropna()

    # Drop the Close column
    data = data.drop(columns=["Close"])

    # Fix the index
    data = data.reset_index(drop=True)

    return data


def unrelativizeDataArray(data):
    # Unrelativize the data
    # This is done by multiplying the previous data point with the current data point
    # This is done for all values except the first

    processedData = data.copy()

    # Its an array in an array for some reason (with predictions)
    if len(processedData) == 1:
        processedData = processedData[0]

    # Make the base (first value) 0
    processedData[0] = 0

    # Loop through the data and unrelativize it
    for i in range(len(processedData)):
        if i != 0:
            processedData[i] = (processedData[i] + 1) * (processedData[i-1] + 1) - 1

    return processedData

# Statistics Preparation

In [19]:
statistics = pd.DataFrame(columns=["contextWindow", "solutionWindow", "trainMSE", "testMSE"])

# Testing Loop
This does:
- Data Prepping
- Training
- and Testing

In [20]:
# Some Parameters
for contextWindow in range(10, 1010, 10):
    for solutionWindow in range(10, 1010, 10):
        # Scroll the data
        data = getRelativeStockData("GOOG", "2000-1-1", interval="1d")
        data = scrollData(data, contextWindow=contextWindow, solutionWindow=solutionWindow)

        # Split the data into training and testing data
        train, test = train_test_split(data, test_size=0.1)

        # Columns for the context and solution windows
        contextColumns = [f"Context{-i}" for i in range(contextWindow)]
        solutionColumns = [f"Solution{i+1}" for i in range(solutionWindow)]

        # Split X and y
        X_train = train[contextColumns]
        y_train = train[solutionColumns]

        X_test = test[contextColumns]
        y_test = test[solutionColumns]


        ####### Train the model #######

        # Making the MLP model
        # model = MLPRegressor(hidden_layer_sizes=(1), max_iter=100, verbose=True)
        model = MLPRegressor(hidden_layer_sizes=(100, 100, 100, 100, 100), activation='relu', solver='adam', alpha=0.0001, learning_rate='constant', learning_rate_init=0.001)


        # Train the model
        model.fit(X_train, y_train)



        ####### Test the model #######

        # Predict the Testing Data
        predicitons = model.predict(X_test)

        # Calculate the mean squared error
        testMSE = mean_squared_error(y_test, predicitons)


        # Predict the Training Data
        predicitons = model.predict(X_train)

        # Calculate the mean squared error
        trainMSE = mean_squared_error(y_train, predicitons)
        

        # Print the mean squared error
        print(f"Mean Squared Error: {testMSE, trainMSE}")
        print(f"Context Window: {contextWindow}, Solution Window: {solutionWindow}")
        print(f"Progress: {(contextWindow + solutionWindow) / 2000 * 100} %")

        # Save the statistics to a csv file
        statistics.to_csv("statistics.csv")


        # Add the statistics to the dataframe
        statistics = statistics.append({"contextWindow": contextWindow, "solutionWindow": solutionWindow, "trainMSE": trainMSE, "testMSE": testMSE}, ignore_index=True)

[*********************100%***********************]  1 of 1 completed
Mean Squared Error: (0.00030649441055861194, 0.00034615192085194304)
Context Window: 10, Solution Window: 10
Progress: 1.0 %
[*********************100%***********************]  1 of 1 completed
Mean Squared Error: (0.0003605647892564373, 0.00036455215457980434)
Context Window: 10, Solution Window: 20
Progress: 1.5 %
[*********************100%***********************]  1 of 1 completed
Mean Squared Error: (0.0003733763875580825, 0.0003671619053433681)
Context Window: 10, Solution Window: 30
Progress: 2.0 %
[*********************100%***********************]  1 of 1 completed
Mean Squared Error: (0.0003902964170701275, 0.00036878828039782984)
Context Window: 10, Solution Window: 40
Progress: 2.5 %
[*********************100%***********************]  1 of 1 completed
Mean Squared Error: (0.00037875837594570375, 0.00036963174299947096)
Context Window: 10, Solution Window: 50
Progress: 3.0 %
[*********************100%********


1 Failed download:
['GOOG']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2000-1-1 -> 2024-2-2)')





ValueError: With n_samples=0, test_size=0.1 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.