In [1]:
# Import necessary libraries
import yfinance as yf  
import pandas as pd  
import numpy as np 

# Machine Learning
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler  
from sklearn.metrics import mean_squared_error, r2_score 
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.svm import SVR

# Additional
from math import sqrt  

print("Dependencies imported successfully.")


Dependencies imported successfully.


In [None]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt

# Import regression models
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.svm import SVR

def fetch_data(ticker):
    data = yf.download(ticker, start="2020-01-01", end="2023-01-01")
    data['Return'] = data['Adj Close'].pct_change()
    data['Target'] = (data['Return'] > 0).astype(int)
    data.dropna(inplace=True)
    return data

def prepare_data(data):
    features = data[['Return']].values
    target = data['Target'].values
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    return X_train, X_test, y_train, y_test

def test_regression_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    
    train_rmse = sqrt(mean_squared_error(y_train, train_predictions))
    train_r2 = r2_score(y_train, train_predictions)
    test_rmse = sqrt(mean_squared_error(y_test, test_predictions))
    test_r2 = r2_score(y_test, test_predictions)
    
    print(f"{model.__class__.__name__} Train RMSE: {train_rmse}")
    print(f"{model.__class__.__name__} Train R^2: {train_r2}")
    print(f"{model.__class__.__name__} Test RMSE: {test_rmse}")
    print(f"{model.__class__.__name__} Test R^2: {test_r2}")
    print("____________________________________________")

def run_regression_tests(ticker):
    data = fetch_data(ticker)
    X_train, X_test, y_train, y_test = prepare_data(data)
    models = [
        (LinearRegression(), "fit_intercept", "fit_intercept=True", "True or False"),
        (KNeighborsRegressor(), "number of neighbors", "n_neighbors=5", "1 to 20"),
        (RandomForestRegressor(), "number of trees", "n_estimators=100", "10 to 1000"),
        (ExtraTreesRegressor(), "number of trees", "n_estimators=100", "10 to 1000"),
        (AdaBoostRegressor(), "number of estimators", "n_estimators=50", "10 to 100"),
        (SVR(C=1.0, epsilon=0.2), "C and epsilon", "C=1.0, epsilon=0.2", "C: 0.01 to 100, epsilon: 0.01 to 0.2")
    ]
    for model, parameter_hint, parameter_value, range_hint in models:
        test_regression_model(model, X_train, X_test, y_train, y_test)
    print("---------------------------------------------------\nend\n================================")
    print("Parameter Hints\n================================")
    for _, parameter_hint, parameter_value, range_hint in models:
        model_name = _.__class__.__name__  # Extract the model's class name for display
        print(f"# Change the value of {parameter_hint} to adjust the model's score: {model_name}({parameter_value})")
        print(f"\"{parameter_hint}\" range can be between: {range_hint}\n")

# Example user input
ticker = 'SPY'
run_regression_tests(ticker)


In [6]:
#Broken
def test_regression_model(model, X_train, X_test, y_train, y_test, parameter_hint, parameter_value, range_hint):
    """
    Fits a regression model to the training data, makes predictions on the testing set,
    and evaluates the model's performance with RMSE and R² metrics.
    
    Parameters:
    model (estimator): The regression model to test.
    X_train (array-like): The training features.
    X_test (array-like): The testing features.
    y_train (array-like): The training target variable.
    y_test (array-like): The testing target variable.
    parameter_hint (str): Description of the model parameter to adjust.
    parameter_value (str): The current value of the parameter.
    range_hint (str): Suggested range for the parameter value.
    
    Prints the model's performance metrics (RMSE and R²).
    """
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Make predictions on the testing set
    predictions = model.predict(X_test)
    
    # Evaluate the model's performance
    rmse = sqrt(mean_squared_error(y_test, predictions))
    r2 = r2_score(y_test, predictions)
    
    print(f"{model.__class__.__name__} Performance:")
    print(f"RMSE: {rmse}")
    print(f"R²: {r2}")
    print("____________________________________________")

def run_regression_tests(data):
    """
    Prepares the data, tests multiple regression models, prints their performance,
    and provides parameter hints with current values and suggested ranges.
    
    Parameters:
    data (pandas.DataFrame): The stock data prepared for regression analysis.
    """
    # Prepare the data
    X_train, X_test, y_train, y_test = prepare_data(data)
    
    # Define the models to test along with parameter hints
    models = [
        (LinearRegression(), "fit_intercept", "True/False", "Include/Exclude the intercept in the model"),
        (KNeighborsRegressor(), "n_neighbors", "5", "1 to 20"),
        (RandomForestRegressor(), "n_estimators", "100", "10 to 1000"),
        (ExtraTreesRegressor(), "n_estimators", "100", "10 to 1000"),
        (AdaBoostRegressor(), "n_estimators", "50", "10 to 500"),
        (SVR(), "C and epsilon", "C=1.0, epsilon=0.2", "C: 0.01 to 100, epsilon: 0.01 to 1"),
    ]
    
    # Test each model and print performance
    for model, parameter_hint, parameter_value, range_hint in models:
        test_regression_model(model, X_train, X_test, y_train, y_test, parameter_hint, parameter_value, range_hint)

    # Print parameter hints and ranges
    # After testing all models, print parameter hints in the requested format
print("Parameter Hints\n================================")
for _, parameter_hint, parameter_value, range_hint in models:
    model_name = _.__class__.__name__  # Extract the model's class name for display
    print(f"# Change the value of {parameter_hint} to adjust the model's score: {model_name}({parameter_value})")
    print(f"\"{parameter_hint}\" range can be between: {range_hint}\n")

    # print("Parameter Hints\n================================")
    # for _, parameter_hint, parameter_value, range_hint in models:
    #     print(f"# Change the value of {parameter_hint} to adjust the model's score: {parameter_value}")
    #     print(f"\"{parameter_hint}\" range can be between: {range_hint}\n")

# Example usage
ticker = 'SPY'
data = fetch_data(ticker)
run_regression_tests(data)


Parameter Hints


NameError: name 'models' is not defined

In [2]:
def fetch_data(ticker, start_date="2020-01-01", end_date="2023-01-01"):
    """
    Fetches historical stock data for the specified ticker from Yahoo Finance.
    
    Parameters:
    ticker (str): The stock symbol for which to download the data.
    start_date (str): The start date for the data download (format: YYYY-MM-DD).
    end_date (str): The end date for the data download (format: YYYY-MM-DD).
    
    Returns:
    pandas.DataFrame: A DataFrame containing the historical stock data.
    """
    data = yf.download(ticker, start=start_date, end=end_date)
    # Calculate daily returns as a feature
    data['Daily Return'] = data['Adj Close'].pct_change()
    # Remove any NaN values that might have been introduced
    data.dropna(inplace=True)
    return data

def prepare_data(data, target_column='Daily Return', test_size=0.2, random_state=42):
    """
    Prepares the data for modeling, splitting into features and target, and then into training and testing sets.
    
    Parameters:
    data (pandas.DataFrame): The stock data.
    target_column (str): The column to be used as the target variable.
    test_size (float): The proportion of the dataset to include in the test split.
    random_state (int): Controls the shuffling applied to the data before applying the split.
    
    Returns:
    tuple: A tuple containing the training and testing datasets (X_train, X_test, y_train, y_test).
    """
    # Using previous day's return as the feature to predict today's return
    X = data[[target_column]].shift().dropna()
    y = data[target_column][1:]
    
    # Splitting the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    return X_train, X_test, y_train, y_test

print("Data fetching and preparation functions defined successfully.")


Data fetching and preparation functions defined successfully.


In [3]:
def test_regression_model(model, X_train, X_test, y_train, y_test):
    """
    Fits a regression model to the training data, makes predictions on the testing set, and evaluates the model's performance.
    
    Parameters:
    model (estimator): The regression model to test.
    X_train (array-like): The training features.
    X_test (array-like): The testing features.
    y_train (array-like): The training target variable.
    y_test (array-like): The testing target variable.
    
    Prints the model's performance metrics (RMSE and R²).
    """
    # Fit the model to the training data
    model.fit(X_train, y_train)
    
    # Make predictions on the testing set
    predictions = model.predict(X_test)
    
    # Evaluate the model's performance
    rmse = sqrt(mean_squared_error(y_test, predictions))
    r2 = r2_score(y_test, predictions)
    
    print(f"{model.__class__.__name__} Performance:")
    print(f"RMSE: {rmse}")
    print(f"R²: {r2}")
    print("____________________________________________")

def run_regression_tests(data):
    """
    Prepares the data, tests multiple regression models, and prints their performance.
    
    Parameters:
    data (pandas.DataFrame): The stock data prepared for regression analysis.
    """
    # Prepare the data
    X_train, X_test, y_train, y_test = prepare_data(data)
    
    # Define the models to test
    models = [
        LinearRegression(),
        KNeighborsRegressor(),
        RandomForestRegressor(),
        ExtraTreesRegressor(),
        AdaBoostRegressor(),
        SVR(C=1.0, epsilon=0.2),
    ]
    
    # Test each model
    for model in models:
        test_regression_model(model, X_train, X_test, y_train, y_test)

# Example usage
ticker = 'SPY'
data = fetch_data(ticker)
run_regression_tests(data)


[*********************100%%**********************]  1 of 1 completed


LinearRegression Performance:
RMSE: 0.01692756373492913
R²: 0.003016458096567809
____________________________________________
KNeighborsRegressor Performance:
RMSE: 0.019195796703088124
R²: -0.2820688764645267
____________________________________________
RandomForestRegressor Performance:
RMSE: 0.02101116039787642
R²: -0.5360280623035762
____________________________________________
ExtraTreesRegressor Performance:
RMSE: 0.023462797007486918
R²: -0.9153962828524482
____________________________________________
AdaBoostRegressor Performance:
RMSE: 0.019733063265305655
R²: -0.3548402499129104
____________________________________________
SVR Performance:
RMSE: 0.02169329114969483
R²: -0.6373818191562426
____________________________________________
