In [5]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.dummy import DummyClassifier


In [6]:
def fetch_data(ticker):
    data = yf.download(ticker, start="2020-01-01", end="2023-01-01")
    data['Return'] = data['Adj Close'].pct_change()
    data['Target'] = (data['Return'] > 0).astype(int)
    data.dropna(inplace=True)
    return data

def prepare_data(data):
    features = data[['Return']].values
    target = data['Target'].values
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test


In [7]:
def test_classification_model(model, X_train, X_test, y_train, y_test, parameter_hint):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)
    print(f"{model.__class__.__name__} Train Accuracy: {train_accuracy}")
    print(f"{model.__class__.__name__} Test Accuracy: {test_accuracy}")
    print(f"# Change the value of {parameter_hint} to adjust the model's score")


In [8]:
def test_classification_model(model, X_train, X_test, y_train, y_test, parameter_hint, parameter_value):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)
    print(f"{model.__class__.__name__} Train Accuracy: {train_accuracy}")
    print(f"{model.__class__.__name__} Test Accuracy: {test_accuracy}")
    print("____________________________________________")

def run_classification_tests(ticker):
    data = fetch_data(ticker)
    X_train, X_test, y_train, y_test = prepare_data(data)
    models = [
        (LogisticRegression(), "C value", "C=1.0", "0.01 to 100"),
        (KNeighborsClassifier(), "number of neighbors", "n_neighbors=5", "1 to 20"),
        (DecisionTreeClassifier(), "max depth", "max_depth=None", "1 to None"),
        (RandomForestClassifier(), "number of trees", "n_estimators=100", "10 to 1000"),
        (GradientBoostingClassifier(), "learning rate", "learning_rate=0.1", "0.01 to 1"),
        (DummyClassifier(strategy='most_frequent'), "strategy", "strategy='most_frequent'", "most_frequent, stratified, uniform, constant")
    ]
    for model, parameter_hint, parameter_value, range_hint in models:
        test_classification_model(model, X_train, X_test, y_train, y_test, parameter_hint, parameter_value)
    print("---------------------------------------------------\nParameter Hints\n================================")
    for model, parameter_hint, parameter_value, range_hint in models:
        print(f"# Change the value of {parameter_hint} to adjust the model's score: {model.__class__.__name__}({parameter_value})")
        print(f"\"{parameter_hint}\" range can be between: {range_hint}\n")

# Example user input
ticker = 'SPY'
run_classification_tests(ticker)


[*********************100%%**********************]  1 of 1 completed


LogisticRegression Train Accuracy: 1.0
LogisticRegression Test Accuracy: 1.0
____________________________________________
KNeighborsClassifier Train Accuracy: 0.9983443708609272
KNeighborsClassifier Test Accuracy: 1.0
____________________________________________
DecisionTreeClassifier Train Accuracy: 1.0
DecisionTreeClassifier Test Accuracy: 0.9933774834437086
____________________________________________
RandomForestClassifier Train Accuracy: 1.0
RandomForestClassifier Test Accuracy: 0.9933774834437086
____________________________________________
GradientBoostingClassifier Train Accuracy: 1.0
GradientBoostingClassifier Test Accuracy: 0.9933774834437086
____________________________________________
DummyClassifier Train Accuracy: 0.5198675496688742
DummyClassifier Test Accuracy: 0.5695364238410596
____________________________________________
---------------------------------------------------
Parameter Hints
# Change the value of C value to adjust the model's score: LogisticRegression(C=