In [1]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.dummy import DummyClassifier


In [2]:
def fetch_data(ticker):
    data = yf.download(ticker, start="2020-01-01", end="2023-01-01")
    data['Return'] = data['Adj Close'].pct_change()
    data['Target'] = (data['Return'] > 0).astype(int)
    data.dropna(inplace=True)
    return data

def prepare_data(data):
    features = data[['Return']].values
    target = data['Target'].values
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test


In [3]:
def test_classification_model(model, X_train, X_test, y_train, y_test, parameter_hint):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)
    print(f"{model.__class__.__name__} Train Accuracy: {train_accuracy}")
    print(f"{model.__class__.__name__} Test Accuracy: {test_accuracy}")
    print(f"# Change the value of {parameter_hint} to adjust the model's score")


In [4]:
def test_classification_model(model, X_train, X_test, y_train, y_test, parameter_hint, parameter_value):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)
    print(f"{model.__class__.__name__} Train Accuracy: {train_accuracy}")
    print(f"{model.__class__.__name__} Test Accuracy: {test_accuracy}")
    print("____________________________________________")

def run_classification_tests(ticker):
    data = fetch_data(ticker)
    X_train, X_test, y_train, y_test = prepare_data(data)
    models = [
        (LogisticRegression(), "C value", "C=1.0", "0.01 to 100"),
        (KNeighborsClassifier(), "number of neighbors", "n_neighbors=5", "1 to 20"),
        (DecisionTreeClassifier(), "max depth", "max_depth=None", "1 to None"),
        (RandomForestClassifier(), "number of trees", "n_estimators=100", "10 to 1000"),
        (GradientBoostingClassifier(), "learning rate", "learning_rate=0.1", "0.01 to 1"),
        (DummyClassifier(strategy='most_frequent'), "strategy", "strategy='most_frequent'", "most_frequent, stratified, uniform, constant")
    ]
    for model, parameter_hint, parameter_value, range_hint in models:
        test_classification_model(model, X_train, X_test, y_train, y_test, parameter_hint, parameter_value)
    print("---------------------------------------------------\nParameter Hints\n================================")
    for model, parameter_hint, parameter_value, range_hint in models:
        print(f"# Change the value of {parameter_hint} to adjust the model's score: {model.__class__.__name__}({parameter_value})")
        print(f"\"{parameter_hint}\" range can be between: {range_hint}\n")

# Example user input
ticker = 'SPY'
run_classification_tests(ticker)


[*********************100%%**********************]  1 of 1 completed


LogisticRegression Train Accuracy: 1.0
LogisticRegression Test Accuracy: 1.0
____________________________________________
KNeighborsClassifier Train Accuracy: 0.9983443708609272
KNeighborsClassifier Test Accuracy: 1.0
____________________________________________
DecisionTreeClassifier Train Accuracy: 1.0
DecisionTreeClassifier Test Accuracy: 0.9933774834437086
____________________________________________
RandomForestClassifier Train Accuracy: 1.0
RandomForestClassifier Test Accuracy: 0.9933774834437086
____________________________________________
GradientBoostingClassifier Train Accuracy: 1.0
GradientBoostingClassifier Test Accuracy: 0.9933774834437086
____________________________________________
DummyClassifier Train Accuracy: 0.5198675496688742
DummyClassifier Test Accuracy: 0.5695364238410596
____________________________________________
---------------------------------------------------
Parameter Hints
# Change the value of C value to adjust the model's score: LogisticRegression(C=

lesson 18

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf





In [7]:
df=pd.read_csv(r'C:\Users\mikea\Desktop\Dataclass2\StockPrediction_Project3\Index_Fund_Price_Prediction_App\Resources\stock_data.csv')
df.head()

Unnamed: 0,Date,Adj Close DIA,Adj Close IVV,Adj Close QQQ,Adj Close SPY,Adj Close SWPPX,Adj Close VOO,Adj Close VTSAX,Adj Close VTWO,Close DIA,...,Open VTSAX,Open VTWO,Volume DIA,Volume IVV,Volume QQQ,Volume SPY,Volume SWPPX,Volume VOO,Volume VTSAX,Volume VTWO
0,2019-01-02,210.708572,231.034561,149.864609,230.557465,35.365585,211.2491,57.771988,50.552246,233.360001,...,62.150002,53.044998,5319500,5980000,58576700,126925200,0,4891300,0,471800
1,2019-01-03,204.71315,225.261505,144.968475,225.055664,34.498966,206.206497,56.433426,49.5942,226.720001,...,60.709999,53.77,7350200,5831100,74820200,144140700,0,3329600,0,372800
2,2019-01-04,211.539352,233.182281,151.170883,232.594116,35.679054,213.012665,58.38549,51.430855,234.279999,...,62.810001,53.775002,7343700,6430100,74709300,142628800,0,5100100,0,312800
3,2019-01-07,212.514465,234.751816,152.970657,234.42804,35.927967,214.61087,58.887459,52.398243,235.360001,...,63.349998,55.134998,5101600,5961000,52059300,103139100,0,3706000,0,281000
4,2019-01-08,214.826004,237.064682,154.35434,236.630554,36.278305,216.695892,59.510258,53.197399,237.919998,...,64.019997,56.485001,4845800,5348500,49388700,102512600,0,3546600,0,193200
