<a href="https://colab.research.google.com/github/DanielYou0/DanielYou0.github.io/blob/main/Daniel_You_predictive_model_for_stock_prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [53]:
# imports
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [54]:
# Fetch stock data from Yahoo Finance (note this is an outdated model will change soon)
ticker = "AAPL" # change this ticker to see different stock comparisions 
start_date = "2016-01-01"
end_date = "2021-12-31"
data = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [55]:
# Reset the index to make Date a column
data.reset_index(inplace=True)

# Split the data into features and target variable
X = data[['Open', 'Close', 'Volume']]
y = data['Adj Close']

In [57]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [60]:
# Create and train the models
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

tree_model = DecisionTreeRegressor()
tree_model.fit(X_train, y_train)

gb_model = GradientBoostingRegressor()
gb_model.fit(X_train, y_train)

In [61]:
# Make predictions on the test set
linear_pred = linear_model.predict(X_test)
tree_pred = tree_model.predict(X_test)
gb_pred = gb_model.predict(X_test)

In [62]:
# Calculate the performance metrics
linear_mse = mean_squared_error(y_test, linear_pred)
linear_r2 = r2_score(y_test, linear_pred)

tree_mse = mean_squared_error(y_test, tree_pred)
tree_r2 = r2_score(y_test, tree_pred)

gb_mse = mean_squared_error(y_test, gb_pred)
gb_r2 = r2_score(y_test, gb_pred)

In [63]:
# Print the performance scores (linear regression is the best compared to all the other models)
print("Performance Scores:")
print()

print("Linear Regression:")
print("MSE:", linear_mse)
print("R-squared:", linear_r2)
print()

print("Decision Tree:")
print("MSE:", tree_mse)
print("R-squared:", tree_r2)
print()

print("Gradient Boosting:")
print("MSE:", gb_mse)
print("R-squared:", gb_r2)
print()

Performance Scores:

Linear Regression:
MSE: 0.07417556056877522
R-squared: 0.9999570192473701

Decision Tree:
MSE: 0.22138944033970903
R-squared: 0.9998717167123894

Gradient Boosting:
MSE: 0.17532035346676816
R-squared: 0.9998984112733956

