In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Fetch historical stock data using yfinance
stock_symbol = 'AAPL'
stock_data = yf.download(stock_symbol, start='2020-01-01', end='2023-01-01')

[*********************100%***********************]  1 of 1 completed


In [5]:
# Step 2: Feature Engineering - Calculate financial indicators
stock_data['SMA'] = stock_data['Close'].rolling(window=20).mean()  # 20-day Simple Moving Average

In [6]:
up_prices = np.where(stock_data['Close'].diff(1) > 0, stock_data['Close'].diff(1), 0)
down_prices = np.where(stock_data['Close'].diff(1) < 0, -stock_data['Close'].diff(1), 0)
avg_gain = np.mean(up_prices[-14:])
avg_loss = np.mean(down_prices[-14:])
rsi = 100 - (100 / (1 + avg_gain / avg_loss))
stock_data['RSI'] = rsi

In [7]:
# Step 3: Prepare the data for training the model
X = stock_data[['SMA', 'RSI']].dropna()  # Features
y = stock_data['Close'].shift(-1).dropna()  # Target variable (next day's closing price)

In [30]:
from datetime import datetime, timedelta

In [31]:
now = datetime.now()

# Calculate the date and time for 10 AM yesterday
yesterday = now - timedelta(days=1)
user_date = datetime(yesterday.year, yesterday.month, yesterday.day, hour=10, minute=0, second=0)    

stock_symbol = 'TSLA'
stock_data = yf.download(stock_symbol, start=user_date, end=user_date)

[*********************100%***********************]  1 of 1 completed


In [36]:
stock_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-24,255.850006,269.850006,254.119995,269.059998,269.059998,136508500,,


In [35]:
#testing
#it appears that there has to be a long enough time period, so I need to edit the stock_data to get the past year of the given stock

stock_data['SMA'] = stock_data['Close'].rolling(window=20).mean() 
up_prices = np.where(stock_data['Close'].diff(1) > 0, stock_data['Close'].diff(1), 0)
down_prices = np.where(stock_data['Close'].diff(1) < 0, -stock_data['Close'].diff(1), 0)
avg_gain = np.mean(up_prices[-14:])
avg_loss = np.mean(down_prices[-14:])
rsi = 100 - (100 / (1 + avg_gain / avg_loss))
stock_data['RSI'] = rsi
X = stock_data[['SMA', 'RSI']].dropna()  #Features
X.head()

  rsi = 100 - (100 / (1 + avg_gain / avg_loss))


Unnamed: 0_level_0,SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1


In [9]:
# Make sure the number of samples in X and y are consistent
num_samples = min(X.shape[0], y.shape[0])
X = X.iloc[:num_samples]
y = y.iloc[:num_samples]

In [10]:
# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
# Step 5: Train the linear regression model
model_tech = LinearRegression()
model_tech.fit(X_train, y_train)

LinearRegression()

Unnamed: 0_level_0,SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-26,155.414500,31.430277
2020-03-18,70.335499,31.430277
2022-04-04,166.821501,31.430277
2020-11-11,115.578500,31.430277
2021-02-17,136.261000,31.430277
...,...,...
2020-05-11,72.161875,31.430277
2021-05-20,129.073001,31.430277
2022-06-15,142.498002,31.430277
2021-02-09,134.943501,31.430277


In [24]:
X_test.head()

Unnamed: 0_level_0,SMA,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-26,155.4145,31.430277
2020-03-18,70.335499,31.430277
2022-04-04,166.821501,31.430277
2020-11-11,115.5785,31.430277
2021-02-17,136.261,31.430277


In [19]:
# Step 6: Make predictions using the trained model
y_pred = model_tech.predict(X_test)

In [None]:
#testing my model
p = model_tech.predict(X_test)

In [14]:
# Step 7: Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [15]:
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

Mean Squared Error: 31.98
R-squared: 0.97


In [16]:
# Step 8: Use the model to predict future stock prices
# Get the latest financial indicators for the next day's prediction
latest_indicators = stock_data[['SMA', 'RSI']].tail(1)

In [17]:
# Make the prediction for the next day's closing price
next_day_prediction = model.predict(latest_indicators)
print(f'Predicted Stock Price for Next Day: {next_day_prediction[0]:.2f}')

Predicted Stock Price for Next Day: 136.70


In [22]:
import pickle
with open('model_tech_pickle', 'wb') as f:
    pickle.dump(model_tech, f)