In [1]:
import os
import sys
import requests
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report

import yfinance as yf



  _empty_series = pd.Series()


In [2]:
def grab_price_data(tickers_list=None, start_date='2023-06-01', output_path='price_data.csv'):
    """Download daily price history for several tickers and store it as one CSV.

    For every ticker the history between ``start_date`` and today is requested
    from yfinance at a one-day interval. Each per-ticker frame gets two extra
    columns -- ``symbol`` (the ticker) and ``datetime`` (the row's index
    formatted as ``YYYY-MM-DD``) -- and all frames are stacked into a single
    DataFrame that is written to ``output_path`` without the row index.

    Parameters
    ----------
    tickers_list : list of str, optional
        Symbols to download. Defaults to ``['JPM', 'COST', 'IBM', 'HD', 'ARWR']``.
    start_date : str, optional
        First date to request, formatted ``YYYY-MM-DD``.
    output_path : str, optional
        Destination CSV file.

    Returns
    -------
    pandas.DataFrame
        The combined price data that was written to ``output_path``
        (an empty DataFrame when nothing was downloaded).
    """
    if tickers_list is None:
        tickers_list = ['JPM', 'COST', 'IBM', 'HD', 'ARWR']

    # The download window always ends "today".
    end_date = datetime.now().strftime('%Y-%m-%d')
    frames = []

    for ticker in tickers_list:
        # `period` is intentionally not passed: the window is already fully
        # described by start/end, so a period would be redundant.
        price_history = yf.Ticker(ticker).history(start=start_date, end=end_date, interval='1d')

        # Nothing came back for this ticker; skip it so the date formatting
        # below is never applied to an empty index.
        if price_history.empty:
            continue

        # Tag the whole frame at once instead of rebuilding it row by row.
        # NOTE(review): unlike a per-row dict conversion, this keeps each
        # column's original dtype (integer columns are not upcast to float).
        frames.append(
            price_history.assign(
                symbol=ticker,
                datetime=price_history.index.strftime('%Y-%m-%d'),  # Timestamp index -> date string
            )
        )

    # pd.concat refuses an empty list, so fall back to an empty frame.
    price_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    price_data.to_csv(output_path, index=False)
    return price_data

In [25]:
# Load the price data, downloading it first only when no local copy exists.
# The existence check targets 'price_data.csv' -- the same file that
# grab_price_data() writes and that is read back below -- so a previous
# download is reused instead of being fetched again on every run.
# Delete the file to force a fresh download.
if not os.path.exists('price_data.csv'):

    # Grab the data and store it.
    grab_price_data()

# Load the data
price_data = pd.read_csv('price_data.csv')

# Display the head before moving on.
# price_data.head()

In [26]:
# Rearranging Columns
# price_data_ro refers to the frame exactly as it was loaded.
price_data_ro = price_data

# Desired column order: identifying columns first, then the price fields.
column_order = ['datetime', 'symbol', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']

# .copy() makes the reordered frame an independent object, so the in-place
# sort and the new column added in later cells do not trigger pandas'
# SettingWithCopyWarning.
price_data = price_data_ro[column_order].copy()
price_data.to_csv('./price_data_ro.csv')

price_data.head()

Unnamed: 0,datetime,symbol,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2023-06-01,JPM,133.770797,135.446353,132.722337,134.809448,12248100.0,0.0,0.0
1,2023-06-02,JPM,136.749572,138.630906,136.534001,137.641251,12917200.0,0.0,0.0
2,2023-06-05,JPM,137.288511,137.288511,135.348388,136.289047,8511900.0,0.0,0.0
3,2023-06-06,JPM,135.887284,137.278701,135.426762,136.533997,9329500.0,0.0,0.0
4,2023-06-07,JPM,136.739784,138.425149,136.034282,137.856827,9281400.0,0.0,0.0


In [27]:
# sort the values by symbol and then date
# (rebinding instead of inplace=True keeps the cell safe to re-run and avoids
# mutating a frame that was derived from another one)
price_data = price_data.sort_values(by=['symbol', 'datetime'])

# calculate the change in price *within each symbol*: a plain .diff() on the
# stacked frame would subtract the last close of one ticker from the first
# close of the next. Grouping leaves the first row of every symbol as NaN.
price_data['change_in_price'] = price_data.groupby('symbol')['Close'].diff()

In [28]:
# Preview the first rows of the frame (sorted by symbol, then date) to check
# the new change_in_price column.
price_data.head()

Unnamed: 0,datetime,symbol,Open,High,Low,Close,Volume,Dividends,Stock Splits,change_in_price
704,2023-06-01,ARWR,34.549999,35.200001,33.25,34.669998,1026400.0,0.0,0.0,
705,2023-06-02,ARWR,35.490002,36.07,34.560001,36.049999,942400.0,0.0,0.0,1.380001
706,2023-06-05,ARWR,35.490002,36.169998,34.970001,35.0,710300.0,0.0,0.0,-1.049999
707,2023-06-06,ARWR,35.139999,36.341999,34.950001,36.119999,670100.0,0.0,0.0,1.119999
708,2023-06-07,ARWR,36.189999,36.75,35.380001,36.459999,695500.0,0.0,0.0,0.34


In [29]:
# Preview the last rows of the same frame.
price_data.tail()

Unnamed: 0,datetime,symbol,Open,High,Low,Close,Volume,Dividends,Stock Splits,change_in_price
171,2024-02-06,JPM,174.610001,175.869995,173.759995,175.100006,6764800.0,0.0,0.0,0.600006
172,2024-02-07,JPM,175.690002,175.869995,173.960007,175.429993,7225500.0,0.0,0.0,0.329987
173,2024-02-08,JPM,175.0,175.309998,173.570007,174.800003,6060300.0,0.0,0.0,-0.62999
174,2024-02-09,JPM,175.0,175.100006,173.669998,175.009995,6295900.0,0.0,0.0,0.209991
175,2024-02-12,JPM,174.779999,176.809998,173.699997,175.789993,8536200.0,0.0,0.0,0.779999
