In [1]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import dask.dataframe as dd
import dask
dask.config.set({"dataframe.convert-string": False})
import sys
import os
import hvplot.dask 

In [2]:
from concurrent.futures import ThreadPoolExecutor
# Append the absolute form of ../src (resolved against the current working
# directory) to sys.path so the local modules imported below can be found.
sys.path.append(os.path.abspath("../src"))
# NOTE(review): star imports hide where names come from. process_all_tickers(),
# called later in this notebook, is not defined here and presumably comes from
# one of these two modules — confirm and consider importing it explicitly.
from data_loader import *
from utils import *

In [3]:
# def fetch_data_for_tickers(tickers, start_date, end_date, save_dir="data", suffix=""):
#     """
#     Fetch historical data for multiple tickers in parallel.

#     Parameters:
#         tickers (list): List of stock tickers.
#         start_date (str): Start date in "YYYY-MM-DD".
#         end_date (str): End date in "YYYY-MM-DD".
#         save_dir (str): Directory to save the data.
#         suffix (str): Suffix passed through to fetch_historical_data for the file name.
#     """
#     with ThreadPoolExecutor() as executor:
#         futures = [
#             executor.submit(fetch_historical_data, ticker, start_date, end_date, save_dir, suffix)
#             for ticker in tickers
#         ]
#         for future in futures:
#             try:
#                 print(f"Saved to: {future.result()}")
#             except Exception as e:
#                 print(f"Error fetching data: {e}")

def fetch_historical_data(ticker, start_date, end_date, save_dir="data", suffix_file=""):
    """
    Fetch historical market data for a given ticker symbol and save it as a CSV.

    Parameters:
        ticker (str): Stock ticker symbol.
        start_date (str): Start date in "YYYY-MM-DD".
        end_date (str): End date in "YYYY-MM-DD".
        save_dir (str): Directory to save the data; created if it does not exist.
        suffix_file (str): Text placed after "_historical_prices_" in the file
            name. The default empty string yields
            "<ticker>_historical_prices_.csv".

    Returns:
        str: Path of the CSV file that was written.

    Raises:
        ValueError: If the download produced no rows, so that an empty CSV is
            never written and reported as a successful fetch.
    """
    os.makedirs(save_dir, exist_ok=True)
    data = yf.download(ticker, start=start_date, end=end_date)

    # A bad ticker or a date range with no quotes can come back as an empty
    # frame instead of an exception; fail loudly rather than saving a
    # header-only file that later processing steps would have to cope with.
    if data is None or data.empty:
        raise ValueError(
            f"No historical data returned for {ticker!r} "
            f"between {start_date} and {end_date}"
        )

    file_path = os.path.join(save_dir, f"{ticker}_historical_prices_{suffix_file}.csv")
    data.to_csv(file_path)
    return file_path

In [6]:
# Example: download each ticker's history for 2010-01-01 .. 2023-01-01 into the
# default "data" directory (one CSV per ticker).
tickers = [
    "AAPL",
    "GOOGL",
    "MSFT",
    "TSLA",
]
for ticker in tickers:
    fetch_historical_data(ticker, start_date="2010-01-01", end_date="2023-01-01")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


## Data Preparation

In [7]:
# process_all_tickers is not defined in this notebook; presumably it is provided
# by the star imports of data_loader / utils — TODO confirm. The recorded output
# below shows it writing one CSV per ticker under processed_data/.
process_all_tickers()

Processed and saved: processed_data/AAPL_historical_prices_.csv
Processed and saved: processed_data/GOOGL_historical_prices_.csv
Processed and saved: processed_data/TSLA_historical_prices_.csv
Processed and saved: processed_data/MSFT_historical_prices_.csv


## Exploratory Data Analysis

In [8]:
import hvplot.dask  # Enables hvPlot for Dask
import dask.dataframe as dd

def visualize_ticker(ticker, data_dir="processed_data", suffix_file=""):
    """
    Visualize the closing price and Bollinger Bands for a specific ticker.

    Parameters:
        ticker (str): Stock ticker symbol.
        data_dir (str): Directory containing processed data. May be relative
            (resolved against the current directory) or absolute.
        suffix_file (str): Text placed after "_historical_prices_" in the file
            name, mirroring fetch_historical_data. The default empty string
            selects "<ticker>_historical_prices_.csv".

    Returns:
        The plot object produced by ``df.hvplot.line`` with the "Close",
        "Bollinger_Upper" and "Bollinger_Lower" columns drawn against "Date".
    """
    file_name = f"{ticker}_historical_prices_{suffix_file}.csv"
    # os.path.join keeps the "./" prefix for relative directories but discards
    # it when data_dir is absolute, so an absolute path is no longer turned
    # into a relative one.
    file_path = os.path.join(".", data_dir, file_name)
    print(file_path)

    # Row 0 supplies the column names; rows 1 and 2 are skipped and the column
    # labelled "Price" is parsed as dates.
    # NOTE(review): this presumably matches a multi-row header carried over
    # from the downloaded CSV — confirm against what process_all_tickers writes.
    df = dd.read_csv(
        file_path,
        header=0,
        skiprows=[1, 2],
        parse_dates=["Price"],
    )
    df = df.rename(columns={"Price": "Date"})

    plot = df.hvplot.line(
        x="Date",
        y=["Close", "Bollinger_Upper", "Bollinger_Lower"],
        title=f"{ticker} Closing Prices with Bollinger Bands",
        xlabel="Date",
        ylabel="Price",
    )
    return plot

# Last expression in the cell, so the plot returned for AAPL becomes the cell's output.
visualize_ticker("AAPL")

./processed_data/AAPL_historical_prices_.csv
