### 📥 Setup

Installation and Import of required packages

In [836]:
# !pip install -r requirements.txt

In [837]:
import yfinance as yf
import pandas as pd
import numpy as np

Initialization of Analysis Parameters

In [838]:
# Ticker symbols to analyse. Further candidates, currently disabled:
# "MSFT", "AMZN", "TSLA", "NVDA", "JNJ", "JPM", "XOM", "PG", "WMT", "AMT"
TICKERS: list[str] = [
    "AAPL",
]

# Date range of the analysis, written as 'YYYY-MM-DD' strings
START_DATE: str = "2022-01-01"
END_DATE: str = "2025-07-18"

### 🏗️ Data Acquisition

Function for Fetching Daily Price Data

In [839]:
def get_stock_data(tickers: list[str], start: str, end: str) -> dict[str, pd.DataFrame]:
    """
    Fetches price and volume data from Yahoo Finance for the given tickers and date range.

    Each ticker is downloaded on its own with auto_adjust=True. A ticker whose
    download comes back empty, or whose download raises, is reported with print()
    and left out of the result instead of aborting the whole run.

    Parameters:
        tickers (list[str]): U.S. equity ticker symbols
        start (str): Start date in 'YYYY-MM-DD' format
        end (str): End date in 'YYYY-MM-DD' format

    Returns:
        dict[str, pd.DataFrame]: Maps every successfully fetched ticker to a
        DataFrame with the columns 'Close' and 'Volume'
    """

    data = {}

    for ticker in tickers:
        try:
            raw = yf.download(ticker, start=start, end=end, auto_adjust=True)

            # Test for emptiness BEFORE selecting columns: an empty download may carry
            # no columns at all, and selecting from it would raise a KeyError that gets
            # reported as a fetch error instead of the clearer "no data" message.
            if raw.empty:
                print(f"No data found for {ticker} in the specified date range.")
                continue

            # Keep only the relevant columns and force plain, single-level labels
            df = raw[['Close', 'Volume']]
            df.columns = ['Close', 'Volume']

            data[ticker] = df
        except Exception as e:
            # Best effort: one failing ticker must not prevent the others from loading
            print(f"Error fetching data for {ticker}: {e}")

    return data

### 🧹 Data Cleaning and Preparation

Function for Filling Missing Values and Normalizing Values

In [840]:
def clean_data(data: dict) -> dict:
    """
    Cleans and prepares the stock data for analysis.

    For every ticker: zero 'Close' prices are treated as missing, missing values
    are forward- then backward-filled across all columns, and a min-max scaled
    'Close_Normalized' column is added. The input DataFrames are left untouched;
    new frames are returned.

    Note: if all 'Close' values of a ticker are equal (e.g. a single row), the
    min-max range is zero and 'Close_Normalized' is NaN for that ticker.

    Parameters:
        data (dict): Dictionary of DataFrames indexed by ticker symbols; each frame
            needs the columns 'Close' and 'Volume'

    Returns:
        dict: Dictionary of new DataFrames indexed by ticker symbols, with the
        columns 'Close', 'Close_Normalized' and 'Volume' (in that order)
    """

    cleaned_data = {}

    for ticker, df in data.items():
        # Replace zeros in 'Close' with NaN on a NEW frame; writing through
        # df.loc would alter the caller's raw data as a hidden side effect
        close_without_zeros = df['Close'].mask(df['Close'] == 0)
        filled = df.assign(Close=close_without_zeros)

        # Forward and backward fill missing values (bfill covers leading gaps)
        filled = filled.ffill().bfill()

        # Normalize the 'Close' prices to a range of 0 to 1
        close = filled['Close']
        lowest, highest = close.min(), close.max()
        filled['Close_Normalized'] = (close - lowest) / (highest - lowest)

        # Fix the column order of the result
        cleaned_data[ticker] = filled[['Close', 'Close_Normalized', 'Volume']]

    return cleaned_data

### 📊 Data Analysis

In [841]:
def calculate_ma_return_and_volatility(data: dict, ma_window: int, vol_window: int | None = None) -> dict:
    """
    Calculates the moving average and the volatility for each stock's closing price.
    
    Parameters:
        data (dict): Dictionary of DataFrames indexed by ticker symbols
        ma_window (int): Window size for moving average
        vol_window (int | None): Window size for rolling volatility calculation (Default to None, which is only non-rolling)

    Returns:
        dict: Dictionary of DataFrames with volatility added
    """
    
    final_data = {}

    for ticker, df in data.items():
        df['Return'] = df['Close'].pct_change()  # simple % return

        df[f'MA_{ma_window}'] = df['Close'].rolling(window=ma_window).mean()

        if vol_window is not None:
            df[f'Volatility_{vol_window}'] = df['Return'].rolling(window=vol_window).std()
            
            df = df.iloc[max(ma_window, vol_window):]  # Ensure enough data for both MA and Volatility calculations
        else:
            df = df.iloc[ma_window:]

        final_data[ticker] = (df, df['Return'].std())

    return final_data

In [842]:
# Run the pipeline end to end for the configured tickers and date range:
# download -> clean/normalize -> add returns, moving average and rolling volatility.
t = get_stock_data(TICKERS, START_DATE, END_DATE)  # ticker -> DataFrame with 'Close' and 'Volume'
c_t = clean_data(t)  # ticker -> DataFrame with 'Close', 'Close_Normalized', 'Volume'
# ticker -> (DataFrame, std of 'Return'); both rolling windows span 20 rows
ma_vol_data = calculate_ma_return_and_volatility(c_t, ma_window=20, vol_window=20)

[*********************100%***********************]  1 of 1 completed
