## 💸 Stock Trend Analysis Framework

### 📥 Setups

Installation and Import of required packages

In [124]:
# !pip install -r requirements.txt

In [125]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import nbformat

Initialization of Analysis Parameters

In [126]:
TICKERS: list[str] = ["AAPL", "MSFT", "AMZN", "TSLA", "NVDA", "JNJ", "JPM", "XOM", "PG", "WMT", "AMT"]
START_DATE: str = "2022-01-01"
END_DATE: str = "2025-07-18"

### 🏗️ Data Acquisition

Function for Fetching Daily Price Data

In [127]:
def get_stock_data(tickers: list[str], start: str, end: str):
    """
    Fetches financial data from Yahoo Finance API for given tickers and date range.
    
    Parameters:
        tickers (list[str]): U.S. equity ticker symbols
        start_date (str): Start date in 'YYYY-MM-DD' format
        end_date (str): End date in 'YYYY-MM-DD' format

    Returns:
        dict[str, pd.DataFrame]: Dictionary of DataFrames with date-indexed adjusted closing price data
    """
    
    data = {}

    for ticker in tickers:
        try:
            # Fetch data from Yahoo Finance and select relevant columns
            df = yf.download(ticker, start=start, end=end, auto_adjust=True)[['Close', 'Volume']]
            df.columns = ['Close', 'Volume']
            
            if not df.empty:
                data[ticker] = df
            else:
                print(f"No data found for {ticker} in the specified date range.")
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")

    return data

### 🧹 Data Cleaning and Preparation

Function for Filling Missing Values and Normalizing Values

In [128]:
def clean_data(data: dict[str, pd.DataFrame]) -> dict:
    """
    Cleans and prepares the stock data for analysis.
    
    Parameters:
        data (dict[str, pd.DataFrame]): Dictionary of DataFrames indexed by ticker symbols

    Returns:
        dict[str, pd.DataFrame]: Dictionary of DataFrames with cleaned and normalized data
    """

    cleaned_data = {}

    for ticker, df in data.items():
        # Replace zeros in 'Close' with NaN
        df.loc[df['Close'] == 0, 'Close'] = np.nan

        # Forward and backward fill missing values
        df = df.ffill()
        
        # Normalize the 'Close' prices to a range of 0 to 1
        df['Close_Normalized'] = (df['Close'] - df['Close'].min()) / (df['Close'].max() - df['Close'].min())

        df = df[['Close', 'Close_Normalized', 'Volume']] # Change column order
        
        cleaned_data[ticker] = df

    return cleaned_data

### 📊 Data Analysis

Functions for Calculating Return, Moving Average, and Volatility

In [129]:
def calculate_return(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates the daily return of the stock based on the 'Close' price.

    Parameters:
        df (pd.DataFrame): DataFrame containing stock data with 'Close' prices

    Returns:
        pd.DataFrame: DataFrame with an additional 'Return' column
    """
    
    df['Return'] = df['Close'].pct_change()
    return df

def calculate_moving_average(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """
    Calculates the moving average of the 'Close' price over a specified window.

    Parameters:
        df (pd.DataFrame): DataFrame containing stock data with 'Close' prices
        window (int): The number of periods over which to calculate the moving average
    
    Returns:
        pd.DataFrame: DataFrame with an additional column for the moving average
    """

    df[f'Moving_Avg_{window}'] = df['Close'].rolling(window=window).mean()
    return df

def calculate_volatility(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """
    Calculates the volatility of the stock based on the 'Return' column over a specified window.

    Parameters:
        df (pd.DataFrame): DataFrame containing stock data with 'Return' column
        window (int): The number of periods over which to calculate the volatility
    
    Returns:
        pd.DataFrame: DataFrame with an additional column for the volatility
    """

    df[f'Rolling_Vol_{window}'] = df['Return'].rolling(window=window).std()
    return df

Function for Calculating all the Measures

In [130]:
def calculate_measures(data: dict[str, pd.DataFrame], ma_window: int | list[int] = 50, vol_window: int = 20) -> dict:
    """
    Calculates the return, moving average and the volatility for each stock's closing price.
    
    Parameters:
        data (dict[str, pd.DataFrame]): Dictionary of DataFrames indexed by ticker symbols
        ma_window (int | list[int]): Window size(s) for moving average (Default to 50)
        vol_window (int): Window size for rolling volatility calculation

    Returns:
        dict[str, dict[str, pd.DataFrame | float]]: Dictionary of DataFrames with volatility added
    """
    
    if isinstance(ma_window, int):
        ma_window = [ma_window]

    final_data = {}

    for ticker, df in data.items():
        df = calculate_return(df)
        for ma_w in ma_window:
            df = calculate_moving_average(df, ma_w)
        df = calculate_volatility(df, vol_window)

        final_data[ticker] = {
            'data': df,
            'overall_vol': float(df['Return'].std())
        }

    return final_data

Functions for Calculating RSI and MACD

In [131]:
def calculate_RSI(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """
    Calculates the Relative Strength Index (RSI) for a given DataFrame.

    Parameters:
        df (pd.DataFrame): DataFrame containing stock data with 'Close' prices
        period (int): The number of periods to use for RSI calculation

    Returns:
        pd.DataFrame: DataFrame with an additional 'RSI' column
    """

    df = df.copy()

    avg_gain = (df['Return'].where(df['Return'] > 0, 0)).rolling(window=window).mean()
    avg_loss = (-df['Return'].where(df['Return'] < 0, 0)).rolling(window=window).mean()
    
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    return df

def calculate_MACD(df: pd.DataFrame, short_window: int, long_window: int, signal_window: int) -> pd.DataFrame:
    """
    Calculates the Moving Average Convergence Divergence (MACD) for a given DataFrame.

    Parameters:
        df (pd.DataFrame): DataFrame containing stock data with 'Close' prices
        short_window (int): Short-term EMA window
        long_window (int): Long-term EMA window
        signal_window (int): Signal line EMA window

    Returns:
        pd.DataFrame: DataFrame with additional EMA, MACD, MACD Signal, and MACD Histogram columns
    """

    df = df.copy()

    df['EMA_short'] = df['Close'].ewm(span=short_window, adjust=False).mean()
    df['EMA_long'] = df['Close'].ewm(span=long_window, adjust=False).mean()

    df['MACD'] = df['EMA_short'] - df['EMA_long']
    df['MACD_Signal'] = df['MACD'].ewm(span=signal_window, adjust=False).mean()
    df['MACD_Hist'] = df['MACD'] - df['MACD_Signal']
    
    return df

Function for Calculating Both Indicators

In [132]:
def calculate_indicators(data: dict[str, dict[str, pd.DataFrame | float]], rsi_window: int = 14, short_window: int = 12, long_window: int = 26, signal_window: int = 9):
    """
    Calculates the RSI and MACD for given DataFrames.

    Parameters:
        data (dict[str, dict[str, pd.DataFrame | float]]): Dictionary of DataFrames indexed by ticker symbols
        rsi_period (int): The number of periods to use for RSI calculation (Default to 14)
        short_window (int): Short-term EMA window (Default to 12)
        long_window (int): Long-term EMA window (Default to 26)
        signal_window (int): Signal line EMA window (Default to 9)

    Returns:
        dict[str, dict[str, pd.DataFrame | float]]: Dictionary of DataFrames with EMA, MACD, MACD Signal, and MACD Histogram added
    """

    final_data = {}

    for ticker, d in data.items():
        df = d['data']

        df = calculate_RSI(df, rsi_window)
        df = calculate_MACD(df, short_window, long_window, signal_window)

        final_data[ticker] = {
            'data': df,
            'overall_vol': d['overall_vol']
        }

    return final_data

Function for Filtering Missing Values after all Calculations

In [133]:
def filter_data(data: dict[str, dict[str, pd.DataFrame | float]]):
    '''
    Filters out rows of missing values for given dataFrames

    Parameters:
        data (dict[str, dict[str, pd.DataFrame | float]]): Dictionary of DataFrames indexed by ticker symbols

    Returns:
        dict[str, dict[str, pd.DataFrame | float]]: Dictionary of DataFrames without NaNs
    '''

    final_data = {}

    for ticker, d in data.items():
        df = d['data']

        df = df.loc[df.dropna().index[0]:]

        final_data[ticker] = {
            'data': df,
            'overall_vol': d['overall_vol']
        }
    return final_data

### 📈 Visualization

Function for Plotting Prices, Moving Averages, Volume, RSI, and MACD

In [134]:
def plot_price_and_moving_average(df: pd.DataFrame, fig: go.Figure, row: int) -> None:
    """
    Adds traces for closing price and all moving averages to a specific subplot row.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'Close' and moving average columns.
        fig (go.Figure): Plotly Figure object to which traces will be added.
        row (int): The row number of the subplot to add traces to.
    """

    fig.add_trace(
        go.Scatter(x=df.index, y=df['Close'], name='Close', line=dict(color='black')),
        row=row, col=1
    )
    for c in df.columns:
        if 'Moving_Avg' in c:
            fig.add_trace(
                go.Scatter(x=df.index, y=df[c], name=c),
                row=row, col=1
            )

def plot_volume(df: pd.DataFrame, fig: go.Figure, row: int) -> None:
    """
    Adds a bar chart for volume data to a specific subplot row.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'Volume' column.
        fig (go.Figure): Plotly Figure object to which the volume bar chart will be added.
        row (int): The row number of the subplot to add the volume chart to.
    """

    fig.add_trace(
        go.Bar(x=df.index, y=df['Volume'], name='Volume', marker_color='gray'),
        row=row, col=1
    )

def plot_rsi(df: pd.DataFrame, fig: go.Figure, row: int) -> None:
    """
    Adds a line chart for RSI with threshold lines to a specific subplot row.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'RSI' column.
        fig (go.Figure): Plotly Figure object to which RSI traces will be added.
        row (int): The row number of the subplot to add the RSI plot to.
    """
    
    fig.add_trace(
        go.Scatter(x=df.index, y=df['RSI'], name='RSI', line=dict(color='orange')),
        row=row, col=1
    )
    fig.add_hline(y=70, line_dash='dash', line_color='gray', row=row, col=1)
    fig.add_hline(y=30, line_dash='dash', line_color='gray', row=row, col=1)

def plot_macd(df: pd.DataFrame, fig: go.Figure, row: int) -> None:
    """
    Adds line charts for MACD and MACD Signal, and a bar chart for MACD Histogram to a specific subplot row.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'MACD', 'MACD_Signal', and 'MACD_Hist' columns.
        fig (go.Figure): Plotly Figure object to which MACD traces will be added.
        row (int): The row number of the subplot to add MACD-related plots to.
    """
    fig.add_trace(
        go.Scatter(x=df.index, y=df['MACD'], name='MACD', line=dict(color='blue')),
        row=row, col=1
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df['MACD_Signal'], name='MACD Signal', line=dict(color='red')),
        row=row, col=1
    )
    fig.add_trace(
        go.Bar(x=df.index, y=df['MACD_Hist'], name='MACD Hist', marker_color='green'),
        row=row, col=1
    )

Function for Plotting all the Technicals

In [135]:
def plot_technical_dashboard(df: pd.DataFrame, ticker: str) -> None:
    """
    Creates a 4-row technical analysis dashboard with subplots for price & moving averages, volume, RSI, and MACD.

    Parameters:
        df (pd.DataFrame): DataFrame containing all necessary columns for plotting technical indicators.
        ticker (str): Stock ticker symbol used for the plot title.
    """
    
    fig = make_subplots(
        rows=4, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        row_heights=[0.4, 0.2, 0.2, 0.2]
    )

    plot_price_and_moving_average(df, fig, row=1)
    plot_volume(df, fig, row=2)
    plot_rsi(df, fig, row=3)
    plot_macd(df, fig, row=4)

    fig.update_layout(
        height=1000,
        title_text=f"{ticker} Technical Analysis Dashboard",
        title_x=0.5,
        showlegend=True
    )
    fig.update_yaxes(title_text="Price", row=1, col=1)
    fig.update_yaxes(title_text="Volume", row=2, col=1)
    fig.update_yaxes(title_text="RSI", row=3, col=1)
    fig.update_yaxes(title_text="MACD", row=4, col=1)
    fig.update_xaxes(title_text="Date", row=4, col=1)

    fig.show()

Function for Plotting Volatility

In [136]:
t = get_stock_data(TICKERS, START_DATE, END_DATE)
tc = clean_data(t)
tcm = calculate_measures(tc, [50, 200])
tcmi = calculate_indicators(tcm)
tcmif = filter_data(tcmi)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [137]:
tcmif['AAPL']['data']

Unnamed: 0_level_0,Close,Close_Normalized,Volume,Return,Moving_Avg_50,Moving_Avg_200,Rolling_Vol_20,RSI,EMA_short,EMA_long,MACD,MACD_Signal,MACD_Hist
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-10-18,141.676712,0.135250,99136600,0.009409,153.227285,155.264472,0.023058,43.938980,140.702613,144.406373,-3.703760,-4.075262,0.371502
2022-10-19,141.785126,0.136053,61758300,0.000765,152.812160,155.080169,0.022760,52.598792,140.869154,144.212207,-3.343053,-3.928820,0.585767
2022-10-20,141.321899,0.132622,64522000,-0.003267,152.302616,154.904887,0.022747,58.791161,140.938807,143.998110,-3.059303,-3.754917,0.695614
2022-10-21,145.145981,0.160953,86548600,0.027059,151.884337,154.772183,0.023498,58.110883,141.586064,144.083137,-2.497073,-3.503348,1.006275
2022-10-24,147.294510,0.176871,75981900,0.014803,151.437870,154.664552,0.023749,55.981832,142.464287,144.321017,-1.856730,-3.174025,1.317294
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-11,211.160004,0.650035,39765800,-0.005885,203.897746,222.406590,0.011252,72.156136,208.546027,205.870517,2.675509,1.666214,1.009295
2025-07-14,208.619995,0.631217,38840100,-0.012029,203.825712,222.321306,0.011749,64.510831,208.557406,206.074182,2.483224,1.829616,0.653608
2025-07-15,209.110001,0.634847,42296300,0.002349,203.747099,222.233988,0.011116,68.440367,208.642421,206.299058,2.343363,1.932365,0.410998
2025-07-16,210.160004,0.642626,47490500,0.005021,203.848678,222.156903,0.011010,68.100071,208.875895,206.585054,2.290841,2.004061,0.286781


In [138]:
plot_price_and_volume(tcmif['AAPL']['data'], 'AAPL')

In [139]:
def plot_volatility_scatter(df: pd.DataFrame, ticker: str, columns: list[str], title: str):
    '''
    Plots a scatterplot for 

    Parameters:
        df (pd.DataFrame): DataFrame with closing prices and moving averages
        ticker (str): Equity ticker symbol
        columns (list[str]): List of columns to plot (first element is x-axis, second element is y-axis)
        title (str): Title of the plot
    '''

    fig = go.Figure(go.Scatter(x=df[columns[0]], y=df[columns[1]],
                               mode='markers', name=title))

    fig.update_layout(title=ticker + title,
                      xaxis_title=columns[0], yaxis_title=columns[1])
    fig.show()

In [140]:
plot_volatility_scatter(tcmif['AAPL']['data'], 'AAPL', ['Return', 'Rolling_Vol'], 'test')

KeyError: 'Rolling_Vol'

In [None]:
from dash import Dash, dcc, html, Input, Output

app = Dash(__name__)
tickers = list(data.keys())

app.layout = html.Div([
    html.H1("Stock Dashboard"),
    
    dcc.Dropdown(id='ticker-dropdown', options=[{'label': t, 'value': t} for t in tickers],
                 value=tickers[0]),
    
    dcc.Graph(id='price-ma-graph'),
    dcc.Graph(id='volume-hist-graph'),
    dcc.Graph(id='vol-scatter-graph')
])

@app.callback(
    [Output('price-ma-graph', 'figure'),
     Output('volume-hist-graph', 'figure'),
     Output('vol-scatter-graph', 'figure')],
    [Input('ticker-dropdown', 'value')]
)
def update_graphs(ticker):
    df = data[ticker]
    return (
        go.Figure([
            go.Scatter(x=df.index, y=df['Close'], name='Close'),
            go.Scatter(x=df.index, y=df['Moving_Avg'], name='Moving Avg')
        ]).update_layout(title=f"{ticker} Price and Moving Average"),

        go.Figure(go.Histogram(x=df['Volume']))
        .update_layout(title=f"{ticker} Volume Histogram"),

        go.Figure(go.Scatter(x=df['Return'], y=df['Volatility'], mode='markers'))
        .update_layout(title=f"{ticker} Volatility Scatter")
    )

if __name__ == '__main__':
    app.run_server(debug=True)
