Features that I might need
- Last Price
- Open Price
- Close Price
- Day-to-day gap (open vs. previous close)
- P/E Ratio
- Market Cap
- Earnings date
- Number of days since the last earnings report
- Earnings Actual vs Estimate Difference
- Revenue vs Earnings

In [27]:
import yfinance as yf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

In [14]:
# Exploratory probe: fetch five years of daily prices for QQQ and ask yfinance
# for its earnings history. The recorded output of this cell is an HTTP 404
# ("No fundamentals data found for symbol: QQQ").
ticker = yf.Ticker("QQQ")
prices_df = ticker.history(period="5y")
# Last expression of the cell, so its value is what the notebook displays.
ticker.earnings_history
# prices_df

HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"No fundamentals data found for symbol: QQQ"}}}


In [None]:
# yfinance limitation: quarterly income-statement data only goes back about 4 quarters.

def _asof_backward(left, right):
    """Attach to each row of *left* the latest row of *right* dated at or before it.

    Both frames must be indexed by timezone-naive datetimes. When *right* has
    no rows, its columns are added to *left* as all-missing values instead of
    calling ``merge_asof`` with nothing to match.
    """
    left = left.sort_index()
    if right.empty:
        padded = left.copy()
        for column in right.columns:
            # A data-less Series is filled with NaN/NaT in the column's dtype.
            padded[column] = pd.Series(index=left.index, dtype=right[column].dtype)
        return padded
    return pd.merge_asof(left,
                         right.sort_index(),
                         left_index=True,
                         right_index=True,
                         direction='backward')  # 'backward' = use last known value


def _quarterly_fundamentals(ticker):
    """Return revenue, net income and EPS figures indexed by quarterly date.

    Missing sources (for example a symbol with no fundamentals) yield an empty
    or partially-NaN frame rather than raising.
    """
    eps_columns = ['epsActual', 'epsEstimate', 'surprisePercent', 'epsDifference']

    # --- Financials (for Revenue and Net Income) ---
    financials = ticker.quarterly_financials
    if financials is None or financials.empty:
        fin = pd.DataFrame(columns=['Revenue', 'Earnings'],
                           index=pd.DatetimeIndex([]), dtype=float)
    else:
        by_date = financials.T  # Transpose to have dates as rows
        by_date.index = pd.to_datetime(by_date.index)
        fin = pd.DataFrame(index=by_date.index)
        # .get() returns None when the line item is absent, giving an empty column.
        fin['Revenue'] = by_date.get('Total Revenue')
        fin['Earnings'] = by_date.get('Net Income')

    # --- Earnings (for EPS and Surprise) ---
    history = ticker.earnings_history
    if history is None or history.empty:
        eps = pd.DataFrame(columns=eps_columns,
                           index=pd.DatetimeIndex([]), dtype=float)
    else:
        # reindex() tolerates absent columns (they become NaN) and returns a copy.
        eps = history.reindex(columns=eps_columns)
        eps.index = pd.to_datetime(eps.index)

    # Outer join keeps quarters that appear in only one of the two sources.
    return pd.merge(fin, eps, left_index=True, right_index=True, how='outer')


def _past_earnings_dates(ticker, ticker_symbol):
    """Return a frame of already-reported earnings dates.

    The frame has a DatetimeIndex of calendar dates and one column,
    ``Last_Earnings_Date``, holding the same dates so that an as-of join can
    carry the matched date onto each daily row. On any failure a warning is
    printed and an empty, correctly-typed frame is returned.
    """
    try:
        calendar = ticker.earnings_dates
        # Rows without a reported EPS have not happened yet.
        reported = calendar[calendar['Reported EPS'].notna()]
        stamps = pd.DatetimeIndex(reported.index).tz_localize(None)
        dates = stamps.normalize().unique().sort_values()
    except Exception:
        # Best effort: yfinance may return None or raise a variety of errors here.
        print(f"Warning: Could not get 'earnings_dates' for {ticker_symbol}.")
        dates = pd.DatetimeIndex([])
    return pd.DataFrame({'Last_Earnings_Date': dates}, index=dates)


def get_daily_features(ticker_symbol: str = "AAPL", period: str = "1y") -> pd.DataFrame:
    """
    Generates a DataFrame of daily features for a given stock ticker.

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.
        period: History window passed to ``Ticker.history``.

    Returns:
        A frame indexed by trading date with the columns Open_Price,
        Close_Price, Day_Gap_Percent, Revenue, Earnings, Quarterly_Actual_EPS,
        Quarterly_Estimate_EPS, surprisePercent, epsDifference,
        Last_Earnings_Date, Market_Cap and Days_Post_Earnings. Features whose
        source data is unavailable are NaN/NaT. An empty frame is returned
        when no price history is available.

    Notes:
        * Market_Cap multiplies every historical close by the *current* share
          count; a more precise method would use historical shares.
        * NOTE(review): quarterly values are joined on the dates yfinance
          indexes them by, so they may appear on days before the figures were
          actually published -- confirm before using them as model inputs.
    """
    print(f"Fetching data for {ticker_symbol}...")

    # Create ticker object
    ticker = yf.Ticker(ticker_symbol)

    # -----------------------------------------------------------------
    # 1. GET DAILY PRICE DATA
    # -----------------------------------------------------------------
    price_data = ticker.history(period=period)
    if price_data is None or price_data.empty:
        print(f"Warning: No price history returned for {ticker_symbol}.")
        return pd.DataFrame()

    # Work on a copy so the frame returned by yfinance is left untouched.
    daily_df = price_data[['Open', 'Close']].copy()

    # Make index timezone-naive for easier merging
    daily_df.index = daily_df.index.tz_localize(None)

    # Day-to-day gap: today's open relative to yesterday's close, in percent.
    previous_close = daily_df['Close'].shift(1)
    daily_df['Day_Gap_Percent'] = (daily_df['Open'] - previous_close) / previous_close * 100

    # -----------------------------------------------------------------
    # 2. GET QUARTERLY FUNDAMENTALS & EARNINGS DATA
    # -----------------------------------------------------------------
    quarterly_data = _quarterly_fundamentals(ticker)

    # -----------------------------------------------------------------
    # 3. GET STATIC & EVENT DATA
    # -----------------------------------------------------------------
    try:
        # float() also rejects a None share count, routing it to the fallback.
        shares_outstanding = float(ticker.info['sharesOutstanding'])
    except Exception:
        # Best effort: the info lookup can fail in several ways (network, key, type).
        print(f"Warning: Could not get 'sharesOutstanding' for {ticker_symbol}.")
        shares_outstanding = float("nan")

    earnings_dates = _past_earnings_dates(ticker, ticker_symbol)

    # -----------------------------------------------------------------
    # 4. MERGE ALL DATA INTO A DAILY FRAME
    # -----------------------------------------------------------------
    # Map the last-known quarterly row, then the last earnings date, onto each day.
    final_df = _asof_backward(daily_df, quarterly_data)
    final_df = _asof_backward(final_df, earnings_dates)

    # -----------------------------------------------------------------
    # 5. CALCULATE FINAL FEATURES
    # -----------------------------------------------------------------
    final_df['Market_Cap'] = final_df['Close'] * shares_outstanding

    # Calendar days elapsed since the matched earnings date (NaN when none matched).
    elapsed = final_df.index.to_series() - final_df['Last_Earnings_Date']
    final_df['Days_Post_Earnings'] = elapsed.dt.days

    # Rename and select final columns for clarity
    return final_df.rename(columns={
        'Open': 'Open_Price',
        'Close': 'Close_Price',
        'epsActual': 'Quarterly_Actual_EPS',
        'epsEstimate': 'Quarterly_Estimate_EPS',
    })

# --- --- --- --- --- ---
#      RUN THE CODE
# --- --- --- --- --- ---
# if __name__ == "__main__":
    
#     # Get features for Apple
#     features_aapl = get_daily_features("AAPL")
#     print("\n--- Daily Features for AAPL (Last 5 Days) ---")
#     print(features_aapl.tail())
    
#     # Get features for QQQ
#     # Note: QQQ will have 'NaN' for many fundamental features
#     features_qqq = get_daily_features("QQQ")
#     print("\n--- Daily Features for QQQ (Last 5 Days) ---")
#     print(features_qqq.tail())

In [23]:
# Build the daily feature table for AAPL using a one-year price history.
features_aapl = get_daily_features("AAPL", '1y')

Fetching data for AAPL...


In [24]:
# Bare expression: the notebook renders the feature table as this cell's output.
features_aapl

Unnamed: 0_level_0,Open_Price,Close_Price,Day_Gap_Percent,Revenue,Earnings,Quarterly_Actual_EPS,Quarterly_Estimate_EPS,surprisePercent,epsDifference,Last_Earnings_Date,Market_Cap,Days_Post_Earnings
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-10-18,235.085888,233.911362,,9.493000e+10,1.473600e+10,0.97,0.94654,0.0248,0.02,2024-08-01,3.471336e+12,78
2024-10-21,233.363911,235.384506,-0.234042,9.493000e+10,1.473600e+10,0.97,0.94654,0.0248,0.02,2024-08-01,3.493198e+12,81
2024-10-22,232.806520,234.767395,-1.095224,9.493000e+10,1.473600e+10,0.97,0.94654,0.0248,0.02,2024-08-01,3.484040e+12,82
2024-10-23,232.995637,229.691010,-0.754687,9.493000e+10,1.473600e+10,0.97,0.94654,0.0248,0.02,2024-08-01,3.408704e+12,83
2024-10-24,228.914614,229.501892,-0.338017,9.493000e+10,1.473600e+10,0.97,0.94654,0.0248,0.02,2024-08-01,3.405898e+12,84
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-10-13,249.380005,247.660004,1.675705,9.403600e+10,2.343400e+10,1.57,1.42572,0.1012,0.14,2025-07-31,3.675371e+12,74
2025-10-14,246.600006,247.770004,-0.428005,9.403600e+10,2.343400e+10,1.57,1.42572,0.1012,0.14,2025-07-31,3.677003e+12,75
2025-10-15,249.490005,249.339996,0.694193,9.403600e+10,2.343400e+10,1.57,1.42572,0.1012,0.14,2025-07-31,3.700303e+12,76
2025-10-16,248.250000,247.449997,-0.437153,9.403600e+10,2.343400e+10,1.57,1.42572,0.1012,0.14,2025-07-31,3.672254e+12,77


In [28]:
# Dual-axis comparison charts: Close_Price on the primary axis against each
# other feature on the secondary axis. Relies on 'features_aapl' having been
# built by an earlier cell.

if features_aapl is None or features_aapl.empty:
    print("DataFrame 'features_aapl' is empty or not defined. Cannot generate plots.")
else:
    print("Generating comparison charts...")

    # Every column except the close itself and anything whose name mentions 'Date'
    comparison_columns = [
        col
        for col in features_aapl.columns
        if not (col == 'Close_Price' or 'Date' in col)
    ]

    # Name fragments that mark a feature to be drawn as bars rather than a line
    bar_style_keywords = ('Percent', 'surprise', 'Gap')
    x_dates = features_aapl.index

    for feature_col in comparison_columns:
        feature_values = features_aapl[feature_col]

        # Nothing to draw when the column holds no data at all
        if feature_values.isnull().all():
            print(f"Skipping '{feature_col}' as it contains only NaN values.")
            continue

        print(f"Plotting Close_Price vs. {feature_col}...")

        # One figure per feature, with a secondary y-axis enabled
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        # Primary axis: closing price
        close_trace = go.Scatter(
            x=x_dates,
            y=features_aapl['Close_Price'],
            name="Close Price",
            line=dict(color='blue'),
        )
        fig.add_trace(close_trace, secondary_y=False)

        # Secondary axis: the feature, as bars or as a line depending on its name
        if any(keyword in feature_col for keyword in bar_style_keywords):
            feature_trace = go.Bar(
                x=x_dates,
                y=feature_values,
                name=feature_col,
                marker=dict(color='orange'),
                opacity=0.7,
            )
        else:
            feature_trace = go.Scatter(
                x=x_dates,
                y=feature_values,
                name=feature_col,
                line=dict(color='orange'),  # contrasts with the blue close line
            )
        fig.add_trace(feature_trace, secondary_y=True)

        # Titles for the figure, the x-axis and the legend
        fig.update_layout(
            title_text=f"AAPL: Close Price vs. {feature_col}",
            xaxis_title="Date",
            legend_title="Features",
        )

        # Titles for the two y-axes
        fig.update_yaxes(title_text="Close Price ($)", secondary_y=False)
        fig.update_yaxes(title_text=feature_col, secondary_y=True)

        fig.show()

Generating comparison charts...
Plotting Close_Price vs. Open_Price...


Plotting Close_Price vs. Day_Gap_Percent...


Plotting Close_Price vs. Revenue...


Plotting Close_Price vs. Earnings...


Plotting Close_Price vs. Quarterly_Actual_EPS...


Plotting Close_Price vs. Quarterly_Estimate_EPS...


Plotting Close_Price vs. surprisePercent...


Plotting Close_Price vs. epsDifference...


Plotting Close_Price vs. Market_Cap...


Plotting Close_Price vs. Days_Post_Earnings...
