<a href="https://colab.research.google.com/github/Legajo/Colab-Notebooks/blob/main/Hw1_Q4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Amazon (AMZN) Earnings Surprise Analysis
# Google Colab Notebook

# ============================================================================
# SETUP: Install and Import Required Libraries
# ============================================================================

!pip install yfinance -q

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

print("✓ Libraries imported successfully\n")

✓ Libraries imported successfully



In [3]:
# ============================================================================
# STEP 1: Load Earnings Data from CSV
# ============================================================================

print("=" * 70)
print("STEP 1: Loading Earnings Data")
print("=" * 70)

# Upload file in Colab
from google.colab import files
import os

# Check if file exists, if not, prompt upload
if not os.path.exists("ha1_Amazon.csv"):
    print("Please upload the ha1_Amazon.csv file:")
    uploaded = files.upload()
    print("✓ File uploaded successfully\n")

# Load the earnings data with semicolon delimiter
earnings_df = pd.read_csv("ha1_Amazon.csv", delimiter=';')

print(f"Earnings data shape: {earnings_df.shape}")
print(f"\nColumn names: {list(earnings_df.columns)}")
print(f"\nFirst few rows:")
print(earnings_df.head())

# Display data info
print(f"\nData info:")
print(earnings_df.info())

print(f"\n✓ Earnings data loaded successfully\n")


STEP 1: Loading Earnings Data
Please upload the ha1_Amazon.csv file:


Saving ha1_Amazon.csv to ha1_Amazon.csv
✓ File uploaded successfully

Earnings data shape: (117, 6)

Column names: ['Symbol', 'Company', 'Earnings Date', 'EPS Estimate', 'Reported EPS', 'Surprise (%)']

First few rows:
  Symbol           Company                 Earnings Date EPS Estimate  \
0   AMZN    Amazon.com Inc    April 29, 2026 at 6 AM EDT            -   
1   AMZN    Amazon.com Inc  February 4, 2026 at 4 PM EST            -   
2   AMZN    Amazon.com Inc  October 29, 2025 at 6 AM EDT            -   
3   AMZN    Amazon.com Inc     July 30, 2025 at 4 PM EDT            -   
4   AMZN  Amazon.com, Inc.       May 1, 2025 at 4 PM EDT       ???.36   

  Reported EPS Surprise (%)  
0            -            -  
1            -            -  
2            -            -  
3            -            -  
4       ???.59       +16.74  

Data info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117 entries, 0 to 116
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---

In [4]:
# ============================================================================
# STEP 2: Download Historical Price Data using yfinance
# ============================================================================

print("=" * 70)
print("STEP 2: Downloading Amazon Historical Price Data")
print("=" * 70)

# Download AMZN price data
# Get data from early date to ensure we cover all earnings dates
amzn = yf.download('AMZN', start='1997-01-01', end=datetime.now().strftime('%Y-%m-%d'),
                   progress=False, auto_adjust=False)

# Handle different yfinance data structures
try:
    if isinstance(amzn.columns, pd.MultiIndex):
        prices = amzn['Close']['AMZN']
    else:
        prices = amzn['Close']
except KeyError:
    prices = amzn['Close']

prices = prices.dropna()

print(f"Price data range: {prices.index[0].date()} to {prices.index[-1].date()}")
print(f"Total trading days: {len(prices):,}")
print(f"\n✓ Price data downloaded successfully\n")


STEP 2: Downloading Amazon Historical Price Data
Price data range: 1997-05-15 to 2025-10-02
Total trading days: 7,141

✓ Price data downloaded successfully



In [5]:
# ============================================================================
# STEP 3: Calculate 2-Day Percentage Changes for All Historical Dates
# ============================================================================

print("=" * 70)
print("STEP 3: Calculating 2-Day Percentage Changes")
print("=" * 70)

# Calculate 2-day returns: (Close_Day3 / Close_Day1) - 1
# For each Day 2, we calculate the return from Day 1 to Day 3

two_day_returns = {}

for i in range(1, len(prices) - 1):
    day1_price = prices.iloc[i - 1]
    day2_date = prices.index[i]
    day3_price = prices.iloc[i + 1]

    # Calculate 2-day return
    two_day_return = (day3_price / day1_price) - 1

    two_day_returns[day2_date] = two_day_return

# Convert to DataFrame
returns_df = pd.DataFrame(list(two_day_returns.items()),
                          columns=['Date', 'TwoDay_Return'])
returns_df['TwoDay_Return_Pct'] = returns_df['TwoDay_Return'] * 100.0

print(f"Total 2-day returns calculated: {len(returns_df):,}")
print(f"\nSample returns:")
print(returns_df.head(10))

print(f"\nAll historical 2-day returns statistics:")
print(f"  Mean: {returns_df['TwoDay_Return_Pct'].mean():.2f}%")
print(f"  Median: {returns_df['TwoDay_Return_Pct'].median():.2f}%")
print(f"  Std Dev: {returns_df['TwoDay_Return_Pct'].std():.2f}%")
print(f"  Min: {returns_df['TwoDay_Return_Pct'].min():.2f}%")
print(f"  Max: {returns_df['TwoDay_Return_Pct'].max():.2f}%")

print(f"\n✓ 2-day returns calculated\n")

STEP 3: Calculating 2-Day Percentage Changes
Total 2-day returns calculated: 7,139

Sample returns:
        Date  TwoDay_Return  TwoDay_Return_Pct
0 1997-05-16      -0.127659         -12.765910
1 1997-05-19      -0.054211          -5.421125
2 1997-05-20      -0.164639         -16.463936
3 1997-05-21      -0.146494         -14.649446
4 1997-05-22       0.051097           5.109736
5 1997-05-23       0.134328          13.432769
6 1997-05-27       0.020840           2.083997
7 1997-05-28      -0.049351          -4.935140
8 1997-05-29      -0.020415          -2.041453
9 1997-05-30       0.003468           0.346799

All historical 2-day returns statistics:
  Mean: 0.34%
  Median: 0.17%
  Std Dev: 4.96%
  Min: -29.62%
  Max: 43.49%

✓ 2-day returns calculated



In [8]:
# ============================================================================
# STEP 4: Identify Positive Earnings Surprises
# ============================================================================

print("=" * 70)
print("STEP 4: Identifying Positive Earnings Surprises")
print("=" * 70)

# Convert earnings date column to datetime (adjust column name as needed)
# Common column names: 'Date', 'Earnings Date', 'date', etc.
date_column = [col for col in earnings_df.columns if 'date' in col.lower()][0]

# Parse dates with mixed format, removing timezone info
def parse_earnings_date(date_str):
    """Parse earnings date string, removing timezone information"""
    if pd.isna(date_str):
        return pd.NaT

    # Remove timezone suffixes (EDT, EST, etc.)
    date_str = str(date_str)
    for tz in [' EDT', ' EST', ' CDT', ' CST', ' PDT', ' PST']:
        date_str = date_str.replace(tz, '')

    try:
        return pd.to_datetime(date_str, errors='coerce')
    except:
        return pd.NaT

earnings_df['Earnings_Date'] = earnings_df[date_column].apply(parse_earnings_date)

# Identify EPS columns (adjust names as needed)
eps_columns = earnings_df.columns.tolist()
print(f"Available columns: {eps_columns}")

# Find actual and estimated EPS columns
actual_col = [col for col in eps_columns if 'actual' in col.lower() or 'reported' in col.lower()]
estimate_col = [col for col in eps_columns if 'estimate' in col.lower() or 'expected' in col.lower() or 'consensus' in col.lower()]

if actual_col and estimate_col:
    actual_col = actual_col[0]
    estimate_col = estimate_col[0]
    print(f"\nUsing columns:")
    print(f"  Actual EPS: {actual_col}")
    print(f"  Estimated EPS: {estimate_col}")
else:
    print("\nPlease specify the correct column names:")
    print(earnings_df.head())
    # Manual column selection if needed
    actual_col = 'Actual EPS'  # Adjust this
    estimate_col = 'Estimated EPS'  # Adjust this

# Convert EPS columns to numeric, handling any non-numeric values
earnings_df['Actual_EPS'] = pd.to_numeric(earnings_df[actual_col], errors='coerce')
earnings_df['Estimated_EPS'] = pd.to_numeric(earnings_df[estimate_col], errors='coerce')

# Identify positive surprises
earnings_df['Is_Positive_Surprise'] = earnings_df['Actual_EPS'] > earnings_df['Estimated_EPS']
earnings_df['Surprise_Amount'] = earnings_df['Actual_EPS'] - earnings_df['Estimated_EPS']
earnings_df['Surprise_Pct'] = ((earnings_df['Actual_EPS'] - earnings_df['Estimated_EPS']) /
                                earnings_df['Estimated_EPS'].abs()) * 100

positive_surprises = earnings_df[earnings_df['Is_Positive_Surprise'] == True].copy()

print(f"\nTotal earnings announcements: {len(earnings_df)}")
print(f"Positive surprises: {len(positive_surprises)}")
print(f"Negative/neutral surprises: {len(earnings_df) - len(positive_surprises)}")

print(f"\nPositive surprises details:")
print(positive_surprises[['Earnings_Date', 'Estimated_EPS', 'Actual_EPS', 'Surprise_Amount', 'Surprise_Pct']].to_string())

print(f"\n✓ Positive surprises identified: {len(positive_surprises)} data points\n")


STEP 4: Identifying Positive Earnings Surprises
Available columns: ['Symbol', 'Company', 'Earnings Date', 'EPS Estimate', 'Reported EPS', 'Surprise (%)', 'Earnings_Date']

Using columns:
  Actual EPS: Reported EPS
  Estimated EPS: EPS Estimate

Total earnings announcements: 117
Positive surprises: 33
Negative/neutral surprises: 84

Positive surprises details:
         Earnings_Date  Estimated_EPS  Actual_EPS  Surprise_Amount  Surprise_Pct
8  2024-04-30 16:00:00           0.83        0.98             0.15     18.072289
9  2024-02-01 16:00:00           0.80        1.00             0.20     25.000000
10 2023-10-26 16:00:00           0.58        0.94             0.36     62.068966
11 2023-08-03 16:00:00           0.35        0.65             0.30     85.714286
12 2023-04-27 16:00:00           0.21        0.31             0.10     47.619048
13 2023-02-02 16:00:00           0.18        0.25             0.07     38.888889
15 2022-07-28 16:00:00           0.14        0.18             0.04     