# Black-Scholes Model Empirical Analysis: Data Download

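This notebook downloads one month (the most recent 30 calendar days) of market data for the
Black-Scholes empirical analysis — the risk-free rate, daily stock prices, current stock
information, and option chains — following the methodology of Salami (2024).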

## Install Required Packages

```bash
pip install yfinance pandas numpy pandas-datareader matplotlib
```

In [None]:
# Uncomment to install packages (%pip installs into the running kernel's environment)
# %pip install yfinance pandas numpy pandas-datareader matplotlib

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import warnings
# Silence every Python warning for the rest of the session so that the
# download logs printed by the cells below stay readable.
warnings.filterwarnings('ignore')

# All CSV files produced by this notebook are written under this folder;
# it is created on first run and reused afterwards.
DATA_DIR = 'data'
os.makedirs(DATA_DIR, exist_ok=True)

# Run banner: records when the download ran and where the files are stored.
print(
    "=" * 60,
    "BLACK-SCHOLES EMPIRICAL ANALYSIS: DATA DOWNLOAD",
    "=" * 60,
    f"Download Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"Data Directory: {os.path.abspath(DATA_DIR)}",
    sep="\n",
)

## 1. Define Study Parameters

Following Salami (2024), we select stocks from three sectors:
- **Financial**: BAC, JPM, WFC
- **Healthcare**: MRK, PFE, UNH
- **Technology**: AAPL, MSFT, NVDA

In [None]:
# Study universe (same nine stocks as Salami 2024), three per sector,
# written as (symbol, company name, sector) rows.
_STUDY_UNIVERSE = [
    ('BAC', 'Bank of America Corporation', 'Financial'),
    ('JPM', 'JPMorgan Chase & Co.', 'Financial'),
    ('WFC', 'Wells Fargo & Company', 'Financial'),
    ('MRK', 'Merck & Company Inc.', 'Healthcare'),
    ('PFE', 'Pfizer Inc.', 'Healthcare'),
    ('UNH', 'UnitedHealth Group Inc.', 'Healthcare'),
    ('AAPL', 'Apple Inc.', 'Technology'),
    ('MSFT', 'Microsoft Corporation', 'Technology'),
    ('NVDA', 'NVIDIA Corporation', 'Technology'),
]

# Lookup used by every download step: symbol -> {'name': ..., 'sector': ...}.
TICKERS = {
    symbol: {'name': company, 'sector': sector}
    for symbol, company, sector in _STUDY_UNIVERSE
}

# Study window: the 30 calendar days ending at the moment this cell runs.
END_DATE = datetime.now()
START_DATE = END_DATE - timedelta(days=30)

print(f"\nStudy Period: {START_DATE.strftime('%Y-%m-%d')} to {END_DATE.strftime('%Y-%m-%d')}")
print(f"\nSelected Stocks ({len(TICKERS)}):")
for ticker, info in TICKERS.items():
    print(f"  {ticker}: {info['name']} ({info['sector']})")

## 2. Download Risk-Free Rate

In [None]:
def download_risk_free_rate(start_date, end_date):
    """Download the 3-month Treasury rate from FRED (as used in Salami 2024).

    Parameters
    ----------
    start_date, end_date : datetime-like
        Bounds of the period, passed straight to the FRED reader and, on
        failure, to the fallback business-day calendar.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``rate_pct`` (the DGS3MO value as published) and
        ``rate`` (``rate_pct / 100``). If the download fails for any reason
        (pandas-datareader not installed, request error, no observations
        returned), a constant 4.5% series on business days is returned
        instead so the rest of the notebook can still run.
    """
    try:
        # Imported lazily so that a missing pandas-datareader install lands
        # in the fallback below instead of breaking the whole notebook.
        import pandas_datareader as pdr

        # Download 3-month Treasury rate (DGS3MO) as in the paper
        treasury = pdr.get_data_fred('DGS3MO', start=start_date, end=end_date)
        treasury = treasury.dropna()

        # Fail with a meaningful message rather than an opaque IndexError
        # from the ``iloc[-1]`` below when FRED has nothing for the period.
        if treasury.empty:
            raise ValueError("FRED returned no DGS3MO observations for the requested period")

        # Name columns explicitly (instead of by position) and add the
        # decimal rate; resulting column order is date, rate_pct, rate.
        treasury = (
            treasury
            .rename(columns={'DGS3MO': 'rate_pct'})
            .assign(rate=lambda frame: frame['rate_pct'] / 100)
            .rename_axis('date')
            .reset_index()
        )

        print(f"✓ Downloaded {len(treasury)} days of risk-free rate data")
        print(f"  Latest rate: {treasury['rate'].iloc[-1]:.4f} ({treasury['rate'].iloc[-1]*100:.2f}%)")

        return treasury

    except Exception as e:
        print(f"✗ Failed to download from FRED: {e}")
        print("  Creating fallback data with 4.5% rate")
        # normalize=True snaps the bounds to midnight: the notebook passes
        # datetime.now()-based bounds, and without it every fallback date
        # would carry that wall-clock time of day.
        dates = pd.date_range(start=start_date, end=end_date, freq='B', normalize=True)
        return pd.DataFrame({
            'date': dates,
            'rate_pct': 4.5,
            'rate': 0.045
        })

# Fetch the risk-free series for the study window and persist it so the
# analysis step can read it back from disk.
risk_free_df = download_risk_free_rate(START_DATE, END_DATE)

risk_free_path = f'{DATA_DIR}/risk_free_rate.csv'
risk_free_df.to_csv(risk_free_path, index=False)
print(f"  Saved to {DATA_DIR}/risk_free_rate.csv")

## 3. Download Stock Price History (1 Month)

In [None]:
def download_stock_history(tickers, start_date, end_date):
    """Fetch daily price bars for each ticker and stack them into one table.

    Each ticker is downloaded independently through ``yf.Ticker(...).history``;
    a ticker that returns no rows or raises is reported on stdout and skipped.

    Returns a DataFrame with columns ``date, ticker, open, high, low, close,
    volume`` (one row per ticker per day), or ``None`` when no ticker
    produced any data.
    """
    # Columns kept from the downloaded frame, mapped to the names written out.
    column_map = {
        'Date': 'date',
        'ticker': 'ticker',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
    }
    frames = []

    for ticker in tickers:
        try:
            prices = yf.Ticker(ticker).history(start=start_date, end=end_date)

            if len(prices) == 0:
                print(f"  ✗ {ticker}: No data")
                continue

            # The date lives in the index; move it into a column, then tag
            # every row with its symbol before trimming to the kept columns.
            prices = prices.reset_index()
            prices['ticker'] = ticker
            prices = prices[list(column_map)].rename(columns=column_map)

            frames.append(prices)
            print(f"  ✓ {ticker}: {len(prices)} trading days")

        except Exception as exc:
            print(f"  ✗ {ticker}: Error - {exc}")

    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)

# Pull daily bars for every ticker in the study universe.
print("\nDownloading stock price history...")
stock_history = download_stock_history(TICKERS.keys(), START_DATE, END_DATE)

# The downloader hands back None when no ticker produced data, so a file
# is only written when there is something to save.
if stock_history is not None:
    stock_history_path = f'{DATA_DIR}/stock_history.csv'
    stock_history.to_csv(stock_history_path, index=False)
    print(f"\n✓ Saved {len(stock_history)} records to {DATA_DIR}/stock_history.csv")

## 4. Download Current Stock Information

In [None]:
def download_stock_info(tickers_dict):
    """Download a current snapshot of each stock.

    Parameters
    ----------
    tickers_dict : dict
        Maps ticker symbol -> ``{'name': ..., 'sector': ...}``.

    Returns
    -------
    pandas.DataFrame
        One row per ticker whose lookup did not raise, with columns
        ``ticker, name, sector, spot_price, dividend_yield, market_cap, beta``.
        ``spot_price`` is the first available of ``currentPrice``,
        ``regularMarketPrice`` and ``previousClose``; it is left missing when
        none of them is provided. Tickers whose lookup raises are reported on
        stdout and omitted.
    """
    stock_info = []

    for ticker, meta in tickers_dict.items():
        try:
            stock = yf.Ticker(ticker)
            info = stock.info

            spot = info.get('currentPrice') or info.get('regularMarketPrice') or info.get('previousClose')
            # NOTE(review): yfinance releases reportedly differ on whether
            # 'dividendYield' is a fraction or a percentage — confirm the
            # unit against the installed version before using it in pricing.
            div_yield = info.get('dividendYield') or 0

            stock_info.append({
                'ticker': ticker,
                'name': meta['name'],
                'sector': meta['sector'],
                'spot_price': spot,
                'dividend_yield': div_yield,
                'market_cap': info.get('marketCap', 0),
                'beta': info.get('beta', 1.0)
            })

            # A missing price used to blow up the ':.2f' format below *after*
            # the row had been stored, so the ticker was logged as an error
            # although its record was kept. Report that case explicitly.
            if spot is None:
                print(f"  ⚠ {ticker}: No spot price available, Div: {div_yield*100:.2f}%")
            else:
                print(f"  ✓ {ticker}: ${spot:.2f}, Div: {div_yield*100:.2f}%")

        except Exception as e:
            print(f"  ✗ {ticker}: Error - {e}")

    return pd.DataFrame(stock_info)

# Snapshot the current quote data for every ticker in the study universe.
print("\nDownloading current stock information...")
stock_info_df = download_stock_info(TICKERS)

stock_info_path = f'{DATA_DIR}/stock_info.csv'
stock_info_df.to_csv(stock_info_path, index=False)
print(f"\n✓ Saved to {DATA_DIR}/stock_info.csv")

# Left as the cell's last expression so Jupyter renders the table.
stock_info_df

## 5. Download Option Chains

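Download option chains for each stock. For every ticker, the first listed expiration that falls 25–60 days out (the default window of `download_all_option_chains`) is selected; if no expiration falls inside that window, a fallback expiration is used instead.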

In [None]:
def _days_until(expiration, today):
    """Return the calendar days from ``today`` to a 'YYYY-MM-DD' expiration string."""
    return (datetime.strptime(expiration, '%Y-%m-%d').date() - today).days


def _pick_expiration(expirations, today, min_days, max_days):
    """Pick the expiration to download from a non-empty listing.

    The first expiration (in listing order) inside ``[min_days, max_days]``
    wins. When none is inside the window, the expiration closest to the
    window is used, considering only expirations strictly in the future so
    that a contract expiring today (time to expiry of zero) is not picked
    while a later one exists. If nothing is strictly in the future, the
    closest of whatever is listed is returned.
    """
    dated = [(exp, _days_until(exp, today)) for exp in expirations]

    for exp, days in dated:
        if min_days <= days <= max_days:
            return exp

    def gap_to_window(item):
        # Nothing is inside the window here, so every candidate is either
        # too early or too late; measure how far it misses by.
        days = item[1]
        return min_days - days if days < min_days else days - max_days

    future = [item for item in dated if item[1] > 0]
    return min(future or dated, key=gap_to_window)[0]


def _tag_contracts(contracts, ticker, expiration, days_to_exp):
    """Return a copy of one side of a chain annotated with ticker and expiry columns."""
    tagged = contracts.copy()
    tagged['ticker'] = ticker
    tagged['expiration'] = expiration
    tagged['days_to_expiry'] = days_to_exp
    tagged['T'] = days_to_exp / 365  # time to expiry in years (365-day basis)
    return tagged


def download_all_option_chains(tickers, min_days=25, max_days=60):
    """Download one option chain (calls and puts) per ticker.

    Parameters
    ----------
    tickers : iterable of str
        Symbols to download.
    min_days, max_days : int
        Preferred window, in calendar days from today, for the expiration.
        The first listed expiration inside the window is used; when none
        qualifies, the still-future expiration closest to the window is used.

    Returns
    -------
    (calls_df, puts_df, summary_df)
        ``calls_df`` / ``puts_df`` hold the downloaded contracts with added
        ``ticker``, ``expiration``, ``days_to_expiry`` and ``T`` columns, or
        are ``None`` when no chain was downloaded. ``summary_df`` has one row
        per successfully downloaded ticker (empty when there is none).
        Tickers without listed options, or whose download raises, are
        reported on stdout and skipped.
    """
    all_calls = []
    all_puts = []
    summary = []

    today = datetime.now().date()

    for ticker in tickers:
        try:
            stock = yf.Ticker(ticker)
            expirations = stock.options

            if not expirations:
                print(f"  ✗ {ticker}: No options available")
                continue

            selected_exp = _pick_expiration(expirations, today, min_days, max_days)
            days_to_exp = _days_until(selected_exp, today)

            # Get option chain
            chain = stock.option_chain(selected_exp)
            calls = _tag_contracts(chain.calls, ticker, selected_exp, days_to_exp)
            puts = _tag_contracts(chain.puts, ticker, selected_exp, days_to_exp)

            # Append both sides together so the calls and puts lists always
            # cover the same set of tickers.
            all_calls.append(calls)
            all_puts.append(puts)

            summary.append({
                'ticker': ticker,
                'expiration': selected_exp,
                'days_to_expiry': days_to_exp,
                'T': days_to_exp / 365,
                'num_calls': len(calls),
                'num_puts': len(puts)
            })

            print(f"  ✓ {ticker}: Exp {selected_exp} ({days_to_exp}d), {len(calls)} calls, {len(puts)} puts")

        except Exception as e:
            print(f"  ✗ {ticker}: Error - {e}")

    calls_df = pd.concat(all_calls, ignore_index=True) if all_calls else None
    puts_df = pd.concat(all_puts, ignore_index=True) if all_puts else None
    summary_df = pd.DataFrame(summary)

    return calls_df, puts_df, summary_df

# Fetch one option chain per ticker in the study universe.
print("\nDownloading option chains...")
calls_df, puts_df, options_summary = download_all_option_chains(TICKERS.keys())

# calls_df is None when no chain could be downloaded; nothing is written
# in that case.
have_option_data = calls_df is not None
if have_option_data:
    calls_df.to_csv(f'{DATA_DIR}/options_calls.csv', index=False)
    puts_df.to_csv(f'{DATA_DIR}/options_puts.csv', index=False)
    options_summary.to_csv(f'{DATA_DIR}/options_summary.csv', index=False)
    print(f"\n✓ Saved {len(calls_df)} calls and {len(puts_df)} puts")

# Left as the cell's last expression so Jupyter renders the summary table.
options_summary

## 6. Preview Downloaded Data

In [None]:
print("\n" + "=" * 60)
print("STOCK PRICE HISTORY PREVIEW")
print("=" * 60)

# download_stock_history returns None when no ticker produced data; report
# that instead of failing on the subscript below.
if stock_history is None:
    print("\nNo stock price history was downloaded.")
else:
    # Show the 3 most recent trading days for each stock
    for ticker in TICKERS.keys():
        subset = stock_history[stock_history['ticker'] == ticker].tail(3)
        # Tickers whose download failed have no rows and are skipped silently.
        if len(subset) > 0:
            print(f"\n{ticker}:")
            print(subset[['date', 'close']].to_string(index=False))

In [None]:
print("\n" + "=" * 60)
print("OPTION CHAIN PREVIEW (AAPL)")
print("=" * 60)

# download_all_option_chains returns None for the calls table when no chain
# was downloaded; report that instead of failing on the subscript below.
if calls_df is None:
    print("\nNo option chains were downloaded.")
else:
    # Show sample call options: the first 10 AAPL rows, quote columns only
    aapl_calls = calls_df.loc[
        calls_df['ticker'] == 'AAPL',
        ['strike', 'lastPrice', 'bid', 'ask', 'volume', 'impliedVolatility'],
    ].head(10)

    if aapl_calls.empty:
        # The AAPL download can fail on its own while other tickers succeed.
        print("\nNo AAPL call options were downloaded.")
    else:
        print("\nCall Options:")
        print(aapl_calls.to_string(index=False))

## 7. Data Summary

In [None]:
print("\n" + "=" * 60)
print("DATA DOWNLOAD COMPLETE")
print("=" * 60)

print(f"\nStudy Period: {START_DATE.strftime('%Y-%m-%d')} to {END_DATE.strftime('%Y-%m-%d')}")
print(f"\nFiles Created in '{DATA_DIR}/' directory:")
# Lists everything currently in the data directory, with its size on disk.
for f in sorted(os.listdir(DATA_DIR)):
    size = os.path.getsize(os.path.join(DATA_DIR, f))
    print(f"  • {f} ({size:,} bytes)")

# The download helpers return None when nothing could be fetched; count
# those cases as zero instead of failing on len()/subscript of None.
n_trading_days = stock_history['date'].nunique() if stock_history is not None else 0
n_calls = len(calls_df) if calls_df is not None else 0
n_puts = len(puts_df) if puts_df is not None else 0

print(f"\nData Summary:")
print(f"  • Stocks: {len(TICKERS)}")
print(f"  • Trading Days: {n_trading_days}")
print(f"  • Call Options: {n_calls}")
print(f"  • Put Options: {n_puts}")
print(f"  • Risk-Free Rate (latest): {risk_free_df['rate'].iloc[-1]*100:.2f}%")

print("\n" + "=" * 60)
print("NEXT STEP: Run the QMD analysis file")
print("=" * 60)