In [2]:
import yfinance as yf
import time
import random
from requests.exceptions import RequestException

def fetch_with_retry(ticker, period="max", max_retries=3, backoff_factor=2):
    """
    Fetch stock data with exponential backoff retry logic
    
    Args:
        ticker: yfinance Ticker object
        period: time period to fetch (default: 'max')
        max_retries: maximum number of retry attempts
        backoff_factor: factor to increase wait time between retries
    
    Returns:
        DataFrame with historical data or None if all retries fail
    """
    retries = 0
    while retries <= max_retries:
        try:
            # Try to get historical data
            return ticker.history(period=period)["Close"]
        except Exception as e:
            retries += 1
            if retries > max_retries:
                print(f"Failed to fetch data after {max_retries} retries")
                return None
            
            # Calculate wait time with jitter to avoid thundering herd
            wait_time = backoff_factor ** retries + random.uniform(0.1, 1.0)
            print(f"Rate limit hit. Retrying in {wait_time:.2f} seconds... (Attempt {retries}/{max_retries})")
            time.sleep(wait_time)

# Fetch historical data for AAPL and MSFT
aapl = yf.Ticker("AAPL")
msft = yf.Ticker("MSFT")

# Try with max period first, fall back to shorter periods if needed
aapl_hist = fetch_with_retry(aapl, period="max")
if aapl_hist is None:
    print("Falling back to 5y period for AAPL")
    aapl_hist = fetch_with_retry(aapl, period="5y")
    
msft_hist = fetch_with_retry(msft, period="max")
if msft_hist is None:
    print("Falling back to 5y period for MSFT")
    msft_hist = fetch_with_retry(msft, period="5y")

# Verify we have data
if aapl_hist is not None and msft_hist is not None:
    print(f"AAPL data points: {len(aapl_hist)}")
    print(f"MSFT data points: {len(msft_hist)}")
else:
    print("Failed to retrieve complete data")

Rate limit hit. Retrying in 2.16 seconds... (Attempt 1/3)
Rate limit hit. Retrying in 4.43 seconds... (Attempt 2/3)
Rate limit hit. Retrying in 4.43 seconds... (Attempt 2/3)
Rate limit hit. Retrying in 8.30 seconds... (Attempt 3/3)
Rate limit hit. Retrying in 8.30 seconds... (Attempt 3/3)
Failed to fetch data after 3 retries
Falling back to 5y period for AAPL
Failed to fetch data after 3 retries
Falling back to 5y period for AAPL
Rate limit hit. Retrying in 2.43 seconds... (Attempt 1/3)
Rate limit hit. Retrying in 2.43 seconds... (Attempt 1/3)
Rate limit hit. Retrying in 4.58 seconds... (Attempt 2/3)
Rate limit hit. Retrying in 4.58 seconds... (Attempt 2/3)
Rate limit hit. Retrying in 8.39 seconds... (Attempt 3/3)
Rate limit hit. Retrying in 8.39 seconds... (Attempt 3/3)
Failed to fetch data after 3 retries
Failed to fetch data after 3 retries
Rate limit hit. Retrying in 2.75 seconds... (Attempt 1/3)
Rate limit hit. Retrying in 2.75 seconds... (Attempt 1/3)
Rate limit hit. Retrying in 

# Alternative Data Source: yahooquery

We'll install and use yahooquery as an alternative to yfinance since we've encountered rate limiting issues.

In [4]:
# Install yahooquery package
!pip install yahooquery
print("yahooquery installation complete")

Defaulting to user installation because normal site-packages is not writeable
Collecting yahooquery
  Downloading yahooquery-2.4.1-py3-none-any.whl.metadata (4.8 kB)
Collecting yahooquery
  Downloading yahooquery-2.4.1-py3-none-any.whl.metadata (4.8 kB)
Collecting curl-cffi>=0.10.0 (from yahooquery)
  Downloading curl_cffi-0.11.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting curl-cffi>=0.10.0 (from yahooquery)
  Downloading curl_cffi-0.11.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting lxml>=4.9.3 (from yahooquery)
  Downloading lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.5 kB)
Collecting requests-futures>=1.0.1 (from yahooquery)
  Downloading requests_futures-1.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting lxml>=4.9.3 (from yahooquery)
  Downloading lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.5 kB)
Collecting requests-futures>=1.0.1 (from yahooquery)
  Downloading req

In [5]:
try:
    from yahooquery import Ticker
    
    # Create Ticker objects for AAPL and MSFT
    aapl_yq = Ticker("AAPL")
    msft_yq = Ticker("MSFT")
    
    # Fetch historical close prices for AAPL and MSFT with error handling
    try:
        aapl_hist_yq = aapl_yq.history(period="max")
        if isinstance(aapl_hist_yq, dict) and 'error' in aapl_hist_yq:
            print(f"Error fetching AAPL data: {aapl_hist_yq['error']}")
            aapl_hist_yq = aapl_yq.history(period="5y")
        aapl_close = aapl_hist_yq["close"]
        print(f"AAPL (yahooquery) data points: {len(aapl_close)}")
    except Exception as e:
        print(f"Failed to fetch AAPL data: {str(e)}")
    
    try:
        msft_hist_yq = msft_yq.history(period="max")
        if isinstance(msft_hist_yq, dict) and 'error' in msft_hist_yq:
            print(f"Error fetching MSFT data: {msft_hist_yq['error']}")
            msft_hist_yq = msft_yq.history(period="5y")
        msft_close = msft_hist_yq["close"]
        print(f"MSFT (yahooquery) data points: {len(msft_close)}")
    except Exception as e:
        print(f"Failed to fetch MSFT data: {str(e)}")
        
except ModuleNotFoundError:
    print("yahooquery module not found. Please run the installation cell above.")
except Exception as e:
    print(f"An unexpected error occurred: {str(e)}")

AAPL (yahooquery) data points: 11199
MSFT (yahooquery) data points: 9873
MSFT (yahooquery) data points: 9873


# Data Exploration and Visualization

Let's examine and visualize the stock data we've collected using yahooquery.

In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

# Set style for better looking plots
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)

# Display head of the dataframes to see their structure
print("AAPL Historical Data - First 5 rows:")
display(aapl_hist_yq.head())

print("\nMSFT Historical Data - First 5 rows:")
display(msft_hist_yq.head())

# Display basic information about the dataframes
print("\nAAPL dataframe info:")
print(f"Shape: {aapl_hist_yq.shape}")
print(f"Index range: {aapl_hist_yq.index[0]} to {aapl_hist_yq.index[-1]}")
print(f"Columns: {list(aapl_hist_yq.columns)}")

print("\nMSFT dataframe info:")
print(f"Shape: {msft_hist_yq.shape}")
print(f"Index range: {msft_hist_yq.index[0]} to {msft_hist_yq.index[-1]}")
print(f"Columns: {list(msft_hist_yq.columns)}")

# Create a function to convert MultiIndex to date-only index for easier plotting
def prepare_for_plot(df, column='close'):
    # Extract the date part from MultiIndex
    plot_df = df[column].reset_index()
    plot_df['date'] = pd.to_datetime(plot_df['date'])
    plot_df = plot_df.set_index('date')
    plot_df = plot_df.drop('symbol', axis=1)
    return plot_df

# Prepare data for plotting
aapl_plot = prepare_for_plot(aapl_hist_yq)
msft_plot = prepare_for_plot(msft_hist_yq)

# Plot the closing prices
plt.figure(figsize=(14, 8))
plt.plot(aapl_plot.index, aapl_plot, label='AAPL')
plt.plot(msft_plot.index, msft_plot, label='MSFT')
plt.title('Historical Close Prices: AAPL vs MSFT', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Price ($)', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()

# Create recent performance comparison (last year)
recent_aapl = aapl_plot[aapl_plot.index > pd.Timestamp.now() - pd.DateOffset(years=1)]
recent_msft = msft_plot[msft_plot.index > pd.Timestamp.now() - pd.DateOffset(years=1)]

# Normalize to 100 for percentage comparison
if not recent_aapl.empty and not recent_msft.empty:
    recent_aapl = recent_aapl / recent_aapl.iloc[0] * 100
    recent_msft = recent_msft / recent_msft.iloc[0] * 100
    
    plt.figure(figsize=(14, 8))
    plt.plot(recent_aapl.index, recent_aapl, label='AAPL')
    plt.plot(recent_msft.index, recent_msft, label='MSFT')
    plt.title('Relative Performance: AAPL vs MSFT (Last Year)', fontsize=16)
    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Relative Price (%)', fontsize=14)
    plt.legend(fontsize=12)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

AAPL Historical Data - First 5 rows:


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,adjclose,dividends,splits
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,1980-12-12,0.128348,0.128906,0.128348,0.128348,469033600,0.098597,0.0,0.0
AAPL,1980-12-15,0.12221,0.12221,0.121652,0.121652,175884800,0.093453,0.0,0.0
AAPL,1980-12-16,0.113281,0.113281,0.112723,0.112723,105728000,0.086594,0.0,0.0
AAPL,1980-12-17,0.115513,0.116071,0.115513,0.115513,86441600,0.088737,0.0,0.0
AAPL,1980-12-18,0.118862,0.11942,0.118862,0.118862,73449600,0.09131,0.0,0.0



MSFT Historical Data - First 5 rows:


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,adjclose,dividends,splits
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MSFT,1986-03-13,0.088542,0.101563,0.088542,0.097222,1031788800,0.059598,0.0,0.0
MSFT,1986-03-14,0.097222,0.102431,0.097222,0.100694,308160000,0.061726,0.0,0.0
MSFT,1986-03-17,0.100694,0.103299,0.100694,0.102431,133171200,0.062791,0.0,0.0
MSFT,1986-03-18,0.102431,0.103299,0.098958,0.099826,67766400,0.061194,0.0,0.0
MSFT,1986-03-19,0.099826,0.100694,0.097222,0.09809,47894400,0.06013,0.0,0.0



AAPL dataframe info:
Shape: (11199, 8)
Index range: ('AAPL', datetime.date(1980, 12, 12)) to ('AAPL', datetime.datetime(2025, 5, 19, 14, 27, 3, tzinfo=<DstTzInfo 'America/New_York' EDT-1 day, 20:00:00 DST>))
Columns: ['open', 'high', 'low', 'close', 'volume', 'adjclose', 'dividends', 'splits']

MSFT dataframe info:
Shape: (9873, 8)
Index range: ('MSFT', datetime.date(1986, 3, 13)) to ('MSFT', datetime.datetime(2025, 5, 19, 14, 27, 1, tzinfo=<DstTzInfo 'America/New_York' EDT-1 day, 20:00:00 DST>))
Columns: ['open', 'high', 'low', 'close', 'volume', 'adjclose', 'dividends', 'splits']


ValueError: Cannot mix tz-aware with tz-naive values

In [9]:
from yahooquery import Ticker

# List of tickers
tickers = Ticker(['AAPL', 'MSFT', 'IBM'])

# Get and display option chain data
# Note: option_chain is a method, not a property with dictionary access
try:
    # Get option chain data for each ticker
    for symbol in ['AAPL', 'MSFT', 'IBM']:
        print(f"\n--- Option Chain for {symbol} ---")
        
        # Fetch the option chain for the symbol
        option_data = tickers.option_chain(symbol)
        
        # Check if we got valid data
        if isinstance(option_data, dict) and not option_data.get('error'):
            # Display available expiration dates
            if 'expirationDates' in option_data:
                print(f"Expiration dates for {symbol}:", option_data['expirationDates'])
                
                # Get the first expiration date
                if option_data['expirationDates']:
                    expiry = option_data['expirationDates'][0]
                    
                    # Get options data for this expiration
                    if 'options' in option_data and option_data['options']:
                        # Get calls and puts
                        for option in option_data['options']:
                            if option['expirationDate'] == expiry:
                                if 'calls' in option and len(option['calls']) > 0:
                                    print(f"\nCalls for {symbol} (expiry: {expiry}):")
                                    print(option['calls'].head())
                                else:
                                    print(f"No calls data available for {symbol}")
                                
                                if 'puts' in option and len(option['puts']) > 0:
                                    print(f"\nPuts for {symbol} (expiry: {expiry}):")
                                    print(option['puts'].head())
                                else:
                                    print(f"No puts data available for {symbol}")
                    else:
                        print(f"No options data available for {symbol}")
                else:
                    print(f"No expiration dates available for {symbol}")
            else:
                print(f"No expiration dates found for {symbol}")
        else:
            error_msg = option_data.get('error', 'Unknown error') if isinstance(option_data, dict) else "Invalid response"
            print(f"Error fetching option chain for {symbol}: {error_msg}")
            
except Exception as e:
    print(f"An error occurred: {str(e)}")
    print("Try using individual calls for each ticker:")
    
    # Alternative approach using individual ticker objects
    print("\n--- Alternative approach ---")
    aapl_options = Ticker('AAPL')
    try:
        aapl_chain = aapl_options.option_chain
        print("\nAAPL Options available:", "Yes" if aapl_chain is not None else "No")
        if aapl_chain is not None and not isinstance(aapl_chain, dict):
            print("First few option dates:", aapl_chain.index.levels[1][:3])
    except Exception as e:
        print(f"AAPL options error: {str(e)}")


--- Option Chain for AAPL ---
An error occurred: 'DataFrame' object is not callable
Try using individual calls for each ticker:

--- Alternative approach ---
An error occurred: 'DataFrame' object is not callable
Try using individual calls for each ticker:

--- Alternative approach ---

AAPL Options available: Yes
First few option dates: DatetimeIndex(['2025-05-23', '2025-05-30', '2025-06-06'], dtype='datetime64[ns]', name='expiration', freq=None)

AAPL Options available: Yes
First few option dates: DatetimeIndex(['2025-05-23', '2025-05-30', '2025-06-06'], dtype='datetime64[ns]', name='expiration', freq=None)
