In [13]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from IPython.display import display               #This is used so that we can print prettier and more clear tables

def collect_amazon_data():
    amzn = yf.Ticker("AMZN")
    stock_data = amzn.history(period = "1mo")
    print("First, we'll take a look at the variables in our data:")
    display(stock_data.tail())
    print("We can see that we have 7 different columns. Since Amazon doesn't pay dividends and it hasn't done a stock split since 2022 we will delete these variables")
    
    stock_data = stock_data.drop("Dividends", axis = 1)
    stock_data = stock_data.drop("Stock Splits", axis = 1)
    display(stock_data.tail())
    current_price = stock_data['Close'].iloc[-1]
    print(f"Current amazon price: ${current_price:.2f}")
    
    #Log returns
    log_returns = np.log(stock_data["Close"] / stock_data["Close"].shift(1))
    hist_vol = log_returns.std() * np.sqrt(252)    #252 trading days. The annual volatility is equal to the daily volatility times the square of the total number of days
    print(f"Annualized volatility based on 30 day lookback: {hist_vol:.2%}")
    
    #Now we get the options data
    expirations = amzn.options
    print("Now we will look at the available options expiration dates:")
    print(f"We have found {len(expirations)} availabe expiration dates")
    print("These are the first 10 dates:")
    display(expirations[:10])
    
    print("Noow we will look at the options information")
    opt_chain = amzn.option_chain()
    display(opt_chain.calls.head())
    print(type(opt_chain))
    print("We can see that we get information about every options contract like the date of the last trade, its strike price, its volume and more.")
    #print("My objective now is to make two dataframes for each expiration date, one for call and one for put options.")    That would be too many dataframes
    print("I will try to add a exp_date column and a potion_type column so we can see when the contract expires and if its a put or a call contract.")
    
    all_options =[]
    for exp_date in expirations:
        opt_chain = amzn.option_chain(exp_date)     #opt_chain is an object that contains two dataframes, one for calls and one for puts
        
        calls =  opt_chain.calls
        calls["contract_type"] = "Calls"
        calls["exp_date"] = exp_date
        
        puts = opt_chain.puts
        puts["contract_type"] = "Puts"
        puts["exp_date"] = exp_date
        #We use the concat method because the dataframes have a shared index
        options = pd.concat([calls, puts])
        all_options.append(options)
        #Now we have a list of dataframes
    #We want to make a single dataframe from a list of dataframes. For this we use again pd.concat() to join them vertically
    opt_chain = pd.concat(all_options)
    display(opt_chain)
    print("Now we have a dataframe with all the option contracts for the existing future expiration dates. ")
    #print("We can see that some contracts have 0 fro bid and for ask. We are not interested in this contracts so we must remove them. We will also remove contracts with 0 volume.")
    #opt_chain = opt_chain[opt_chain["ask"] > 0]
    #opt_chain = opt_chain[opt_chain["bid"] > 0]
    #opt_chain = opt_chain[opt_chain["volume"] > 0]
    #This isn't the right call, because we are left with a very low number of contracts when we run it outside market hours. We will only look at the volume at the end of this phase.
    
    print("\nNow we will introduce and calculate different parameters in the dataframe that will be useful later on.")
    print("These parameters are: \n --> Current stock price \n --> Mid price = Value between the bid and ask prices \n --> Difference between the bid and ask prices \n --> Moneyness = Ratio of stock price to contract strike price")
    print(" --> Days to expiration \n --> Years to expiration,T (for Black-Scholes)")
    opt_chain["current_price"] = current_price
    opt_chain["mid_price"] = (opt_chain["bid"] + opt_chain["ask"])/2
    opt_chain["bid_ask_diff"] = opt_chain["ask"] - opt_chain["bid"]
    opt_chain["moneyness"] = current_price / opt_chain["strike"]
    opt_chain["days_to_exp"] = (pd.to_datetime(opt_chain["exp_date"]) - pd.Timestamp.now()).dt.days
    opt_chain["T"] = opt_chain["days_to_exp"] / 365
    
    print("\nWe will remove uninteresting contracts like the ones with 0 volume.")
    in_len = len(opt_chain)
    opt_chain = opt_chain[opt_chain["volume"] > 0].copy()
    print(f"We have dropped {in_len - len(opt_chain)} rows of data / contracts.")
    display(opt_chain)
    
    print("Finally we will save this new dataframe. We will save it first with the current date and then with a general name that will get overwritten every time we run this script. This way we can have data from different days that might come useful in future analisis.")
    time_now = datetime.now().strftime("%Y%m%d_%H%M")        #Formateamos el nombre del archivo
    opt_chain.to_csv(f"amazon_options_clean_{time_now}.csv", index = False)
    opt_chain.to_csv("amazon_options_clean.csv", index = False)
collect_amazon_data()

First, we'll take a look at the variables in our data:


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2026-01-23 00:00:00-05:00,234.960007,240.449997,234.570007,239.160004,33778500,0.0,0.0
2026-01-26 00:00:00-05:00,239.979996,240.949997,237.539993,238.419998,32825500,0.0,0.0
2026-01-27 00:00:00-05:00,239.690002,244.880005,238.080002,244.679993,38029200,0.0,0.0
2026-01-28 00:00:00-05:00,246.369995,247.779999,241.529999,243.009995,40702800,0.0,0.0
2026-01-29 00:00:00-05:00,242.955002,243.0,236.742004,239.070007,22050625,0.0,0.0


We can see that we have 7 different columns. Since Amazon doesn't pay dividends and it hasn't done a stock split since 2022 we will delete these variables


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2026-01-23 00:00:00-05:00,234.960007,240.449997,234.570007,239.160004,33778500
2026-01-26 00:00:00-05:00,239.979996,240.949997,237.539993,238.419998,32825500
2026-01-27 00:00:00-05:00,239.690002,244.880005,238.080002,244.679993,38029200
2026-01-28 00:00:00-05:00,246.369995,247.779999,241.529999,243.009995,40702800
2026-01-29 00:00:00-05:00,242.955002,243.0,236.742004,239.070007,22050625


Current amazon price: $239.07
Annualized volatility based on 30 day lookback: 28.41%
Now we will look at the available options expiration dates:
We have found 22 availabe expiration dates
These are the first 10 dates:


('2026-01-30',
 '2026-02-06',
 '2026-02-13',
 '2026-02-20',
 '2026-02-27',
 '2026-03-06',
 '2026-03-20',
 '2026-04-17',
 '2026-05-15',
 '2026-06-18')

Noow we will look at the options information


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency
0,AMZN260130C00125000,2026-01-26 20:35:12+00:00,125.0,114.57,111.1,115.0,0.0,0.0,3,20,4.898441,True,REGULAR,USD
1,AMZN260130C00130000,2026-01-26 20:33:47+00:00,130.0,109.4,106.1,110.15,0.0,0.0,2,12,4.773442,True,REGULAR,USD
2,AMZN260130C00135000,2026-01-27 19:47:37+00:00,135.0,108.84,101.1,105.1,0.0,0.0,1,38,4.472661,True,REGULAR,USD
3,AMZN260130C00140000,2026-01-26 20:55:00+00:00,140.0,98.5,96.1,100.1,0.0,0.0,5,10,4.226567,True,REGULAR,USD
4,AMZN260130C00145000,2026-01-26 20:34:47+00:00,145.0,94.46,91.1,95.1,0.0,0.0,3,6,3.986328,True,REGULAR,USD


<class 'yfinance.ticker.Options'>
We can see that we get information about every options contract like the date of the last trade, its strike price, its volume and more.
I will try to add a exp_date column and a potion_type column so we can see when the contract expires and if its a put or a call contract.


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,contract_type,exp_date
0,AMZN260130C00125000,2026-01-26 20:35:12+00:00,125.0,114.57,111.10,115.00,0.0,0.0,3.0,20,4.898441,True,REGULAR,USD,Calls,2026-01-30
1,AMZN260130C00130000,2026-01-26 20:33:47+00:00,130.0,109.40,106.10,110.15,0.0,0.0,2.0,12,4.773442,True,REGULAR,USD,Calls,2026-01-30
2,AMZN260130C00135000,2026-01-27 19:47:37+00:00,135.0,108.84,101.10,105.10,0.0,0.0,1.0,38,4.472661,True,REGULAR,USD,Calls,2026-01-30
3,AMZN260130C00140000,2026-01-26 20:55:00+00:00,140.0,98.50,96.10,100.10,0.0,0.0,5.0,10,4.226567,True,REGULAR,USD,Calls,2026-01-30
4,AMZN260130C00145000,2026-01-26 20:34:47+00:00,145.0,94.46,91.10,95.10,0.0,0.0,3.0,6,3.986328,True,REGULAR,USD,Calls,2026-01-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,AMZN281215P00320000,2026-01-28 18:59:42+00:00,320.0,94.55,96.85,98.85,0.0,0.0,3.0,34,0.257286,True,REGULAR,USD,Puts,2028-12-15
30,AMZN281215P00330000,2026-01-27 15:59:34+00:00,330.0,103.20,104.30,106.40,0.0,0.0,1.0,24,0.252586,True,REGULAR,USD,Puts,2028-12-15
31,AMZN281215P00340000,2026-01-28 15:35:08+00:00,340.0,108.60,112.20,114.20,0.0,0.0,6.0,29,0.248024,True,REGULAR,USD,Puts,2028-12-15
32,AMZN281215P00350000,2026-01-27 19:59:52+00:00,350.0,116.76,119.70,122.25,0.0,0.0,3.0,107,0.243690,True,REGULAR,USD,Puts,2028-12-15


Now we have a dataframe with all the option contracts for the existing future expiration dates. 

Now we will introduce and calculate different parameters in the dataframe that will be useful later on.
These parameters are: 
 --> Current stock price 
 --> Mid price = Value between the bid and ask prices 
 --> Difference between the bid and ask prices 
 --> Moneyness = Ratio of stock price to contract strike price
 --> Days to expiration 
 --> Years to expiration,T (for Black-Scholes)

We will remove uninteresting contracts like the ones with 0 volume.
We have dropped 50 rows of data / contracts.


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,...,contractSize,currency,contract_type,exp_date,current_price,mid_price,bid_ask_diff,moneyness,days_to_exp,T
0,AMZN260130C00125000,2026-01-26 20:35:12+00:00,125.0,114.57,111.10,115.00,0.0,0.0,3.0,20,...,REGULAR,USD,Calls,2026-01-30,239.070007,113.050,3.90,1.912560,0,0.000000
1,AMZN260130C00130000,2026-01-26 20:33:47+00:00,130.0,109.40,106.10,110.15,0.0,0.0,2.0,12,...,REGULAR,USD,Calls,2026-01-30,239.070007,108.125,4.05,1.839000,0,0.000000
2,AMZN260130C00135000,2026-01-27 19:47:37+00:00,135.0,108.84,101.10,105.10,0.0,0.0,1.0,38,...,REGULAR,USD,Calls,2026-01-30,239.070007,103.100,4.00,1.770889,0,0.000000
3,AMZN260130C00140000,2026-01-26 20:55:00+00:00,140.0,98.50,96.10,100.10,0.0,0.0,5.0,10,...,REGULAR,USD,Calls,2026-01-30,239.070007,98.100,4.00,1.707643,0,0.000000
4,AMZN260130C00145000,2026-01-26 20:34:47+00:00,145.0,94.46,91.10,95.10,0.0,0.0,3.0,6,...,REGULAR,USD,Calls,2026-01-30,239.070007,93.100,4.00,1.648759,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,AMZN281215P00320000,2026-01-28 18:59:42+00:00,320.0,94.55,96.85,98.85,0.0,0.0,3.0,34,...,REGULAR,USD,Puts,2028-12-15,239.070007,97.850,2.00,0.747094,1050,2.876712
30,AMZN281215P00330000,2026-01-27 15:59:34+00:00,330.0,103.20,104.30,106.40,0.0,0.0,1.0,24,...,REGULAR,USD,Puts,2028-12-15,239.070007,105.350,2.10,0.724455,1050,2.876712
31,AMZN281215P00340000,2026-01-28 15:35:08+00:00,340.0,108.60,112.20,114.20,0.0,0.0,6.0,29,...,REGULAR,USD,Puts,2028-12-15,239.070007,113.200,2.00,0.703147,1050,2.876712
32,AMZN281215P00350000,2026-01-27 19:59:52+00:00,350.0,116.76,119.70,122.25,0.0,0.0,3.0,107,...,REGULAR,USD,Puts,2028-12-15,239.070007,120.975,2.55,0.683057,1050,2.876712


Finally we will save this new dataframe. We will save it first with the current date and then with a general name that will get overwritten every time we run this script. This way we can have data from different days that might come useful in future analisis.
