# Machine Learning Driven Option Pricing

1. Objective
2. Data Collection
3. Data Preprocessing
4. Model Selection and Training
5. Model Evaluation
6. Analysis of Results
7. Conclusion
8. References

## 1. Objective

Predict the prices of options using machine learning models and compare them to traditional pricing models like Black-Scholes.

**Deliverables**:
- An ML model that can predict option prices
- A comparison analysis between the predicted prices and the prices derived from the Black-Scholes model.

## 2. Data Collection

In [1]:
# My API Key
# apikey = "6ER8CYPPF7N3KTM5"

In [2]:
import datetime
import json
import requests
import pandas as pd

import time
import os
import re
import plotly.express as px

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from datetime import datetime
import csv

In [3]:
# Show what is in the current working directory so the data folders can be located.
dir_list = os.listdir(".")
print(dir_list)

['$PROFILE.txt', '.git', '.ruff_cache', 'EDGAR_Downloader.ipynb', 'financetoolkit_tester.ipynb', 'financial statements (aapl).xlsx', 'financial statements (cls).xlsx', 'financial statements (nvda).xlsx', 'financial statements (titn).xlsx', 'ml_options_pricing.ipynb', 'news_summary.ipynb', 'pull_historicals.ipynb', 'QQQ 2023-01-01 To 2023-10-31', 'README.md', 'scrape_news.ipynb', 'sec-edgar-filings', 'SPY 2023-01-01 to 2023-10-31', 'statarb_pairs_MM.ipynb']


In [4]:
# Ticker whose snapshot files are selected; the file names use lower case.
symbols = ['qqq']
stock = symbols[0]

# Every entry in the folder that holds the downloaded option-chain snapshots
dirContents = os.listdir("./QQQ 2023-01-01 To 2023-10-31")

# Keep only this ticker's files whose name contains a "T15" tag
# (presumably a 15:xx timestamp -- swap in r".+T09.+" for the other snapshot set).
snapshot_pattern = re.compile(stock + r".+T15.+")

# Matching file names in sorted order
files = sorted(filter(snapshot_pattern.search, dirContents))
print(len(files))

303


In [5]:
# Display all matched snapshot file names as the cell output
files

['qqq-2023-01-01-T1552.txt',
 'qqq-2023-01-02-T1551.txt',
 'qqq-2023-01-03-T1552.txt',
 'qqq-2023-01-04-T1552.txt',
 'qqq-2023-01-05-T1551.txt',
 'qqq-2023-01-06-T1552.txt',
 'qqq-2023-01-07-T1552.txt',
 'qqq-2023-01-08-T1551.txt',
 'qqq-2023-01-09-T1552.txt',
 'qqq-2023-01-10-T1552.txt',
 'qqq-2023-01-11-T1551.txt',
 'qqq-2023-01-12-T1552.txt',
 'qqq-2023-01-13-T1551.txt',
 'qqq-2023-01-14-T1551.txt',
 'qqq-2023-01-15-T1552.txt',
 'qqq-2023-01-16-T1551.txt',
 'qqq-2023-01-17-T1552.txt',
 'qqq-2023-01-18-T1552.txt',
 'qqq-2023-01-19-T1551.txt',
 'qqq-2023-01-20-T1552.txt',
 'qqq-2023-01-21-T1551.txt',
 'qqq-2023-01-22-T1551.txt',
 'qqq-2023-01-23-T1552.txt',
 'qqq-2023-01-24-T1551.txt',
 'qqq-2023-01-25-T1552.txt',
 'qqq-2023-01-26-T1552.txt',
 'qqq-2023-01-27-T1551.txt',
 'qqq-2023-01-28-T1552.txt',
 'qqq-2023-01-29-T1551.txt',
 'qqq-2023-01-30-T1551.txt',
 'qqq-2023-01-31-T1552.txt',
 'qqq-2023-02-01-T1551.txt',
 'qqq-2023-02-02-T1551.txt',
 'qqq-2023-02-03-T1552.txt',
 'qqq-2023-02-

In [6]:
# Collect the open interest of one call contract from every snapshot file.
#
# Results (parallel lists, one entry per snapshot that lists the contract):
#   openI - open interest as int
#   dates - the expiration key the value was found under; the keys printed
#           below look like "2024-01-19:383" (presumably "<expiry>:<days left>")
openI = []
dates = []

# Contract being tracked: expiration date and strike, as they appear in the JSON keys
target_expiration = "2024-01-19"
target_strike = "250.0"

# Read one snapshot file at a time
for fileName in files:

    # `with` closes the file even if reading raises
    with open("./QQQ 2023-01-01 To 2023-10-31/" + fileName, "r") as fileData:
        # Kept as a list of lines under the name `data`: a later cell prints it
        data = fileData.readlines()

    # Re-assemble the lines into one string (join avoids quadratic += concatenation)
    text = "".join(data)

    # Text is formatted in json, so we parse data into a python dictionary
    json_data = json.loads(text)

    # Select the call options from the data
    options = json_data['callExpDateMap']

    # Look at every expiration key and keep only the tracked expiration date
    for expiration in options:
        if target_expiration not in expiration:
            continue
        print(expiration)

        # Strikes are dictionary keys, so look the tracked strike up directly
        strikes = options[expiration]
        if target_strike in strikes:
            openI.append(int(strikes[target_strike][0]['openInterest'])) # open interest
            dates.append(expiration) # expiration key of the matched contract

2024-01-19:383
2024-01-19:382
2024-01-19:381
2024-01-19:380
2024-01-19:379
2024-01-19:378
2024-01-19:377
2024-01-19:376
2024-01-19:375
2024-01-19:374
2024-01-19:373
2024-01-19:372
2024-01-19:371
2024-01-19:370
2024-01-19:369
2024-01-19:368
2024-01-19:367
2024-01-19:366
2024-01-19:365
2024-01-19:364
2024-01-19:363
2024-01-19:362
2024-01-19:361
2024-01-19:360
2024-01-19:359
2024-01-19:358
2024-01-19:357
2024-01-19:356
2024-01-19:355
2024-01-19:354
2024-01-19:353
2024-01-19:352
2024-01-19:351
2024-01-19:349
2024-01-19:348
2024-01-19:347
2024-01-19:346
2024-01-19:345
2024-01-19:343
2024-01-19:342
2024-01-19:341
2024-01-19:340
2024-01-19:339
2024-01-19:338
2024-01-19:337
2024-01-19:336
2024-01-19:335
2024-01-19:334
2024-01-19:333
2024-01-19:332
2024-01-19:331
2024-01-19:329
2024-01-19:328
2024-01-19:327
2024-01-19:326
2024-01-19:325
2024-01-19:324
2024-01-19:323
2024-01-19:322
2024-01-19:321
2024-01-19:320
2024-01-19:318
2024-01-19:317
2024-01-19:316
2024-01-19:315
2024-01-19:314
2024-01-19

In [7]:
# Scatter plot of the collected open-interest values against their expiration keys
fig = px.scatter(
    title='Open Interest for QQQ 2024-01-19 250.0 Call Option',
    x=dates,
    y=openI,
)
fig.show()

Data provided by YouTuber "Investing With MacKey", who retrieved the data from the Ameritrade API.

In [8]:
# Alpha Vantage historical-options request -- currently disabled (all commented out).
# To enable it, uncomment the settings and the request below and define `apikey`.

# function
# DataSource = "HISTORICAL_OPTIONS"
# symbol
# CompanyTicker = "IBM"
# date
# Showdate = "2024-01-01"

# get the data from the API
# set `apikey` to your own key from https://www.alphavantage.co/support/#api-key
# url = f'https://www.alphavantage.co/query?function={DataSource}&symbol={CompanyTicker}&date={Showdate}&apikey={apikey}'
# r = requests.get(url)
# data = r.json()

# NOTE: with the request above commented out, `data` is not assigned in this cell.
# This prints whatever an earlier cell left in `data` -- the list of lines read
# from the last snapshot file in the loading loop -- not an Alpha Vantage response.
print(data)

# Tabulate the Alpha Vantage response (only meaningful once the request is enabled)
#df = pd.DataFrame(data['data'])
#selected_columns = ['symbol', 'expiration', 'strike', 'type', 'mark', 'bid', 
#                    'bid_size', 'ask', 'ask_size', 'volume', 'open_interest', 'implied_volatility',
#                    'delta', 'gamma', 'theta', 'vega', 'rho']

#print(df[selected_columns].head())

['{"symbol":"QQQ","status":"SUCCESS","underlying":null,"strategy":"SINGLE","interval":0.0,"isDelayed":true,"isIndex":false,"interestRate":5.639,"underlyingPrice":350.795,"volatility":29.0,"daysToExpiration":0.0,"numberOfContracts":6720,"callExpDateMap":{"2023-10-31:0":{"290.0":[{"putCall":"CALL","symbol":"QQQ_103123C290","description":"QQQ Oct 31 2023 290 Call","exchangeName":"OPR","bid":60.72,"ask":60.83,"last":60.38,"mark":60.78,"bidSize":15,"askSize":15,"bidAskSize":"15X15","lastSize":0,"highPrice":60.38,"lowPrice":57.38,"openPrice":0.0,"closePrice":59.25,"totalVolume":9,"tradeDate":null,"tradeTimeInLong":1698777836865,"quoteTimeInLong":1698781896025,"netChange":1.13,"volatility":142.119,"delta":1.0,"gamma":0.0,"theta":-0.002,"vega":0.0,"rho":0.0,"openInterest":3,"timeValue":-0.41,"theoreticalOptionValue":60.83,"theoreticalVolatility":29.0,"optionDeliverablesList":null,"strikePrice":290.0,"expirationDate":1698782400000,"daysToExpiration":0,"expirationType":"R","lastTradingDay":16987