JSON cleansing: strip the extra words at the start of the file by extracting only its JSON content:

In [2]:
import json, re, os

def fix_json(json_string):
    """
    Best-effort repair of a JSON-like string so that json.loads can parse it.

    Text that already parses as JSON is returned unchanged, because the
    rewrites below are heuristics and would damage valid content (for example
    an apostrophe inside a string value). Otherwise two repairs are applied:
    single quotes become double quotes, and bare property names get quoted.

    :param json_string: string. JSON or JSON-like text.
    :return: string. The repaired text. It is not guaranteed to be valid JSON;
             callers should still parse it and handle json.JSONDecodeError.
    """
    # Already valid: any rewrite could only break it.
    try:
        json.loads(json_string)
    except json.JSONDecodeError:
        pass
    else:
        return json_string

    # Replace single quotes with double quotes
    json_string = json_string.replace("'", '"')

    # Do NOT blanket-escape double quotes, e.g. with
    #   re.sub(r'(?<!\\)"', r'\"', json_string)
    # that would also escape the quotes that delimit keys and values.

    # Add double quotes around property names if missing. A name only counts
    # as a key when it follows '{' or ',', so "word:" sequences inside string
    # values (such as the time "12:30") are left alone.
    json_string = re.sub(r'([{,]\s*)(\w+)(\s*):', r'\1"\2"\3:', json_string)

    return json_string


def extract_json_from_file(input_file):
    """
    Extract the JSON array embedded in a file and save it to a new file.

    Everything from the first '[' to the last ']' in the file is taken as the
    JSON payload, so any words before it (like "Historical options, success, ")
    are dropped. The payload is passed through fix_json, parsed to check that
    it is valid, and written with a 2-space indent to a file whose name is the
    input name with '_cleansed' inserted before the extension.

    :param input_file: string. Name of the file to cleanse.
    :return: None. Success and failure are reported with print; nothing is
             written when no array is found or the payload cannot be parsed.

    Example :
    input_file = 'MY_FILE.json'  # Replace with your input file name
    will export a cleansed file as 'MY_FILE_cleansed.json'
    """

    # Generate the output filename
    file_name, file_extension = os.path.splitext(input_file)
    output_file = f"{file_name}_cleansed{file_extension}"

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # re.DOTALL lets '.' cross newlines, so an array spread over several
    # lines is captured as a whole instead of only a single-line fragment.
    json_match = re.search(r'\[.*\]', content, re.DOTALL)
    if not json_match:
        print("No JSON content found in the file.")
        return

    # Fix the JSON string
    fixed_json_string = fix_json(json_match.group())

    # Parse the JSON string to ensure it's valid
    try:
        json_data = json.loads(fixed_json_string)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return

    # Write the formatted JSON to the output file
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(json_data, file, indent=2)

    print(f"JSON content extracted and saved to {output_file}")

Convert the data files to DataFrames:

In [4]:
import pandas as pd
import numpy as np
import json, requests

from MyAPIKey import api_key
realtime = "HISTORICAL_OPTIONS"

# For each symbol: cleanse the raw file first (this writes the *_cleansed.json
# copy next to it), then load that cleansed copy into a DataFrame.
# extract_json_from_file has no return value, so the data_* names hold None.
data_demo = extract_json_from_file("../data/1.0-demo.json")
df_demo = pd.read_json("../data/1.0-demo_cleansed.json")
# Alternative: query the API directly instead of reading a saved file.
# df_demo = pd.read_json(f"https://www.alphavantage.co/query?function={realtime}&symbol=IBM&apikey={api_key}&datatype=json")
# requests.get(f'https://www.alphavantage.co/query?function={realtime}&symbol=IBM&apikey={api_key}&datatype=json').json()

data_AAPL = extract_json_from_file("../data/1.0-AAPL.json")
df_AAPL = pd.read_json("../data/1.0-AAPL_cleansed.json")

# The GOOGL raw file must go through the same cleansing step as the others;
# the cleansed copy read on the next line is produced by this call.
data_GOOGL = extract_json_from_file("../data/1.0-GOOGL.json")
df_GOOGL = pd.read_json("../data/1.0-GOOGL_cleansed.json")


JSON content extracted and saved to ../data/1.0-demo_cleansed.json
JSON content extracted and saved to ../data/1.0-AAPL_cleansed.json
JSON content extracted and saved to ../data/1.0-GOOGL_cleansed.json


First look at the data:

In [6]:
from IPython.display import display

# Row counts, one per line, followed by a preview of each frame.
# The GOOGL frame is not included in this cell.
print(len(df_demo), len(df_AAPL), sep="\n")
display(df_demo.head(14), df_AAPL.head(111))


916
2112
1798


Unnamed: 0,contractID,symbol,expiration,strike,type,last,mark,bid,bid_size,ask,ask_size,volume,open_interest,date,implied_volatility,delta,gamma,theta,vega,rho
0,IBM240726C00095000,IBM,2024-07-26,95.0,call,0.0,89.1,87.0,10,91.2,10,0,0,2024-07-24,3.46167,0.99663,0.00022,-0.4873,0.00138,0.00517
1,IBM240726P00095000,IBM,2024-07-26,95.0,put,0.0,0.01,0.0,0,2.13,7,0,0,2024-07-24,2.95253,-0.00086,7e-05,-0.10715,0.0004,-1e-05
2,IBM240726C00100000,IBM,2024-07-26,100.0,call,0.0,84.1,82.0,2,86.2,10,0,0,2024-07-24,3.20222,0.99646,0.00024,-0.47602,0.00145,0.00544
3,IBM240726P00100000,IBM,2024-07-26,100.0,put,0.0,0.01,0.0,0,2.13,3,0,0,2024-07-24,2.73623,-0.00092,8e-05,-0.10636,0.00043,-1e-05
4,IBM240726C00105000,IBM,2024-07-26,105.0,call,0.0,79.1,77.0,2,81.2,10,0,0,2024-07-24,2.95558,0.99627,0.00028,-0.46467,0.00152,0.00571
5,IBM240726P00105000,IBM,2024-07-26,105.0,put,0.0,0.01,0.0,0,1.27,5,0,275,2024-07-24,2.53014,-0.001,0.0001,-0.10553,0.00046,-1e-05
6,IBM240726C00110000,IBM,2024-07-26,110.0,call,0.0,74.2,72.0,10,76.4,10,0,0,2024-07-24,3.12281,0.99043,0.0006,-1.05303,0.00349,0.00592
7,IBM240726P00110000,IBM,2024-07-26,110.0,put,0.0,0.01,0.0,0,1.27,11,0,11,2024-07-24,2.33304,-0.00108,0.00011,-0.10459,0.00049,-1e-05
8,IBM240726C00115000,IBM,2024-07-26,115.0,call,0.0,69.35,67.0,2,71.7,10,0,0,2024-07-24,3.19033,0.98257,0.00099,-1.76735,0.00587,0.00611
9,IBM240726P00115000,IBM,2024-07-26,115.0,put,0.0,0.01,0.0,0,1.27,11,0,95,2024-07-24,2.14432,-0.00117,0.00013,-0.10364,0.00053,-1e-05


Unnamed: 0,contractID,symbol,expiration,strike,type,last,mark,bid,bid_size,ask,ask_size,volume,open_interest,date,implied_volatility,delta,gamma,theta,vega,rho
0,AAPL240726C00100000,AAPL,2024-07-26,100.0,call,125.45,118.22,116.60,1,119.85,17,0,25,2024-07-24,2.90194,1.00000,0.00000,-0.05328,0.00000,0.00548
1,AAPL240726P00100000,AAPL,2024-07-26,100.0,put,0.00,0.01,0.00,0,0.01,3502,0,50,2024-07-24,3.41106,-0.00063,0.00004,-0.11118,0.00036,-0.00001
2,AAPL240726C00105000,AAPL,2024-07-26,105.0,call,0.00,113.20,111.45,1,114.95,17,0,0,2024-07-24,2.77953,1.00000,0.00000,-0.05595,0.00000,0.00575
3,AAPL240726P00105000,AAPL,2024-07-26,105.0,put,0.00,0.01,0.00,0,0.01,500,0,0,2024-07-24,3.20863,-0.00067,0.00005,-0.11063,0.00038,-0.00001
4,AAPL240726C00110000,AAPL,2024-07-26,110.0,call,100.65,108.15,106.45,1,109.85,17,0,1,2024-07-24,2.65713,1.00000,0.00000,-0.05861,0.00000,0.00603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,AAPL240726C00285000,AAPL,2024-07-26,285.0,call,0.01,0.01,0.00,0,0.01,445,0,1679,2024-07-24,1.21233,0.00180,0.00029,-0.10350,0.00093,0.00002
107,AAPL240726P00285000,AAPL,2024-07-26,285.0,put,0.00,67.03,65.35,2,68.70,10,0,0,2024-07-24,2.07390,-0.95062,0.00304,-2.97951,0.01652,-0.01506
108,AAPL240726C00290000,AAPL,2024-07-26,290.0,call,0.01,0.01,0.00,0,0.01,4377,5,638,2024-07-24,1.28367,0.00171,0.00027,-0.10453,0.00089,0.00002
109,AAPL240726P00290000,AAPL,2024-07-26,290.0,put,0.00,72.10,70.45,2,73.75,1,0,0,2024-07-24,2.22344,-0.94896,0.00291,-3.29202,0.01696,-0.01531


Unnamed: 0,contractID,symbol,expiration,strike,type,last,mark,bid,bid_size,ask,ask_size,volume,open_interest,date,implied_volatility,delta,gamma,theta,vega,rho
0,GOOGL240726C00075000,GOOGL,2024-07-26,75.0,call,98.01,97.65,95.3,1,100.0,20,6,1,2024-07-24,3.02566,1.0,0.0,-0.03996,0.0,0.00411
1,GOOGL240726P00075000,GOOGL,2024-07-26,75.0,put,0.0,0.01,0.0,0,0.01,502,0,30,2024-07-24,3.69779,-0.00073,5e-05,-0.10826,0.00032,-1e-05
2,GOOGL240726C00080000,GOOGL,2024-07-26,80.0,call,96.9,92.65,90.3,1,95.0,20,0,1,2024-07-24,2.87071,1.0,0.0,-0.04263,0.0,0.00438
3,GOOGL240726P00080000,GOOGL,2024-07-26,80.0,put,0.0,0.01,0.0,0,0.01,47,0,1,2024-07-24,3.42524,-0.00079,6e-05,-0.10759,0.00034,-1e-05
