In [1]:
import requests
import numpy as np
import pandas as pd
from datetime import time
import requests
import datetime

def fetch_contracts(root, date, contract_type):
    """
    Fetch contracts data for a given root symbol, date, and contract type,
    and filter the results for the specified root symbol.
    
    :param root: Root symbol of the index, e.g., 'SPX' or 'SPXW'.
    :param date: Date in 'YYYYMMDD' format.
    :param contract_type: Type of contract data to fetch ('trade', 'quote', or 'open_interest').
    :return: Filtered JSON response from the API for the specified root.
    """
    url = f"http://127.0.0.1:25510/v2/list/contracts/option/{contract_type}"
    params = {"start_date": date}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        # Filter the contracts for the specified root symbol
        filtered_contracts = [contract for contract in data['response'] if contract[0] == root]
        return {
            "header": data["header"],
            "response": filtered_contracts
        }
    else:
        #print(f"Failed to fetch contracts for {contract_type}: {response.status_code}, {response.text}")
        return None

#date = "20240223"
# Generate a list of dates from 20240511 to the present day
start_date = datetime.datetime(2022, 11, 19)
end_date = datetime.datetime(2022, 12, 6)

#end_date = datetime.datetime.now()
date_list = [(start_date + datetime.timedelta(days=x)).strftime('%Y%m%d') for x in range((end_date - start_date).days + 1)]





roots = ["SPXW", "SPY", "UVXY","SPX","QQQ","VIX"]
#roots = ["UVXY","VIX","CAT"]
contract_types = ["trade", "quote", "open_interest"]

# A list to hold dates with contracts found
#dates_with_contracts = []

# Iterate through each date
#for date in date_list:
#    contracts_found = False
#    for root in roots:
#        for contract_type in contract_types:
#            data = fetch_contracts(root, date, contract_type)
#            if data and data["response"]:
#                contracts_found = True
#                print(f"{root} - {date} - {contract_type}: {len(data['response'])} contracts found")
#    if contracts_found:
#        dates_with_contracts.append(date)



In [2]:
import os
import pandas as pd
import requests
from functools import reduce
import requests
from tqdm import tqdm  # Import tqdm
from datetime import datetime
from datetime import timedelta
import json

def log_strike_error_to_json(error_contracts_list, base_directory="H:\\Financial_Data\\Historical_data"):
    error_folder_name = "Strike_History_Errors"
    error_log_filename = "errors_log.json"
    
    for contract in error_contracts_list:
        root = contract["root"]
        start_date = contract["start_date"]
        error_folder_path = os.path.join(base_directory, root, error_folder_name, start_date)
        error_log_file_path = os.path.join(error_folder_path, error_log_filename)

        if not os.path.exists(error_folder_path):
            os.makedirs(error_folder_path)

        try:
            if os.path.exists(error_log_file_path):
                with open(error_log_file_path, "r") as file:
                    existing_errors = json.load(file)
            else:
                existing_errors = []
        except json.JSONDecodeError:
            existing_errors = []

        # Check for duplicates and append if no duplicates found
        if contract not in existing_errors:
            existing_errors.append(contract)
            with open(error_log_file_path, "w") as file:
                json.dump(existing_errors, file, indent=4)
    
    #print(f"Logged an error for contract: Root={root}, Expiration={expiration}, Strike={strike}, Right={right}, Start_Date={start_date}, End_Date={end_date} to JSON")

def log_strike_success_to_json(successful_contracts_list, base_directory="H:\\Financial_Data\\Historical_data"):
    success_folder_name = "Strike_History_Success"
    success_log_filename = "Completed_log.json"
    
    for contract in successful_contracts_list:
        root = contract["root"]
        start_date = contract["start_date"]
        success_folder_path = os.path.join(base_directory, root, success_folder_name, start_date)
        success_log_file_path = os.path.join(success_folder_path, success_log_filename)

        if not os.path.exists(success_folder_path):
            os.makedirs(success_folder_path)

        try:
            if os.path.exists(success_log_file_path):
                with open(success_log_file_path, "r") as file:
                    existing_data = json.load(file)
            else:
                existing_data = []
        except json.JSONDecodeError:
            existing_data = []
        
        # Check for duplicates and append if no duplicates found
        if contract not in existing_data:
            existing_data.append(contract)
            with open(success_log_file_path, "w") as file:
                json.dump(existing_data, file, indent=4)
    
    #print(f"Logged an error for contract: Root={root}, Expiration={expiration}, Strike={strike}, Right={right}, Start_Date={start_date}, End_Date={end_date} to JSON")

# Function to fetch data, convert to DataFrame, and apply 'ms_to_time'
def fetch_and_format_data(endpoint, columns, root, expiration, strike, right, start_date, end_date):
    interval = 10000
    base_url = "http://127.0.0.1:25510/v2/hist/option/"
    try:
        response = requests.get(f"{base_url}{endpoint}", params={
            "root": root, "exp": expiration, "strike": strike, "right": right,
            "start_date": start_date, "end_date": end_date, "ivl": interval
        })
        response.raise_for_status()  # Ensure we notice bad responses

        # Ensure there is data before attempting to decode JSON
        if response.text:
            data = response.json().get('response', [])
            return pd.DataFrame(data, columns=columns).assign(time_of_day=lambda df: df['ms_of_day'].apply(ms_to_time))
        else:
            # Handle cases where there is no response data
            error_contracts_list.append({
                "root":root,
                "expiration":expiration, 
                "strike":strike, 
                "right":right, 
                "start_date":start_date, 
                "end_date":end_date, 
            })
            return pd.DataFrame(columns=columns)
    except (requests.exceptions.HTTPError, requests.exceptions.RequestException, ValueError) as e:
        # Log the error and return an empty DataFrame
        error_contracts_list.append({
                "root":root,
                "expiration":expiration, 
                "strike":strike, 
                "right":right, 
                "start_date":start_date, 
                "end_date":end_date, 
            })
        return pd.DataFrame(columns=columns)

# Function to convert milliseconds to time of day
def ms_to_time(ms):
    seconds = (ms // 1000) % 60
    minutes = (ms // (1000 * 60)) % 60
    hours = (ms // (1000 * 60 * 60)) % 24
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def fetch_contracts_filtered_by_root(date, contract_type, url):
    """
    Fetch and filter contracts data for given date and contract type, filtering by root symbols.
    """
    #url = f"http://127.0.0.1:25510/v2/list/contracts/option/{contract_type}"
    params = {"start_date": date}
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        #print(f"Failed to fetch contracts for {contract_type}: {response.status_code}, {response.text}")
        return []

def collect_contracts_data(date, contract_types, url, url_1, url_2, roots=[""]):
    """
    Check to see if location already exists:
    """
    _base_directory = "H:\\Financial_Data\\Historical_data"
    _contract_list_folder = "CONTRACT_LIST"
    _folder_path = os.path.join(_base_directory, roots[0], _contract_list_folder,date)
    if os.path.exists(_folder_path):
        #print("skipped")
        return
    
    """
    Collect contracts data across specified roots and contract types into a list of dicts.
    """
    trade_contracts = fetch_contracts_filtered_by_root(date, contract_types[0], url)
    quote_contracts = fetch_contracts_filtered_by_root(date, contract_types[1], url_1)
    open_interest_contracts = fetch_contracts_filtered_by_root(date, contract_types[2], url_2)

    # Initialize empty lists to ensure variables are defined
    _trade_contracts = []
    _quote_contracts = []
    _open_interest_contracts = []
    # Filter the contracts by the specified roots and ensure all dicts have the expected keys
    if 'response' in trade_contracts:
        #_trade_contracts = [{"contract_type": "trade", "date": date, **contract} for contract in trade_contracts['response']]
        _trade_contracts = [{
        "contract_type": "trade",
        "date": date,
        "root": contract[0],
        "expiration": contract[1],
        "strike": contract[2],
        "right": contract[3]
    } for contract in trade_contracts['response']]
    if 'response' in quote_contracts:
        #_quote_contracts = [{"contract_type": "quote", "date": date, **contract} for contract in quote_contracts['response']]
        _quote_contracts = [{
        "contract_type": "trade",
        "date": date,
        "root": contract[0],
        "expiration": contract[1],
        "strike": contract[2],
        "right": contract[3]
    } for contract in quote_contracts['response']]
    if 'response' in open_interest_contracts:
        #_open_interest_contracts = [{"contract_type": "open_interest", "date": date, **contract} for contract in open_interest_contracts['response']]
        _open_interest_contracts = [{
        "contract_type": "trade",
        "date": date,
        "root": contract[0],
        "expiration": contract[1],
        "strike": contract[2],
        "right": contract[3]
    } for contract in open_interest_contracts['response']]
        
    filtered_contracts = _trade_contracts+_quote_contracts+_open_interest_contracts
    # Now, filtered_contracts contains dictionaries with a consistent set of keys
    df = pd.DataFrame(filtered_contracts, columns=["root", "expiration", "strike", "right", "contract_type","date"])
    df = df[df['root'].isin(roots)]
    df = df.drop_duplicates(subset=['root', 'expiration', 'strike', 'right'])
    missing_roots = [root for root in roots if root not in df['root'].unique()]
    if missing_roots:
        del df
        return

    base_directory = "H:\\Financial_Data\\Historical_data"
    contract_list_folder = "CONTRACT_LIST"
 
    for root in roots:   
        df_filtered = df[df['root'] == root]
        # Folder path for the current root
        folder_path = os.path.join(base_directory, root, contract_list_folder,date)
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        filename = f"{root}_{date}.json"
        file_path = os.path.join(folder_path, filename)
        df_filtered.to_json(file_path, orient='records', lines=True)
    all_contracts = []   
    for index, row in df.iterrows():
        contract_dict = {
            "root": row['root'],
            "expiration": row['expiration'],
            "strike": row['strike'],
            "right": row['right'],
            "contract_type": row['contract_type'],
            "date": row['date']
        }
        all_contracts.append(contract_dict)
    del df
    del df_filtered
    return all_contracts


# Function to check and drop identical columns
def check_and_drop_identical_columns(df, potential_duplicates, suffix):
    for duplicate_col in potential_duplicates:
        # Remove the suffix to find the original column name
        # Ensure to handle different suffix lengths accurately
        #second_order_col=''
        if suffix == '_greeks':
            original_col = duplicate_col[:-7]  # '_greeks' has 7 characters
            #print(potential_duplicates)
            #print(original_col)
            #print(duplicate_col)
        elif suffix == '_greeks_second_order':
            #print(duplicate_col[:-20])
            original_col = duplicate_col[:-20] # '_greeks_second_order' has 20 characters
            first_order_col = original_col+'_greeks'
        elif suffix == '_greeks_third_order':
            #print(duplicate_col[:-19])
            original_col = duplicate_col[:-19] # '_greeks_third_order' has 19 characters
            first_order_col = original_col+'_greeks'
            second_order_col = original_col+'_greeks_second_order'
            #print(first_order_col)
            #print(second_order_col)
        else:
            original_col = duplicate_col
        
        # Check if the original column exists in the DataFrame
        if original_col in df.columns:
            # Check if the original column's data equals the duplicate column's data
            if df[original_col].equals(df[duplicate_col]):
                #print(original_col)
                #print(duplicate_col)
                #print('end')
                # Drop the duplicate column if the data is identical
                df.drop(columns=[duplicate_col], inplace=True)
        if suffix == '_greeks_second_order' or suffix == '_greeks_third_order':
            if first_order_col in df.columns:
                if df[first_order_col].equals(df[duplicate_col]):
                    df.drop(columns=[duplicate_col], inplace=True)
        if suffix == '_greeks_third_order':
            if second_order_col in df.columns:
                #print(second_order_col)
                if df[second_order_col].equals(df[duplicate_col]):
                    df.drop(columns=[duplicate_col], inplace=True)
        #print(df.columns)












def process_contracts(root, expiration, strike, right, start_date, end_date):
    # Assuming 'first_contract' is already defined as the first row of your initial DataFrame
    
    # Define constants
    interval = "10000"
    base_directory="H:\\Financial_Data\\Historical_data"
    # Base URL for API requests
    base_url = "http://127.0.0.1:25510/v2/hist/option/"

    # Columns for each DataFrame
    columns_quote = ['ms_of_day', 'bid_size', 'bid_exchange', 'bid', 'bid_condition', 'ask_size', 'ask_exchange', 'ask', 'ask_condition', 'date']
    columns_iv = ['ms_of_day', 'bid', 'bid_implied_vol', 'midpoint', 'mid_implied_vol', 'ask', 'ask_implied_vol', 'iv_error', 'ms_of_day2', 'underlying_price', 'date']
    columns_greeks = ['ms_of_day', 'bid2', 'ask2', 'delta', 'theta', 'vega', 'rho', 'epsilon', 'lamba', 'implied_vol', 'iv_error2', 'ms_of_day22', 'underlying_price2', 'date']
    columns_greeks_2nd_order = ['ms_of_day', 'bid3', 'ask3', 'gamma', 'vanna', 'charm', 'vomma', 'veta', 'implied_vol3', 'iv_error3', 'ms_of_day23', 'underlying_price3', 'date']
    columns_greeks_3rd_order = ['ms_of_day', 'bid4', 'ask4', 'speed', 'zomma', 'color', 'ultima', 'implied_vol4', 'iv_error4', 'ms_of_day24', 'underlying_price4', 'date']


    # Fetch and format data for each DataFrame
    df1 = fetch_and_format_data("quote", columns_quote, root, expiration, strike, right, start_date, end_date)
    df2 = fetch_and_format_data("implied_volatility", columns_iv, root, expiration, strike, right, start_date, end_date)
    df3 = fetch_and_format_data("greeks", columns_greeks, root, expiration, strike, right, start_date, end_date)
    df3.rename(columns={'time_of_day': 'time_of_day_x'}, inplace=True)
    df4 = fetch_and_format_data("greeks_second_order", columns_greeks_2nd_order, root, expiration, strike, right, start_date, end_date)
    df4.rename(columns={'time_of_day': 'time_of_day_y'}, inplace=True)
    df5 = fetch_and_format_data("greeks_third_order", columns_greeks_3rd_order, root, expiration, strike, right, start_date, end_date)
    df5.rename(columns={'time_of_day': 'time_of_day_z'}, inplace=True)

    # List of all DataFrames to merge
    dfs = [df2, df3, df4, df5]

    # Merge all DataFrames on 'ms_of_day' and 'date' using functools.reduce
    #merged_df = reduce(lambda left, right: pd.merge(left, right, on=['ms_of_day', 'date'], how='outer'), dfs)
   # merged_df = reduce(lambda left, right: pd.merge(left, right, on=['ms_of_day', 'date'], how='outer', suffixes=('_left', '_right')), dfs)
    # Example of renaming conflicting columns in all DataFrames
    #for i, df in enumerate(dfs):
     #   suffix = f"_df{i+1}"
      #  conflicting_columns = ['bid', 'ask', 'iv_error', 'ms_of_day2', 'underlying_price']
       # df.rename(columns={col: col + suffix for col in conflicting_columns if col in df.columns}, inplace=True)

    # Now merge them
    merged_df = reduce(lambda left, right: pd.merge(left, right, on=['ms_of_day', 'date'], how='outer'), dfs)
    #merged_df.rename(columns={'time_of_day_x': 'time_of_day'}, inplace=True)


    

    # Define the columns you want to keep as per the IV_Greeks specification
    iv_greeks_columns = [
        'ms_of_day', 'time_of_day', 'bid', 'bid_implied_vol', 'midpoint', 'mid_implied_vol', 'ask', 'ask_implied_vol',
        'iv_error', 'ms_of_day2', 'underlying_price', 'date', 'delta', 'theta', 'vega', 'rho', 'epsilon', 'lamba',
        'implied_vol', 'gamma', 'vanna', 'charm', 'vomma', 'veta', 'speed', 'zomma', 'color', 'ultima'
    ]

    # Keep only the columns specified in iv_greeks_columns
    final_df = merged_df[iv_greeks_columns]



    quote_column_order = [
        'ms_of_day', 'time_of_day', 'bid_size', 'bid_exchange', 'bid', 'bid_condition', 
        'ask_size', 'ask_exchange', 'ask', 'ask_condition', 'date'
    ]

    # Reorder the columns in 'df_quote'
    df_quote = df1[quote_column_order]



     #The final DataFrame is ready; now save it to the appropriate JSON file
    folder_path_iv = os.path.join(base_directory, root, 'IV_Greeks', start_date)
    folder_path_quotes = os.path.join(base_directory, root, 'QUOTES', start_date)
    
    if not os.path.exists(folder_path_iv):
        os.makedirs(folder_path_iv, exist_ok=True)
    if not os.path.exists(folder_path_quotes):
        os.makedirs(folder_path_quotes, exist_ok=True)
    
    file_path_iv = os.path.join(folder_path_iv, f"{root}_{start_date}_{expiration}_{strike}_{right}.json")
    file_path_quotes = os.path.join(folder_path_quotes, f"{root}_{start_date}_{expiration}_{strike}_{right}.json")

    final_df.to_json(file_path_iv, orient='records', lines=True)
    df_quote.to_json(file_path_quotes, orient='records', lines=True)

    # Log success
    successful_contracts_list.append({
                "root":root,
                "expiration":expiration, 
                "strike":strike, 
                "right":right, 
                "start_date":start_date, 
                "end_date":end_date, 
            })
    del df1
    del df2
    del df3
    del df4
    del df5
    del dfs
    del merged_df
    del final_df
    del df_quote

    
def load_logged_contracts(folder_path, filename):
    file_path = os.path.join(folder_path, filename)
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            try:
                return json.load(file)
            except json.JSONDecodeError:
                return []  # or handle the error as needed
    return []    
    
def get_normalized_entry(entry, keys):
    # Normalize the dictionary keys to lowercase
    normalized_entry = {k.lower(): entry.get(k) or entry.get(k.capitalize()) for k in keys}
    return tuple(normalized_entry[k] for k in keys)

# Example usage
#date = "20240220"
url = f"http://127.0.0.1:25510/v2/list/contracts/option/trade"
url_1 = f"http://127.0.0.1:25510/v2/list/contracts/option/quote"
url_2 = f"http://127.0.0.1:25510/v2/list/contracts/option/open_interest"
#params = {"start_date": date}
#response = requests.get(url, params=params)
#data = response.json()
contract_types = ["trade", "quote", "open_interest"]
#contracts_data = []  # Initialize as empty list


Trading_Days = ["SPXW", "SPY", "UVXY","SPX","QQQ","VIX","CAT","TSLA"]
for date in tqdm(date_list, desc="Total Contracts for Project Complete"):
    #contracts_data.extend(collect_contracts_data(date, contract_types, roots))
    contracts_data = collect_contracts_data(date, contract_types, url, url_1, url_2, Trading_Days)
  

base_directory = "H:\\Financial_Data\\Historical_data"
contract_list_folder = "CONTRACT_LIST"

#i=0
special_roots = {'SPXW', 'QQQ', 'SPY'}
_spx_root = {'SPX'}
_spxw_root = {'SPXW'}
_qqq_root = {'QQQ'}
_spy_root = {'SPY'}
for date in tqdm(date_list, desc="Total Project Complete"):    
    daily_contracts = []  # Initialize the list for daily contracts
    
    # Collect all file paths first
    file_paths = [os.path.join(base_directory, root, contract_list_folder, date, f"{root}_{date}.json") for root in roots]
    file_paths = [path for path in file_paths if os.path.exists(path)]

    current_date = datetime.strptime(date, '%Y%m%d')
    
    # Load dataframes if file exists
    daily_contracts = [pd.read_json(path, lines=True) for path in file_paths]
    
    # Concatenate all dataframes for the current day
    if daily_contracts:
        df = pd.concat(daily_contracts, ignore_index=True)
        #print("round SPX ")
        #print(df[df['root'] == 'SPX']['expiration'].unique())
        #print(df[(df['root'] == 'SPX') & (df['root'] != 'SPXW')]['expiration'].unique())

        
        
        #print("SPXW ")
        #print(df[df['root'] == 'SPXW']['expiration'].unique())
        
        #print("round SPX ")
        #print(df[df['root'] == 'SPX']['expiration'].unique())
        
        #print("round UVXY ")
        #print(df[df['root'] == 'UVXY']['expiration'].unique())

        #print("round SPY ")
        #print(df[df['root'] == 'SPY']['expiration'].unique())
        
        #print("round QQQ ")
        #print(df[df['root'] == 'QQQ']['expiration'].unique())

        #print("round VIX ")
        #print(df[df['root'] == 'VIX']['expiration'].unique())

        #print("round CAT ")
        #print(df[df['root'] == 'CAT']['expiration'].unique())
        
        
        filtered_dfs = []
        for root in df['root'].unique():
            root_df = df[df['root'] == root]  # Filter the DataFrame for the current root
            if root in _spxw_root:
                future_expirations = root_df[root_df['expiration'] >= int(current_date.strftime('%Y%m%d'))]['expiration'].unique()
                unique_expirations = sorted(future_expirations)[:3]
                filtered_root_df = root_df[root_df['expiration'].isin(unique_expirations)]

            elif root in _qqq_root:
                future_expirations = root_df[root_df['expiration'] >= int(current_date.strftime('%Y%m%d'))]['expiration'].unique()
                unique_expirations = sorted(future_expirations)[:3]
                filtered_root_df = root_df[root_df['expiration'].isin(unique_expirations)]

            elif root in _spy_root:
                future_expirations = root_df[root_df['expiration'] >= int(current_date.strftime('%Y%m%d'))]['expiration'].unique()
                unique_expirations = sorted(future_expirations)[:3]
                filtered_root_df = root_df[root_df['expiration'].isin(unique_expirations)]

            elif root in _spx_root:
                future_expirations = root_df[root_df['expiration'] >= int(current_date.strftime('%Y%m%d'))]['expiration'].unique()
                unique_expirations = sorted(future_expirations)[:3]
                filtered_root_df = root_df[root_df['expiration'].isin(unique_expirations)]
            #elif root == 'UVXY':
            #    future_expirations = root_df[root_df['expiration'] >= int(current_date.strftime('%Y%m%d'))]
            #    filtered_dfs.append(future_expirations)
            #elif root == 'VIX':
            #    future_expirations = root_df[root_df['expiration'] >= int(current_date.strftime('%Y%m%d'))]
            #    filtered_dfs.append(future_expirations)
            else:
                filtered_dfs.append(df[df['root'] == root])
            filtered_dfs.append(filtered_root_df)
        df = pd.concat(filtered_dfs).reset_index(drop=True)
        #print("SPXW ")
        #print(df[df['root'] == 'SPXW']['expiration'].unique())
        
        #print("round SPX ")
        #print(df[df['root'] == 'SPX']['expiration'].unique())
        
        #print("round UVXY ")
        #print(df[df['root'] == 'UVXY']['expiration'].unique())

        #print("round SPY ")
        #print(df[df['root'] == 'SPY']['expiration'].unique())
        
        #print("round QQQ ")
        #print(df[df['root'] == 'QQQ']['expiration'].unique())

        #print("round VIX ")
        #print(df[df['root'] == 'VIX']['expiration'].unique())

        #print("round CAT ")
        #print(df[df['root'] == 'CAT']['expiration'].unique())


    else:
        continue  # Skip to next date if no contracts

    success_folder_name = "Strike_History_Success"
    error_folder_name = "Strike_History_Errors"
    conditional_descriptor = 'a'
    successful_contracts_list = []
    error_contracts_list = []
    i = 1
    tqdm_iterator = tqdm(df.iterrows(), desc="Processing contracts")
    for index, contract in tqdm_iterator:
        desc = f"Root: {contract['root']}, Strike: {contract['strike']}, Expiration: {contract['expiration']}, Right: {contract['right']}, Date: {contract['date']}, Contract:{i}"
        tqdm_iterator.set_description(desc)
        if conditional_descriptor != contract['root']:
            i = 0
            conditional_descriptor = contract['root']
            log_strike_success_to_json(successful_contracts_list, base_directory)
            log_strike_error_to_json(error_contracts_list, base_directory)
            # After processing and logging for the current root
            successful_contracts_list = []
            error_contracts_list = []

            
            
        i = i+1
        # Define constants
        root = str(contract['root'])
        expiration = str(contract['expiration'])
        strike = str(contract['strike'])
        right = str(contract['right'])
        start_date = str(contract['date'])  # Assuming 'date' is in 'YYYYMMDD' format
        end_date = str(contract['date']) 


        success_folder_path = os.path.join(base_directory, root, success_folder_name, start_date)
        error_folder_path = os.path.join(base_directory, root, error_folder_name, start_date)
        success_contracts = load_logged_contracts(success_folder_path, 'Completed_log.json')
        error_contracts = load_logged_contracts(error_folder_path, 'errors_log.json')
        
        # Define a set or dictionary for quicker search
        success_entries = {(entry["root"], entry["expiration"], entry["strike"], entry["right"], entry["start_date"], entry["end_date"]) for entry in success_contracts}
        error_entries = {(entry["root"], entry["expiration"], entry["strike"], entry["right"], entry["start_date"], entry["end_date"]) for entry in error_contracts}
        #only run below if debugging
            #keys = ["root", "expiration", "strike", "right", "start_date", "end_date"]

            #success_entries = {get_normalized_entry(entry, keys) for entry in success_contracts}
            #error_entries = {get_normalized_entry(entry, keys) for entry in error_contracts}


        
        contract_tuple = (root, expiration, strike, right, start_date, end_date)
        #i = i+1
        #if i == 850:
            #print('hi')
        if contract_tuple in success_entries or contract_tuple in error_entries:
            #print('skipped')
            continue
        try:
            #print
            process_contracts(root,  expiration, strike, right, start_date, end_date)
        except KeyError as e:
            error_contracts_list.append({
                "root":root,
                "expiration":expiration, 
                "strike":strike, 
                "right":right, 
                "start_date":start_date, 
                "end_date":end_date, 
            })

Total Contracts for Project Complete: 100%|████████████████████████████████████████████| 18/18 [00:09<00:00,  1.86it/s]
Total Project Complete:   0%|                                                                   | 0/18 [00:00<?, ?it/s]
Processing contracts: 0it [00:00, ?it/s][A
Root: UVXY, Strike: 1000, Expiration: 20221216, Right: C, Date: 20221121, Contract:1: : 0it [00:00, ?it/s][A
Root: UVXY, Strike: 1000, Expiration: 20230120, Right: P, Date: 20221121, Contract:1: : 0it [00:00, ?it/s][A
Root: UVXY, Strike: 2000, Expiration: 20230120, Right: C, Date: 20221121, Contract:2: : 0it [00:00, ?it/s][A
Root: UVXY, Strike: 4000, Expiration: 20221216, Right: P, Date: 20221121, Contract:3: : 0it [00:00, ?it/s][A
Root: UVXY, Strike: 3000, Expiration: 20230120, Right: C, Date: 20221121, Contract:4: : 0it [00:00, ?it/s][A
Root: UVXY, Strike: 4000, Expiration: 20230120, Right: C, Date: 20221121, Contract:5: : 0it [00:00, ?it/s][A
Root: UVXY, Strike: 5000, Expiration: 20221216, Right: C

In [3]:
print("complete")

complete
