# Analyze Draft Application to Identify Support for Features in IDF
This Jupyter notebook takes in an IDF document and a Draft Application, creates chunks of text from the IDF that include invention details and examples, and outputs a report indicating how well supported the text chunks are by the Draft Application.

## Instructions:
0. Set your variables and environment

1. Upload a plain-text IDF file and a plain-text detailed description file to the Jupyter notebook workspace

2. Update the `IDF_filename` and `description_filename` variables to point to the IDF and detailed description files you uploaded

## 0. Set up your variables and environment

In [1]:
import os
import warnings

# Input files: plain-text exports of the IDF and of the draft application's detailed description.
IDF_filename = 'IDF Text (0111-0266PRO).txt'
description_filename = 'Detailed Description Text (0111-0266PRO).txt'

# The bearer token is a credential and must never be stored in the notebook (it ends up in
# version control and in every shared copy). Export it before starting Jupyter, e.g.
#   export BACKOFFICE_AUTH_TOKEN="<token>"
# Any token that was previously pasted into this cell should be treated as leaked and revoked.
auth_token = os.environ.get('BACKOFFICE_AUTH_TOKEN', '')
if not auth_token:
    warnings.warn(
        "BACKOFFICE_AUTH_TOKEN is not set; requests to the backoffice API will be rejected "
        "until auth_token is populated."
    )

In [2]:
# Staging auth token: intentionally NOT stored in the notebook.
# To run against staging, assign a staging token to `auth_token` from an environment variable
# or secret store instead of pasting it here. Any token that was previously committed in this
# cell should be treated as compromised and revoked.

In [3]:
# Uncomment to install any missing libraries
!pip install --upgrade pip
!pip install requests numpy pandas matplotlib tqdm ipywidgets widgetsnbextension pandas-profiling tabulate

Collecting pip
  Using cached pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Using cached pip-24.3.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.2
    Uninstalling pip-24.2:
      Successfully uninstalled pip-24.2
Successfully installed pip-24.3.1
Collecting numpy
  Using cached numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (116 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting matplotlib
  Using cached matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting tqdm
  Using cached tqdm-4.67.0-py3-none-any.whl.metadata (57 kB)
Collecting ipywidgets
  Using cached ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting widgetsnbextension
  Using cached widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)
Collecting pandas-profiling
  Using cached pandas_profiling-3.2.0-py2.py3-none-any.whl.metadata (2

In [4]:
# Import necessary libraries
import pandas as pd
import re, requests, json, urllib3
#from tqdm import tqdm          # progress bar library
from IPython.display import display, Markdown, JSON
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Define helper functions
import requests
import re

def get_prompt_chain(prompt_id, timeout=60):
    """Fetch a prompt-chain definition from the backoffice API.

    Args:
        prompt_id: Identifier of the prompt chain, interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up. Without a timeout,
            requests waits indefinitely and a stalled server hangs the kernel.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status (e.g. an expired
            token), instead of handing an error payload to the caller as if it were a chain.
        requests.Timeout: If the server does not answer within ``timeout`` seconds.
    """
    #promptChainURL = f"https://backoffice.staging.harritydev.com/api/prompts/{prompt_id}"
    promptChainURL = f"https://backoffice.h2tools.hhllp.local/api/prompts/{prompt_id}"
    headers = {
        'Authorization': f"Bearer {auth_token}",
        'Content-Type': 'application/json'
    }
    # NOTE(review): verify=False disables TLS certificate checking; kept because the original
    # relies on it, but prefer pointing `verify` at the internal CA bundle.
    response = requests.get(promptChainURL, headers=headers, verify=False, timeout=timeout)
    response.raise_for_status()
    return response.json()

def get_variables(chain):
    """Return the names of all variables declared by the links of a prompt chain.

    Links are read from ``chain["template"]["chain"]``; links with no (or an empty)
    ``"variables"`` entry contribute nothing. Names are returned in declaration order.
    """
    return [
        declared["name"]
        for link in chain.get("template").get("chain")
        if link.get("variables")
        for declared in link["variables"]
    ]

def set_variables(chain, variable_name, value):
    """Assign ``value`` to every variable called ``variable_name`` in a prompt chain.

    The chain is modified in place; every link under ``chain["template"]["chain"]`` is
    visited, and nothing happens (and nothing is reported) when no variable matches.
    """
    links = chain.get("template").get("chain")
    for link in links:
        if not link.get("variables"):
            continue
        for entry in link.get("variables"):
            if entry["name"] != variable_name:
                continue
            entry["value"] = value

def run_prompt_chain(chain_object):
    """POST the chain's ``"template"`` to the NLP prompts endpoint and return the raw response.

    The response object is returned unparsed; callers read ``.text`` themselves.
    The request is allowed up to 600 seconds and is sent without TLS verification.
    """
    #url = 'https://backoffice.staging.harritydev.com/api/v2/nlp/prompts'
    endpoint = 'https://backoffice.h2tools.hhllp.local/api/v2/nlp/prompts'
    request_headers = dict([
        ("authorization", f"Bearer {auth_token}"),
        ("content-type", "application/json"),
    ])
    response = requests.post(
        endpoint,
        headers=request_headers,
        json=chain_object["template"],
        verify=False,
        timeout=600,
    )
    return response

# Function to execute the prompt chain and parse output
def process_chunk(chain, df_chunk, description_text):
    """Run ``chain`` on one DataFrame chunk and return its "Disclosure Details" entry.

    The chunk is serialised as JSON records into the chain's "IDFchunks" variable and
    ``description_text`` into "DraftApp"; the response body is parsed as JSON and the
    value under "Disclosure Details" is returned (KeyError if the key is absent).
    """
    chain_inputs = (
        ("IDFchunks", df_chunk.to_json(orient='records')),
        ("DraftApp", description_text),
    )
    for input_name, input_value in chain_inputs:
        set_variables(chain, input_name, input_value)

    response = run_prompt_chain(chain)

    # For debugging
    # print(response.text)

    return json.loads(response.text)["Disclosure Details"]

# Function to execute the prompt chain and parse output - text only output
def process_chunk_to(chain, df_chunk, description_text):
    """Run ``chain`` on one DataFrame chunk and return the unparsed response ("text only").

    Same inputs as ``process_chunk`` ("IDFchunks" gets the chunk as JSON records,
    "DraftApp" gets ``description_text``), but the response object is handed back
    as-is so the caller can read ``.text``.
    """
    chain_inputs = (
        ("IDFchunks", df_chunk.to_json(orient='records')),
        ("DraftApp", description_text),
    )
    for input_name, input_value in chain_inputs:
        set_variables(chain, input_name, input_value)

    return run_prompt_chain(chain)

In [None]:
# Fetch the production prompt chains used by the rest of the notebook:
#   IDF_chunks_chain_a          - run twice on the IDF text to produce two sets of chunks
#   IDF_chunks_chain_b          - receives both sets ("firstChunks"/"secondChunks") and combines them
#   support_identifying_chain_a - run twice per chunk batch against the draft application
#   support_identifying_chain_b - receives both runs ("FirstDisclosureRun"/"SecondDisclosureRun")
#   JSON_chain                  - receives the combined check ("DisclosureCheck"); its output is parsed as JSON
IDF_chunks_chain_a = get_prompt_chain('673a198a9f7703f61841087f')
IDF_chunks_chain_b = get_prompt_chain('673a15929f7703952741087e')
support_identifying_chain_a = get_prompt_chain('672d80eb4669618119831eba')
support_identifying_chain_b = get_prompt_chain('672d811c4669616bb5831ebb')
JSON_chain = get_prompt_chain('67322512466961bc79831ec5')

# The single-step chains below are no longer used because of the timeout issue.
# Note: `support_identifying_chain` and `IDF_chunks_chain` are therefore NOT defined in this notebook.
#support_identifying_chain = get_prompt_chain('672d26984669611b2e831eb8')
# IDF_chunks_chain = get_prompt_chain('672d12c04669613f04831eb7')

# Uncomment if you want to inspect the prompt chains
#display(JSON(IDF_chunks_chain_a))
#display(JSON(support_identifying_chain_a))

# Uncomment to show the variable names needed by a prompt chain
#display(Markdown(f"### Provide the \"{IDF_chunks_chain_a['name']}\" prompt with these variables: {get_variables(IDF_chunks_chain_a)}."))
#display(Markdown(f"### Provide the \"{support_identifying_chain_a['name']}\" prompt with these variables: {get_variables(support_identifying_chain_a)}."))

In [None]:
# Staging prompt chains
#IDF_chunks_chain_a = get_prompt_chain('673a682e30f85ab4f367627d')
#IDF_chunks_chain_b = get_prompt_chain('673a685d30f85a931167627e')
#support_identifying_chain_a = get_prompt_chain('672e70d6786ea227109f68e1')
#support_identifying_chain_b = get_prompt_chain('672e7113786ea27bc49f68e2')
#JSON_chain = get_prompt_chain('6739ff0a30f85a414f67627c')

# old chain no longer in use:
#IDF_chunks_chain = get_prompt_chain('672e6f66786ea24fdb9f68df')

#Uncomment if you want to inspect the prompt chains
#display(JSON(IDF_chunks_chain)) 
#display(JSON(support_identifying_chain_a)) 

In [None]:
# Read the IDF text. The encoding is stated explicitly so that non-ASCII characters in the
# IDF (e.g. "…" and "–") decode the same way on every platform instead of depending on the
# locale default. The `with` block closes the file, so no explicit close() is needed.
with open(IDF_filename, 'r', encoding='utf-8') as file:
    IDF_text = file.read()
# print(IDF_text)

In [None]:
# Run the first prompt chain twice to get two sets of IDF chunks.
# Both responses are kept because the next cell feeds them to IDF_chunks_chain_b
# as "firstChunks" and "secondChunks".
set_variables(IDF_chunks_chain_a, "IDF", IDF_text) # "IDF" is the variable this chain is given; get_variables(IDF_chunks_chain_a) lists what the chain declares
prompt1_output_1 = run_prompt_chain(IDF_chunks_chain_a)
print(prompt1_output_1.text)
# Second run with the same inputs (the chain object still holds the "IDF" value set above).
prompt1_output_2 = run_prompt_chain(IDF_chunks_chain_a)
print(prompt1_output_2.text)

### Background
1. LPDDR 6 will support 32 bits of meta data, that can according to the JEDEC standard be used for system meta data (data written by host, e.g. end-to-end ECC), Link ECC, DBI (Data Bus Inversion)
2. DBI is implementing a so-called inversion encoding that helps reducing the number of transitions on the bus which improves power consumption as well as signal integrity
3. With the increase of per pin bitrates to values >10,000 MTS, especially the signal integrity improvement is considered as very valuable by customers
4. Additionally, Link ECC/EDC is relevant for improved system robustness and error detection – This is important for improved RAS (Reliability, Availability, Serviceability) and FuSa (Functional Safety) in automotive
5. The combination table provided on the next page shows that DBI and Link ECC/EDC cannot be performed together

### Invention Details
1. This invention describes a way, that allows to perform DBI and Link ECC/EDC in parallel
2. The idea of this in

In [None]:
# Run the chain that merges the two chunk sets from the previous cell into one combined set.
# Inputs: the raw IDF text plus the text of both first-pass responses.
set_variables(IDF_chunks_chain_b, "IDF", IDF_text) # get_variables(IDF_chunks_chain_b) lists the variable names the chain declares
set_variables(IDF_chunks_chain_b, "firstChunks", prompt1_output_1.text)
set_variables(IDF_chunks_chain_b, "secondChunks", prompt1_output_2.text)
prompt1_output = run_prompt_chain(IDF_chunks_chain_b)
# The response body is parsed as JSON here; json.loads raises if the chain returned non-JSON text.
display(JSON(json.loads(prompt1_output.text)))

<IPython.core.display.JSON object>

In [None]:
# ORIGINAL single-chunk chain
# Run the first prompt chain to get the IDF chunks
# set_variables(IDF_chunks_chain, "IDF", IDF_text) #Populate the chain's variable name[s] shown by the get_variables() function in the cell above
# prompt1_output = run_prompt_chain(IDF_chunks_chain)

In [None]:
# Test that we got output from the prompt chain
# print(prompt_output.text)
#display(JSON(json.loads(prompt1_output.text)))

In [None]:
# Parse the combined-chunks response (JSON text) into a Python object for creating the DataFrames
data = json.loads(prompt1_output.text)

# Extracting Invention Details and Examples.
# A missing key falls back to an empty list, which yields an empty DataFrame below.
invention_details = data.get("Invention Details", [])
examples = data.get("Examples", [])

# Creating DataFrames: one single-column frame per category.
# NOTE(review): assumes each list holds plain strings (one chunk per row) - confirm against the chain's output.
df_invention_details = pd.DataFrame(invention_details, columns=["Invention Details"])
df_examples = pd.DataFrame(examples, columns=["Examples"])

In [None]:
# Display invention details data frame 
#df_invention_details

Unnamed: 0,Invention Details
0,"This invention describes a way, that allows to..."
1,Data Burst: 288-bits over 12 pulses (24 Beats ...
2,Sub-channel 0/1: Meta Function 1: M0 through M...
3,System Meta Carve-OUT: System meta data is 1st...
4,This approach was chosen (vs. a direct write t...
5,16-bits (2B) transfer between internal registe...
6,256-bits (32B) transfer between internal regis...
7,The idea of this invention is to use the slots...
8,"This way, bits L0 … L15 are available to imple..."
9,"In the state-of-the-art approach, inversion co..."


In [None]:
# Display examples data frame 
#df_examples

In [None]:
# This Begins an Example on 5 rows only

In [None]:
# this was originally a test, but I put it into a function that we dont call. remove it from the function to test
def test_prompt(chain=None, n_rows=5):
    """Smoke-test a support-identifying chain on the first few Invention Details chunks.

    Args:
        chain: Prompt chain to run. The single-step ``support_identifying_chain`` this
            function was written for is no longer loaded by the notebook, so calling the
            old body raised NameError. When omitted, ``support_identifying_chain_a`` is used.
            NOTE(review): that default assumes the chain's response is JSON containing a
            "Disclosure Details" list - confirm, or pass a suitable chain explicitly.
        n_rows: Number of leading rows of ``df_invention_details`` to send.

    Returns:
        DataFrame of the returned disclosure details, sorted by ascending disclosureScore.
    """
    if chain is None:
        chain = support_identifying_chain_a

    # get Detailed Description text (the `with` block closes the file)
    with open(description_filename, 'r', encoding='utf-8') as file:
        description_text = file.read()
    # print(description_text)

    # Convert the first rows of IDF chunks to JSON
    first_invention_details = df_invention_details.head(n_rows)
    json_invention_details = first_invention_details.to_json(orient='records')

    # Print the JSON strings
    print(f"JSON for the first {n_rows} rows of Invention Details DataFrame:")
    print(json_invention_details)

    # Execute the prompt chain on the selected rows
    set_variables(chain, "IDFchunks", json_invention_details)
    set_variables(chain, "DraftApp", description_text)
    prompt2_output = run_prompt_chain(chain)
    # print(prompt2_output.text)

    # Parse the response once and show it
    data = json.loads(prompt2_output.text)
    display(JSON(data))

    # Extract the list of disclosure details
    disclosure_details = data["Disclosure Details"]

    # Create a DataFrame with the extracted data
    df = pd.DataFrame(disclosure_details, columns=["idfChunk", "disclosureText", "disclosureScore"])

    # Ensure disclosureScore is treated as integers
    df['disclosureScore'] = df['disclosureScore'].astype(int)

    # A bare `df` expression only renders as the last line of a cell; inside a function
    # it is a no-op, so display() is called explicitly.
    display(df)

    # Sort the DataFrame by the disclosureScore column in ascending order (weakest support first)
    df_sorted = df.sort_values(by="disclosureScore", ascending=True)
    display(df_sorted)

    return df_sorted

def generate_html_df(df, html_file_path='dataframe_output.html'):
    """Export a DataFrame as a standalone, styled HTML page.

    Args:
        df: DataFrame to render; the index is omitted from the table.
        html_file_path: Destination file. Defaults to 'dataframe_output.html' in the
            current working directory, which is where the file was always written before.

    The page is written as UTF-8 and declares that charset, so non-ASCII cell text
    neither fails to encode on non-UTF-8 locales nor renders as mojibake in a browser.
    """
    # Doubled braces are literal CSS braces; only {table} is substituted by str.format.
    html_string = '''
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="utf-8">
    <title>DataFrame Output</title>
    <style>
    body {{
    font-family: Arial, sans-serif;
    margin: 20px;
    }}
    table {{
    width: 100%;
    border-collapse: collapse;
    margin-bottom: 20px;
    }}
    th, td {{
    border: 1px solid #ddd;
    padding: 8px;
    text-align: left;
    }}
    th {{
    background-color: #f2f2f2;
    }}
    tr:nth-child(even) {{
    background-color: #f9f9f9;
    }}
    tr:hover {{
    background-color: #f1f1f1;
    }}
    </style>
    </head>
    <body>
    <h2>DataFrame Output</h2>
    {table}
    </body>
    </html>
    '''.format(table=df.to_html(index=False))

    # Save the HTML string to a file
    with open(html_file_path, 'w', encoding='utf-8') as f:
        f.write(html_string)

    print(f"DataFrame has been exported as HTML to {html_file_path}")

## 3. Iterate over the full data set

In [None]:
# Read Detailed Description text (explicit encoding so decoding does not depend on the locale)
with open(description_filename, 'r', encoding='utf-8') as file:
    description_text = file.read()

# Number of IDF chunks sent to the prompt chains per request
chunk_size = 10

# Initialize an empty list to collect all disclosure details
all_disclosure_details = []

# Loop through the entire DataFrame in chunks
for start in range(0, len(df_invention_details), chunk_size):
    # Get the chunk of the DataFrame
    df_chunk = df_invention_details.iloc[start:start + chunk_size]
    print(df_chunk)

    # Run the support-identifying chain twice on the same chunk; the two independent
    # answers are reconciled by support_identifying_chain_b below.
    disclosure_details_a = process_chunk_to(support_identifying_chain_a, df_chunk, description_text)
    display(JSON(json.loads(disclosure_details_a.text)))
    disclosure_details_b = process_chunk_to(support_identifying_chain_a, df_chunk, description_text)
    display(JSON(json.loads(disclosure_details_b.text)))

    # Combine results a and b with other prompt chain
    # Set variables for the prompt chain
    set_variables(support_identifying_chain_b, "DraftApp", description_text)
    set_variables(support_identifying_chain_b, "FirstDisclosureRun", disclosure_details_a.text)
    set_variables(support_identifying_chain_b, "SecondDisclosureRun", disclosure_details_b.text)
    json_chunk = df_chunk.to_json(orient='records')
    set_variables(support_identifying_chain_b, "IDFchunks", json_chunk)

    # Run the prompt chain
    prompt_output = run_prompt_chain(support_identifying_chain_b)

    # Run JSON chain to turn the combined check into structured JSON
    set_variables(JSON_chain, "IDFchunks", json_chunk)
    set_variables(JSON_chain, "DisclosureCheck", prompt_output.text)
    prompt_output = run_prompt_chain(JSON_chain)

    # Parse the JSON string into a Python dictionary
    data = json.loads(prompt_output.text)
    display(JSON(data))

    # Extract the list of disclosure details
    disclosure_details_c = data["Disclosure Details"]

    # Append the disclosure details to the list
    all_disclosure_details.extend(disclosure_details_c)

# Create a DataFrame with the collected disclosure details.
# The column names must match the keys of the returned records (idfChunk / disclosureText /
# disclosureScore - the names the report cells below read). Passing non-matching names to
# `columns=` selects keys that do not exist, producing all-NaN columns and a KeyError on
# 'disclosureScore' later.
df_all_disclosures = pd.DataFrame(all_disclosure_details, columns=["idfChunk", "disclosureText", "disclosureScore"])

df_all_disclosures

                                   Invention Details
0  This invention describes a way, that allows to...
1  The idea of this invention is to use the slots...
2  This way, bits L0 … L15 are available to imple...
3  In the state-of-the-art approach, inversion co...
4  In case system meta data bits (M0 … M15) are u...
5  Encoding as well as decoding is done on host c...
6  The trade-off of the approach proposed on the ...
7  This can still be feasible because, for exampl...
8  In case a 2nd meta data function (e.g. system ...
9  In the JEDEC implementation, 16 data bits are ...


<IPython.core.display.JSON object>

<IPython.core.display.JSON object>

<IPython.core.display.JSON object>

                                    Invention Details
10  In case a 2nd meta data function is required t...
11  This can be used in a flexible way, using n bi...


<IPython.core.display.JSON object>

<IPython.core.display.JSON object>

<IPython.core.display.JSON object>

Unnamed: 0,idfChunk,disclosureText,disclosureScore
0,"This invention describes a way, that allows to...",Some implementations described herein enable s...,5
1,The idea of this invention is to use the slots...,As described in greater detail elsewhere herei...,5
2,"This way, bits L0 … L15 are available to imple...",The host system may generate link parity infor...,5
3,"In the state-of-the-art approach, inversion co...",To communicate a data packet from the host sys...,4
4,In case system meta data bits (M0 … M15) are u...,"Further, the memory system may store the inver...",5
5,Encoding as well as decoding is done on host c...,The host system may encode a payload according...,4
6,The trade-off of the approach proposed on the ...,"However, some communication protocols may not ...",4
7,"This can still be feasible because, for exampl...",The communication control information may incl...,2
8,In case a 2nd meta data function (e.g. system ...,"In some cases, the host system may include one...",3
9,"In the JEDEC implementation, 16 data bits are ...",NO DISCLOSURE,0


In [None]:
# loop through Examples (same description_text and chunk_size as the Invention Details cell)
# Initialize an empty list to collect all example
all_example_details = []

# Loop through the entire DataFrame of examples in chunks
for start in range(0, len(df_examples), chunk_size):
    # Get the chunk of the DataFrame
    df_chunk = df_examples.iloc[start:start + chunk_size]
    print(df_chunk)

    # Run the support-identifying chain twice on the same chunk; the two independent
    # answers are reconciled by support_identifying_chain_b below.
    disclosure_details_a = process_chunk_to(support_identifying_chain_a, df_chunk, description_text)
    display(JSON(json.loads(disclosure_details_a.text)))
    disclosure_details_b = process_chunk_to(support_identifying_chain_a, df_chunk, description_text)
    display(JSON(json.loads(disclosure_details_b.text)))

    # Combine results a and b with other prompt chain
    # Set variables for the prompt chain
    set_variables(support_identifying_chain_b, "DraftApp", description_text)
    set_variables(support_identifying_chain_b, "FirstDisclosureRun", disclosure_details_a.text)
    set_variables(support_identifying_chain_b, "SecondDisclosureRun", disclosure_details_b.text)
    json_chunk = df_chunk.to_json(orient='records')
    set_variables(support_identifying_chain_b, "IDFchunks", json_chunk)

    # Run the prompt chain
    prompt_output = run_prompt_chain(support_identifying_chain_b)

    # Run JSON chain to turn the combined check into structured JSON
    set_variables(JSON_chain, "IDFchunks", json_chunk)
    set_variables(JSON_chain, "DisclosureCheck", prompt_output.text)
    prompt_output = run_prompt_chain(JSON_chain)

    # Parse the JSON string into a Python dictionary
    data = json.loads(prompt_output.text)
    display(JSON(data))

    # Extract the list of disclosure details
    disclosure_details_c = data["Disclosure Details"]

    # Append the disclosure details to the list
    all_example_details.extend(disclosure_details_c)

# Create a DataFrame with the collected disclosure details.
# The column names must match the keys of the returned records (idfChunk / disclosureText /
# disclosureScore - the names the report cells below read). Passing non-matching names to
# `columns=` selects keys that do not exist, producing all-NaN columns and a KeyError on
# 'disclosureScore' later.
df_all_examples = pd.DataFrame(all_example_details, columns=["idfChunk", "disclosureText", "disclosureScore"])

df_all_examples

                                            Examples
0  Data Burst: 288-bits over 12 pulses (24 Beats ...
1  Sub-channel 0/1: Meta Function 1: M0 through M...
2  System Meta Carve-OUT: System meta data is 1st...
3  This approach was chosen (vs. a direct write t...
4  16-bits (2B) transfer between internal registe...
5  LPDDR6 DBI Data Packet Format: JEDEC DBI data ...


<IPython.core.display.JSON object>

<IPython.core.display.JSON object>

<IPython.core.display.JSON object>

Unnamed: 0,idfChunk,disclosureText,disclosureScore
0,Data Burst: 288-bits over 12 pulses (24 Beats ...,The data packet 300 may include one or more el...,4
1,Sub-channel 0/1: Meta Function 1: M0 through M...,"For example, the communication control informa...",4
2,System Meta Carve-OUT: System meta data is 1st...,"After receiving a data packet, the memory syst...",5
3,This approach was chosen (vs. a direct write t...,This approach may reduce power consumption and...,2
4,16-bits (2B) transfer between internal registe...,The host system 205 may segment a data message...,3
5,LPDDR6 DBI Data Packet Format: JEDEC DBI data ...,An inversion configuration may include one or ...,4


In [None]:
#print(prompt_output.text)

# Prepare Output

In [None]:
# A disclosure counts as "strong" at this score or above; everything else needs review.
STRONG_SCORE_THRESHOLD = 4

# Convert 'disclosureScore' column to numeric values; unparseable scores become NaN
df_all_disclosures['disclosureScore'] = pd.to_numeric(df_all_disclosures['disclosureScore'], errors='coerce')
df_all_examples['disclosureScore'] = pd.to_numeric(df_all_examples['disclosureScore'], errors='coerce')

# Total counts for reference
total_disclosures = df_all_disclosures.shape[0]
total_examples = df_all_examples.shape[0]

# Boolean masks of strongly supported rows (NaN compares False, i.e. not strong)
details_is_strong = df_all_disclosures['disclosureScore'] >= STRONG_SCORE_THRESHOLD
examples_is_strong = df_all_examples['disclosureScore'] >= STRONG_SCORE_THRESHOLD

# Create filtered data frames of everything that is NOT strong. Defining "weak" as the
# complement of "strong" (rather than `<= 3`) keeps rows with a missing/unparseable or
# fractional score in the review tables instead of silently dropping them from both groups.
df_details_weak = df_all_disclosures[~details_is_strong]
df_examples_weak = df_all_examples[~examples_is_strong]
# df_weak_disclosure = pd.concat([df_details_weak, df_examples_weak], ignore_index=True)

# Count the number of weak disclosures in each data frame
count_details_weak = df_details_weak.shape[0]
count_examples_weak = df_examples_weak.shape[0]

# Count the number of disclosures with scores at or above the threshold
df_details_strong_count = int(details_is_strong.sum())
df_examples_strong_count = int(examples_is_strong.sum())

# Print the counts
print("Count of weak disclosures in df_all_disclosures: ", count_details_weak)
print("Count of weak disclosures in df_all_examples: ", count_examples_weak)
#print("Total count of weak disclosures: ", count_weak_disclosure)

# df_weak_disclosure

Count of weak disclosures in df_all_disclosures:  3
Count of weak disclosures in df_all_examples:  2


In [None]:
# Convert the DataFrames to HTML tables (to_html escapes cell text by default)
table1_html = df_all_disclosures.to_html(index=False)
table2_html = df_all_examples.to_html(index=False)
table3_html = df_details_weak.to_html(index=False)
table4_html = df_examples_weak.to_html(index=False)

# Determine the status icons: a check mark only when every row is strongly supported
details_status_icon = '✔️' if df_details_strong_count == total_disclosures else '⚠️'
examples_status_icon = '✔️' if df_examples_strong_count == total_examples else '⚠️'

# Define the HTML template. Doubled braces are literal CSS/JS braces; single-brace names are
# filled in by str.format. The page declares UTF-8 so the emoji icons and any non-ASCII cell
# text render correctly, and "<" in the button labels is written as &lt; to stay valid HTML.
html_string = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>DataFrame Output</title>
<style>
body {{
font-family: Arial, sans-serif;
margin: 20px;
}}
table {{
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}}
th, td {{
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}}
th {{
background-color: #f2f2f2;
}}
tr:nth-child(even) {{
background-color: #f9f9f9;
}}
tr:hover {{
background-color: #f1f1f1;
}}
.collapsible {{
background-color: #f2f2f2;
color: #444;
cursor: pointer;
padding: 10px;
width: 100%;
border: none;
text-align: left;
outline: none;
font-size: 15px;
margin-bottom: 5px;
}}
.active, .collapsible:hover {{
background-color: #ddd;
}}
.content {{
padding: 0 18px;
display: block;
overflow: hidden;
background-color: #f9f9f9;
margin-bottom: 20px;
}}
.summary {{
background-color: #e6f3ff;
border-radius: 5px;
padding: 10px;
margin-bottom: 20px;
font-size: 18px;
line-height: 1.5;
}}
.icon {{
font-size: 20px;
vertical-align: middle;
margin-left: 10px;
}}
</style>
</head>
<body>

<div class="summary">
<strong>Disclosure Review Summary:</strong><br>
Invention Details: {details_count}/{total_details} <span class="icon">{details_status_icon}</span><br>
Examples: {examples_count}/{total_examples} <span class="icon">{examples_status_icon}</span>
</div>

<button class="collapsible">Check Invention Details (score &lt; 4)</button>
<div class="content">
{table3}
</div>

<button class="collapsible">Check Examples (score &lt; 4)</button>
<div class="content">
{table4}
</div>

<button class="collapsible">All Invention Details</button>
<div class="content">
{table1}
</div>

<button class="collapsible">All Examples</button>
<div class="content">
{table2}
</div>

<script>
var coll = document.getElementsByClassName("collapsible");
for (var i = 0; i < coll.length; i++) {{
coll[i].addEventListener("click", function() {{
this.classList.toggle("active");
var content = this.nextElementSibling;
if (content.style.display === "block" || content.style.display === "") {{
content.style.display = "none";
}} else {{
content.style.display = "block";
}}
}});
}}
</script>

</body>
</html>
'''.format(table1=table1_html, table2=table2_html, table3=table3_html, table4=table4_html,
details_count=df_details_strong_count, total_details=total_disclosures,
examples_count=df_examples_strong_count, total_examples=total_examples,
details_status_icon=details_status_icon, examples_status_icon=examples_status_icon)

# Save the HTML string to a file. The encoding is explicit because the page contains emoji:
# with a non-UTF-8 locale default (e.g. cp1252) the write would raise UnicodeEncodeError.
html_file_path = 'combined_output.html'
with open(html_file_path, 'w', encoding='utf-8') as f:
    f.write(html_string)

print(f"DataFrame has been exported as HTML to {html_file_path}")

DataFrame has been exported as HTML to combined_output.html


In [None]:
# # This is an old version of the report
# # Convert both DataFrames to HTML
# table1_html = df_all_disclosures.to_html(index=False)
# table2_html = df_all_examples.to_html(index=False)
# table3_html = df_details_weak.to_html(index=False)
# table4_html = df_examples_weak.to_html(index=False)

# # Define the HTML template with placeholders for both tables
# html_string = '''
# <!DOCTYPE html>
# <html>
# <head>
# <title>DataFrame Output</title>
# <style>
# body {{
# font-family: Arial, sans-serif;
# margin: 20px;
# }}
# table {{
# width: 100%;
# border-collapse: collapse;
# margin-bottom: 20px;
# }}
# th, td {{
# border: 1px solid #ddd;
# padding: 8px;
# text-align: left;
# }}
# th {{
# background-color: #f2f2f2;
# }}
# tr:nth-child(even) {{
# background-color: #f9f9f9;
# }}
# tr:hover {{
# background-color: #f1f1f1;
# }}
# .collapsible {{
# background-color: #f2f2f2;
# color: #444;
# cursor: pointer;
# padding: 10px;
# width: 100%;
# border: none;
# text-align: left;
# outline: none;
# font-size: 15px;
# margin-bottom: 5px;
# }}
# .active, .collapsible:hover {{
# background-color: #ddd;
# }}
# .content {{
# padding: 0 18px;
# display: block;
# overflow: hidden;
# background-color: #f9f9f9;
# margin-bottom: 20px;
# }}
# .summary {{
# background-color: #e6f3ff;
# border-radius: 5px;
# padding: 10px;
# margin-bottom: 20px;
# font-size: 18px;
# line-height: 1.5;
# }}
# </style>
# </head>
# <body>

# <div class="summary">
# <strong>Disclosure Review Summary:</strong><br>
# Invention Details: {details_count}/{total_details}<br>
# Examples: {examples_count}/{total_examples}
# </div>

# <button class="collapsible">Check Invention Details (score < 4)</button>
# <div class="content">
# {table3}
# </div>

# <button class="collapsible">Check Examples (score < 4)</button>
# <div class="content">
# {table4}
# </div>

# <button class="collapsible">Invention Details</button>
# <div class="content">
# {table1}
# </div>

# <button class="collapsible">Examples</button>
# <div class="content">
# {table2}
# </div>

# <script>
# var coll = document.getElementsByClassName("collapsible");
# for (var i = 0; i < coll.length; i++) {{
# coll[i].addEventListener("click", function() {{
# this.classList.toggle("active");
# var content = this.nextElementSibling;
# if (content.style.display === "block") {{
# content.style.display = "none";
# }} else {{
# content.style.display = "block";
# }}
# }});
# }}
# </script>

# </body>
# </html>
# '''.format(table1=table1_html, table2=table2_html, table3=table3_html, table4=table4_html,
# details_count=df_details_strong_count, total_details=total_disclosures,
# examples_count=df_examples_strong_count, total_examples=total_examples)

# # Save the HTML string to a file
# html_file_path = 'combined_output.html'
# with open(html_file_path, 'w') as f:
#     f.write(html_string)

# print(f"DataFrame has been exported as HTML to {html_file_path}")

In [None]:
# debugging
#disclosure_details_a = process_chunk_to(support_identifying_chain_a, df_chunk, description_text)
#display(JSON(json.loads(disclosure_details_a.text)))
#print(disclosure_details_b.text)

In [None]:
# include background stuff in the report, categorize it somewhere else