In [None]:
# create segment using '\n' in generated limitations

def process_single_limitation(limitation_text):
    """Break a limitations string into per-line lists of sentences.

    The text is split on newline characters; every resulting line is split
    on '.', each piece is stripped of surrounding whitespace, and empty
    pieces are dropped. Lines that end up with no pieces are omitted.

    Args:
        limitation_text: The raw text to segment (must support ``str.split``).

    Returns:
        A list with one inner list of sentence strings per non-empty line.
    """
    per_line = (
        [piece.strip() for piece in line.split('.') if piece.strip()]
        for line in limitation_text.split('\n')
    )
    return [sentences for sentences in per_line if sentences]

# Segment every value of the 'generated_limitations_mis_all' column with
# process_single_limitation and store the nested lists in a new
# 'processed_limitations' column.
df2['processed_limitations'] = df2['generated_limitations_mis_all'].apply(process_single_limitation)


In [None]:
# Function to add numbers to each list in a list of lists
def add_numbers_to_list_of_lists(data):
    """Prefix each inner list with its 1-based position label.

    Args:
        data: A sequence of lists.

    Returns:
        A new list in which the k-th inner list (counting from 1) is
        preceded by the string "k."; the input lists are not modified.
    """
    return [
        [f"{position}."] + inner_list
        for position, inner_list in enumerate(data, start=1)
    ]

# Prefix every sentence list in 'processed_limitations' with its 1-based label ("1.", "2.", ...).
# NOTE: the column is overwritten in place, so re-running this cell prepends a second label.
df2['processed_limitations'] = df2['processed_limitations'].apply(add_numbers_to_list_of_lists)


In [None]:
def clean_and_filter_text(lists):
    """Strip every string in each sublist and discard what ends up empty.

    Args:
        lists: A sequence of sublists of strings.

    Returns:
        A new list of sublists in which every string is stripped of
        surrounding whitespace, blank strings are removed, and sublists
        left without any string are dropped.
    """
    stripped = (
        [entry.strip() for entry in sublist if entry.strip()]
        for sublist in lists
    )
    return [kept for kept in stripped if kept]

# Strip whitespace from every string in each sentence list and drop strings/lists left empty.
df2['processed_limitations'] =df2['processed_limitations'].apply(clean_and_filter_text)


In [None]:
# convert str to list
import ast

# Convert strings in the 'Lim_points_gt_gpt_lists' column to lists of lists.
# Only string cells are parsed: ast.literal_eval raises ValueError when handed
# an already-parsed list, so the guard makes this cell safe to re-run.
df2['Lim_points_gt_gpt_lists'] = df2['Lim_points_gt_gpt_lists'].apply(
    lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
)


In [None]:
# Build the 'combined' column: for every row, the list of all
# (ground-truth item, generated item) pairs, ground-truth item varying slowest.
#
# The two source columns are walked positionally with zip. The previous
# version looked rows up with df2[col][i] / df2.at[i, ...] for i in
# range(len(df2)), which are label lookups and therefore fail or hit the
# wrong row whenever df2 does not carry the default 0..n-1 index.
df2['combined'] = [
    [(item1, item2) for item1 in list1 for item2 in list2]
    for list1, list2 in zip(df2['Lim_points_gt_gpt_lists'], df2['processed_limitations'])
]

In [None]:
# at first, generate summary (index 0 to 412) (done)
import os
from openai import OpenAI

# The API key must come from the environment. The previous version assigned an
# empty string to os.environ['OPENAI_API_KEY'], which both invited pasting a
# secret into the notebook and wiped out a key that was already exported.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set; export it before running this cell.")

client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)

# Fixed head of the prompt. The text (including the indentation that follows
# the first newline) is reproduced verbatim so the model input is unchanged.
MATCH_PROMPT_HEAD = (
    "Check whether 'list2' contains a topic or limitation from 'list1' or 'list1' contains a topic or limitation from 'list2'.\n"
    "      Your answer should be \"Yes\" or \"No\" \n. List 1:"
)


def _ask_whether_lists_match(description1, description2):
    """Ask the chat model whether the two descriptions share a topic or limitation.

    Args:
        description1: Item taken from the first list; converted with ``str``.
        description2: Item taken from the second list; converted with ``str``.

    Returns:
        The full reply text, assembled from the streamed chunks. The prompt
        asks for "Yes" or "No", but the reply is returned unmodified.
    """
    prompt = MATCH_PROMPT_HEAD + str(description1) + "List2: " + str(description2)
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
        temperature=0,
    )
    reply_parts = []
    for chunk in stream:
        # A streamed chunk may arrive with an empty choices list; skip it
        # rather than fail on choices[0].
        if not chunk.choices:
            continue
        reply_parts.append(chunk.choices[0].delta.content or "")
    return "".join(reply_parts)


# One entry per df2 row; each entry is a list of tuples
# ([reply_text], "list1", description1, "list2", description2).
all_generated_summary = []

# Iterate over the column values directly so the loop does not depend on df2
# having a 0..n-1 index.
for pairs in df2['combined']:
    generated_summary = []
    for description1, description2 in pairs:
        summary_text = _ask_whether_lists_match(description1, description2)
        generated_summary.append(([summary_text], "list1", description1, "list2", description2))
    all_generated_summary.append(generated_summary)


Ground Truth Coverage

In [None]:
# process 2
# Flatten all_generated_summary into one record per compared pair.
data = []
row_num = 1  # 1-based position of the sublist the pairs came from

for sublist in all_generated_summary:
    # Each tuple is ([reply_text], "list1", ground_truth_item, "list2", generated_item);
    # the two string labels are not needed in the table.
    data.extend(
        {
            'row_num': row_num,
            'is_match': verdict[0],
            'ground_truth': gt_item,
            'llm_generated': generated_item,
        }
        for verdict, _, gt_item, _, generated_item in sublist
    )
    row_num += 1  # next sublist gets the next id

# Columns follow the key order of the dictionaries above.
df4 = pd.DataFrame(data)

In [None]:
import re

# Update the function to handle lists in each row
def extract_first_number_from_list(row):
    """Return the leading integer of the first string in ``row`` that starts with digits.

    Args:
        row: An iterable of strings, examined in order.

    Returns:
        The run of digits at the very start of the first matching string,
        as an ``int``; ``None`` when no string starts with a digit.
    """
    leading_digits = re.compile(r'^(\d+)')
    hits = (leading_digits.match(text) for text in row)
    return next((int(hit.group(1)) for hit in hits if hit), None)

# Apply the updated function to the 'ground_truth' column
df4['section'] = df4['ground_truth'].apply(extract_first_number_from_list)

# Count the ground-truth sections that received at least one 'Yes'.
#
# A section is a run of consecutive rows sharing the same (row_num, section)
# pair. row_num has to be part of the key: the same section number can occur
# under different row_num values, and comparing the number alone merged the
# last section of one row_num with the first section of the next whenever the
# two numbers were equal, so two sections were counted as one.
# NOTE(review): only an exact 'Yes' reply counts; variants such as 'Yes.' do not.
current_key = None
section_has_yes = False
ck = 0

for row_num_value, section_value, is_match_value in zip(
    df4['row_num'], df4['section'], df4['is_match']
):
    key = (row_num_value, section_value)
    if key != current_key:
        # A new section starts: settle the one that just ended.
        if section_has_yes:
            ck += 1
        current_key = key
        section_has_yes = False
    if is_match_value == 'Yes':
        section_has_yes = True

# Settle the last section after exiting the loop
if section_has_yes:
    ck += 1



In [None]:
# Total number of ground-truth items, counted as runs of consecutive df4 rows
# whose 'ground_truth' value is identical.

# Label the runs: the id increases whenever 'ground_truth' differs from the previous row.
unique_blocks = df4['ground_truth'].ne(df4['ground_truth'].shift()).cumsum()

# One row per run holding the count of its 'ground_truth' values; len(group_counts) is the number of runs.
group_counts = df4.groupby(unique_blocks)['ground_truth'].agg(['count'])

# Only the header line is printed here; group_counts itself is not displayed by this cell.
print("Number of unique consecutive 'ground_truth' texts and their counts:")


In [None]:
# Percentage of ground-truth items with at least one 'Yes'.
# group_counts holds one row per ground-truth block, so its length (not the
# DataFrame itself) is the denominator; the original line also lacked the
# comma between the label and the value and did not parse.
total_ground_truth = len(group_counts)
coverage_pct = ck * 100 / total_ground_truth if total_ground_truth else float('nan')
print("Ground Truth limitation coverage:", coverage_pct)

LLM Generated Coverage

In [None]:
def extract_first_number(text):
    """Return the integer formed by the digits at the very start of ``text``.

    Args:
        text: A string, or a list of strings that is first joined with
            single spaces.

    Returns:
        The leading digits as an ``int``, or ``None`` when the (joined)
        text does not start with a digit.
    """
    import re

    candidate = " ".join(text) if isinstance(text, list) else text
    found = re.match(r'^(\d+)', candidate)
    if found is None:
        return None
    return int(found.group(1))

# Store the leading number of each 'llm_generated' entry in an 'order' column.
df4['order'] = df4['llm_generated'].apply(extract_first_number)

# Arrange rows by 'row_num', then by 'order', and renumber the index from 0.
df_recall = df4.sort_values(by=['row_num', 'order']).reset_index(drop=True)

In [None]:
# Reorder the columns by placing 'llm_generated' before 'ground_truth';
# the selection keeps exactly the six columns listed.
df_recall = df_recall[['row_num', 'is_match', 'llm_generated', 'ground_truth', 'section', 'order']]


In [None]:
# how many
# df_recall.drop('section', axis=1, inplace=True)
import re

def extract_first_number(text):
    """Return the integer formed by the digits at the very start of ``text``.

    Args:
        text: A string, or a list of strings that is first joined with
            single spaces.

    Returns:
        The leading digits as an ``int``, or ``None`` when the (joined)
        text does not start with a digit.
    """
    import re

    candidate = " ".join(text) if isinstance(text, list) else text
    found = re.match(r'^(\d+)', candidate)
    if found is None:
        return None
    return int(found.group(1))

# Extract the first number of each 'llm_generated' entry; this overwrites the
# existing 'section' column of df_recall.
df_recall['section'] = df_recall['llm_generated'].apply(extract_first_number)

# Count the generated sections that received at least one 'Yes'.
#
# A section is a run of consecutive rows sharing the same (row_num, section)
# pair. row_num has to be part of the key: the same section number can occur
# under different row_num values, and comparing the number alone merged the
# last section of one row_num with the first section of the next whenever the
# two numbers were equal, so two sections were counted as one.
# NOTE(review): only an exact 'Yes' reply counts; variants such as 'Yes.' do not.
current_key = None
section_has_yes = False
ck = 0

for row_num_value, section_value, is_match_value in zip(
    df_recall['row_num'], df_recall['section'], df_recall['is_match']
):
    key = (row_num_value, section_value)
    if key != current_key:
        # A new section starts: settle the one that just ended.
        if section_has_yes:
            ck += 1
        current_key = key
        section_has_yes = False
    if is_match_value == 'Yes':
        section_has_yes = True

# Settle the last section after exiting the loop
if section_has_yes:
    ck += 1

print("Number of sections with at least one 'Yes':", ck)

In [None]:
# Total number of LLM-generated items, counted as runs of consecutive
# df_recall rows whose 'llm_generated' value is identical.

# Label the runs: the id increases whenever 'llm_generated' differs from the previous row.
unique_blocks = df_recall['llm_generated'].ne(df_recall['llm_generated'].shift()).cumsum()

# One row per run holding the count of its 'llm_generated' values.
group_counts = df_recall.groupby(unique_blocks)['llm_generated'].agg(['count'])

# Output the results. The label previously said 'ground_truth' although the
# counts are computed over the 'llm_generated' column.
print("Number of unique consecutive 'llm_generated' texts and their counts:")
print(group_counts)

In [None]:
# Percentage of LLM-generated items with at least one 'Yes'.
# group_counts holds one row per generated-item block, so its length (not the
# DataFrame itself) is the denominator; the original line also lacked the
# comma between the label and the value and did not parse.
total_llm_generated = len(group_counts)
coverage_pct = ck * 100 / total_llm_generated if total_llm_generated else float('nan')
print("LLM Generated limitation coverage:", coverage_pct)