In [1]:
import pandas as pd
import os

In [2]:
def replace_empty_turns(old_df, new_df, column_name='Turns'):
    """
    Replaces rows in the old dataframe where the specified column has '{}' with rows from the new dataframe.
    
    Parameters:
    - old_df: The original dataframe where some rows contain '{}'.
    - new_df: The dataframe with the new rows that should replace those containing '{}'.
    - column_name: The name of the column to check for '{}' (default is 'Turns').
    
    Returns:
    - A modified version of the old dataframe with rows replaced by the corresponding rows from new_df.
    """
    # Step 1: Identify rows where the specified column has '{}'
    rows_with_empty_turns = old_df[old_df[column_name] == '{}'].index
    
    # Step 2: Replace those rows in old_df with corresponding rows from new_df
    for idx in rows_with_empty_turns:
        # Assuming new_df rows have the same order as the affected rows in old_df
        new_row = new_df.iloc[rows_with_empty_turns.get_loc(idx)]
        old_df.loc[idx] = new_row
    
    return old_df

In [3]:
llama3_new = pd.read_csv("../critic3_v10/result_df_vredone.csv", index_col=0)
llama3_new["Chat"] = llama3_new["Chat"].apply(lambda x: x.replace("Consumer Grievance Redressal Chatbot:", "AI:"))
print(len(llama3_new))
llama3_new.head()

23


Unnamed: 0,Chat,Turns,Analysis_Combined,Result
0,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nthe remedy ...",Turn 5\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes.\r\nThe followin...
1,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nCommunication:...",Turn 3\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes.\r\nThe followin...
2,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nThe service pr...",Turn 3\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes. \r\nThe followi...
3,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nprotection is ...",Turn 3\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes.\r\nThe followin...
4,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nthe very onset...",Turn 4\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes. \r\nThe follow...


In [4]:
gemini_old = pd.read_csv("../critic3_v10_gemini/result_df_v1.csv", index_col=0)
print(len(gemini_old))
gemini_old.head()

115


Unnamed: 0,Chat,Turns,Analysis_Combined,Result
0,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\nwhere the inci...,Turn 5\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes.\nThe following ...
1,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nprotection is ...",Turn 6\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes.\nThe following ...
2,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\nThe service pr...,Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes.\nThe following ...
3,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\ndistrict of ...",Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes.\nThe following ...
4,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\nany harmful or...,Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes.\nThe following ...


In [5]:
deepseek_old = pd.read_csv("../critic3_v10_deepseek/result_df_v1.csv", index_col=0)
# display(deepseek_old.head())
deepseek_new = pd.read_csv("../critic3_v10_deepseek/result_df_vredone.csv", index_col=0)
deepseek_new["Chat"] = deepseek_new["Chat"].apply(lambda x: x.replace("Consumer Grievance Redressal Chatbot:", "AI:"))
# display(deepseek_new.head())
# print(len(deepseek_old), len(deepseek_new))
deepseek = replace_empty_turns(deepseek_old, deepseek_new)
deepseek

Unnamed: 0,Chat,Turns,Analysis_Combined,Result
0,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\ngrievance with...,Turn 2\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes. \r\nThe follow...
1,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nreceived a pri...",Turn 2\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes. \r\nThe follow...
2,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nfrom offerin...",Turn 4\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes. \r\nThe follow...
3,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\ncomplainant ...",Turn 1\r\nInconsistencies Present: No\r\nTurn ...,Inconsistencies detected: No.
4,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\nany harmful or...,Turn 3\r\nInconsistencies Present: Yes\r\nInco...,Inconsistencies detected: Yes. \r\nThe follow...
...,...,...,...,...
110,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\n○ Refund the...",Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...
111,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nand residence....",Turn 1\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...
112,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nin this case w...",Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...
113,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\ncomplaint? Log...,Turn 2\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...


In [6]:
gpt_new = pd.read_csv("../critic3_v10_gpt4omini/result_df_vredone.csv", index_col=0)
gpt_new["Chat"] = gpt_new["Chat"].apply(lambda x: x.replace("Consumer Grievance Redressal Chatbot:", "AI:"))
display(gpt_new.head())
print(len(gpt_new))

Unnamed: 0,Chat,Turns,Analysis_Combined,Result
0,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\nwhere the inci...,Turn 4\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...
1,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nprotection is ...",Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...
2,﻿AI: Hi! I am your consumer grievance assistan...,"{1: {'context': ""\nDocument 0:\nThe service pr...",Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...
3,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\ndistrict of ...,Turn 1\nInconsistencies Present: No\nTurn 2\nI...,Inconsistencies detected: No.
4,﻿AI: Hi! I am your consumer grievance assistan...,{1: {'context': '\nDocument 0:\nany harmful or...,Turn 3\nInconsistencies Present: Yes\nInconsis...,Inconsistencies detected: Yes. \nThe followin...


115


In [7]:
dfs = {"llama3_new":llama3_new, "deepseek": deepseek, "gpt_new": gpt_new}

In [8]:
# df = llama3_new.copy()
# df['Result'] = df['Result'].str.strip()

# # Step 2: Split the DataFrame into "No" and "Yes" based on the "Result" column
# df_no = df[df['Result'].str.startswith("Inconsistencies detected: No", na=False)]
# df_yes = df[df['Result'].str.startswith("Inconsistencies detected: Yes", na=False)]

# # Step 3: Check if the sum of the lengths of 'df_no' and 'df_yes' match the length of the original DataFrame
# if len(df_no) + len(df_yes) != len(df):
#     unexpected_values = set(df['Result']) - set(df_no['Result']).union(set(df_yes['Result']))

#     # Output the rows that have these unexpected values for debugging
#     print(f"Unexpected values in 'Result' column: {unexpected_values}")

#     # Find and display the rows with those unexpected values
#     unexpected_rows = df[df['Result'].isin(unexpected_values)]
#     print("\nRows with unexpected 'Result' values:")
#     print(unexpected_rows)
#     raise ValueError("Error: The sum of 'No' and 'Yes' categories does not match the original DataFrame's length.")

# # Step 4: Output the "No" and "Yes" DataFrames
# print("DataFrame with 'No' Inconsistencies:")
# print(df_no)

# print("\nDataFrame with 'Yes' Inconsistencies:")
# print(df_yes)

In [15]:
import re
import os
def get_inconsistencies(text):
    pattern = r'\n\d+\.\s(.*?)(?=\n\d+\.\s|$)'
    matches = re.findall(pattern, text, re.DOTALL)
    matches = [str(i) + ". " + match.strip() for i, match in enumerate(matches, start=1)]
    return matches

# Function to split rows based on the result column
def split_rows_based_on_result(df, column_name):
    new_rows = []

    # Iterate over the DataFrame rows
    for idx, row in df.iterrows():
        result_list = get_inconsistencies(row[column_name])  # Get the list of inconsistencies from the 'Result' column

        # For each element in the result list, create a new row
        for i, result in enumerate(result_list):
            new_row = row.copy()  # Copy the existing row
            new_row[column_name] = result  # Set the 'Result' column to the current inconsistency
            new_row['inconsistency_no'] = i  # Set the 'inconsistency_no' to the index of the inconsistency
            new_rows.append(new_row)  # Add the new row to the list

    # Create a new DataFrame from the new rows
    new_df = pd.DataFrame(new_rows)

    return new_df

def process_dataframe(df):
    # Step 1: Split the DataFrame into df_no and df_yes based on "Result" column
    df_no = df[df['Result'].str.startswith("Inconsistencies detected: No", na=False)]
    df_yes = df[df['Result'].str.startswith("Inconsistencies detected: Yes", na=False)]

    # Step 3: Check if the sum of the lengths of 'df_no' and 'df_yes' match the length of the original DataFrame
    if len(df_no) + len(df_yes) != len(df):
        unexpected_values = set(df['Result']) - set(df_no['Result']).union(set(df_yes['Result']))

        # Output the rows that have these unexpected values for debugging
        print(f"Unexpected values in 'Result' column: {unexpected_values}")

        # Find and display the rows with those unexpected values
        unexpected_rows = df[df['Result'].isin(unexpected_values)]
        print("\nRows with unexpected 'Result' values:")
        print(unexpected_rows)
        raise ValueError("Error: The sum of 'No' and 'Yes' categories does not match the original DataFrame's length.")

    # Step 2: Split df_yes into multiple rows for each inconsistency
    df_yes_split = split_rows_based_on_result(df_yes, "Result")

    # Step 3: Add "Annotation" column with "Unknown" value for df_no and df_yes_split
    df_no['Precision Annotation'] = '[]' * len(df_no)
    df_yes_split['Precision Annotation'] = 'Unknown' * len(df_yes_split)

    # Step 4: Add an "Index" column with latest index (this will create a sequential index column)
    df_no['Index'] = df_no.index
    df_yes_split['Index'] = df_yes_split.index

    # Step 5: Add the "Annotation" column to the original DataFrame as well
    df['Recall Annotation'] = 'Unknown' * len(df)
    df['Index'] = df.index  # Adding Index column to original df as well

    return df_no, df_yes_split, df

# Function to save DataFrames to a folder
def save_dataframes_to_folder(df_no, df_yes_split, df, folder_path, dataframe_name):
    # Create a directory for the DataFrames if it does not exist
    os.makedirs(os.path.join(folder_path, f"{dataframe_name}"), exist_ok=True)
    
    # Save the DataFrames to CSV files in the folder
    df_no.to_csv(os.path.join(folder_path, f"{dataframe_name}/{dataframe_name}_df_no.csv"), index=False)
    df_yes_split.to_csv(os.path.join(folder_path, f"{dataframe_name}/{dataframe_name}_df_yes_split.csv"), index=False)
    df.to_csv(os.path.join(folder_path, f"{dataframe_name}/{dataframe_name}_recall.csv"), index=False)

# Main function to process and save all DataFrames in a list
def process_and_save_all(dfs, folder_path):
    for dataframe_name, dataframe in dfs.items():

        # Process the dataframe
        df_no, df_yes_split, df = process_dataframe(dataframe)
        print(dataframe_name, len(df_no), len(df_yes_split), len(df))
        # display(df_yes_split.head())
        
        # Save the DataFrames to the folder
        save_dataframes_to_folder(df_no, df_yes_split, df, folder_path, dataframe_name)
        print(f"Processed and saved {dataframe_name} to {folder_path}/{dataframe_name}")

In [16]:
process_and_save_all(dfs, "./")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no['Precision Annotation'] = '[]' * len(df_no)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no['Index'] = df_no.index


llama3_new 4 99 23
Processed and saved llama3_new to .//llama3_new
deepseek 10 588 115


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no['Precision Annotation'] = '[]' * len(df_no)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no['Index'] = df_no.index


Processed and saved deepseek to .//deepseek


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no['Precision Annotation'] = '[]' * len(df_no)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no['Index'] = df_no.index


gpt_new 9 564 115
Processed and saved gpt_new to .//gpt_new


# Baseline Annotation