In [1]:
import pandas as pd
import numpy as np

def split_csv(file_path, output_dir='.'):
    """Split one CSV file into four CSV files, grouping columns by name prefix.

    Columns of the input are routed by the prefix of their header:

    * ``H``       -> ``h1.csv``       (presumably the initial answers)
    * ``CONF-H``  -> ``conf-h1.csv``  (presumably the initial confidence)
    * ``FH``      -> ``fh.csv``       (presumably the final answers)
    * ``CONF-FH`` -> ``conf-fh.csv``  (presumably the final confidence)

    Columns matching none of the prefixes are not written anywhere.

    Args:
        file_path: Path of the comma-separated input file (first row is the header).
        output_dir: Directory that receives the four output files. It is created
            if it does not exist. Defaults to the current working directory,
            which is where the files were written before this parameter existed.
    """
    import os

    df = pd.read_csv(file_path, sep=',')

    # None of these prefixes is a prefix of another one, so every column lands
    # in at most one output file.
    prefix_to_filename = {
        'H': 'h1.csv',
        'CONF-H': 'conf-h1.csv',
        'FH': 'fh.csv',
        'CONF-FH': 'conf-fh.csv',
    }

    os.makedirs(output_dir, exist_ok=True)
    for prefix, filename in prefix_to_filename.items():
        selected_cols = [col for col in df.columns if col.startswith(prefix)]
        df[selected_cols].to_csv(os.path.join(output_dir, filename), index=False, sep=',')
    print("split_csv executed")

In [2]:
def compare_csvs(multiline_file_path, singleline_file_path, output_file_path):
    """Compare every row of one CSV against the single data row of another CSV.

    Both files are read with their first line (the header) skipped. Each cell
    of the multi-line file is compared with the cell in the same column of the
    single-line file, and the result is written to ``output_file_path``:

    * ``1`` when the two cells are equal,
    * ``0`` when they differ,
    * empty (NaN) when either of the two cells is missing.

    The output file reuses the header of the multi-line file.

    Args:
        multiline_file_path: CSV with a header and one or more data rows.
        singleline_file_path: CSV with a header and exactly one data row,
            used as the reference for every row of the multi-line file.
        output_file_path: Destination of the comparison CSV.

    Raises:
        ValueError: If the single-line file does not contain exactly one data
            row, or if the two files have a different number of columns.
    """
    # Data is loaded without headers so that both frames share positional
    # column labels (0, 1, 2, ...) and can be aligned column by column.
    df_multiline = pd.read_csv(multiline_file_path, sep=',', skiprows=1, header=None)
    df_singleline = pd.read_csv(singleline_file_path, sep=',', skiprows=1, header=None)

    # Diagnostic output: the positional column labels of both inputs.
    print(df_multiline.columns)
    print(df_singleline.columns)

    # Fail early with a readable message instead of letting pandas raise an
    # "identically-labeled" error further down.
    if len(df_singleline) != 1:
        raise ValueError(
            "singleline_file_path must contain exactly one data row after the "
            "header (found {})".format(len(df_singleline))
        )
    if df_singleline.shape[1] != df_multiline.shape[1]:
        raise ValueError(
            "column count mismatch: {} columns in the multi-line file, {} in "
            "the single-line file".format(df_multiline.shape[1], df_singleline.shape[1])
        )

    # Header names of the multi-line file, re-applied to the output below.
    headers = pd.read_csv(multiline_file_path, sep=',', nrows=0).columns.tolist()

    # The reference row is a Series indexed by column label; pandas broadcasts
    # it across all rows, so it does not need to be physically replicated.
    reference = df_singleline.iloc[0]

    # A comparison is undefined wherever either side is missing.
    empty_cells = df_multiline.isnull() | reference.isnull()

    # 1 / 0 for equal / different, NaN where either cell is missing.
    df_comparison = df_multiline.eq(reference).astype(int).where(~empty_cells, other=np.nan)

    df_comparison.columns = headers

    df_comparison.to_csv(output_file_path, index=False, sep=',')

    print("compare_csvs executed")

In [3]:
def create_reliances(h1_file_path, ai_file_path, fh_file_path, output_file_path):
    """Flatten three CSV tables into one long table with one row per cell.

    All three files are read with their first line (the header) skipped. For
    every cell ``(i, j)`` of the ``h1`` table one output row is produced:

    * ``id``  - the row number ``i`` of the ``h1`` table,
    * ``HD1`` - the ``h1`` value at ``(i, j)``,
    * ``AI``  - the value in column ``j`` of the *first* row of the ``ai`` table,
    * ``FHD`` - the ``fh`` value at ``(i, j)``.

    Rows are ordered row by row, then column by column. Rows in which any
    value is missing are dropped, and the remaining values are cast to ``int``.
    The result is written to ``output_file_path`` and also returned.

    Args:
        h1_file_path: CSV that defines the shape of the output.
        ai_file_path: CSV whose first data row is repeated for every ``h1`` row.
        fh_file_path: CSV with at least as many rows and columns as ``h1``.
        output_file_path: Destination CSV for the long table.

    Returns:
        pandas.DataFrame with integer columns ``id``, ``HD1``, ``AI``, ``FHD``.

    Raises:
        IndexError: If the ``ai`` or ``fh`` table is smaller than the ``h1`` table.
    """
    df_h1 = pd.read_csv(h1_file_path, sep=',', skiprows=1, header=None)
    df_ai = pd.read_csv(ai_file_path, sep=',', skiprows=1, header=None)
    df_fh = pd.read_csv(fh_file_path, sep=',', skiprows=1, header=None)

    n_rows, n_cols = df_h1.shape

    # Out-of-range positions used to surface as an IndexError from .iloc;
    # keep the exception type but report which file is too small.
    if df_ai.shape[0] < 1 or df_ai.shape[1] < n_cols:
        raise IndexError(
            "ai table has shape {}, expected at least (1, {})".format(df_ai.shape, n_cols)
        )
    if df_fh.shape[0] < n_rows or df_fh.shape[1] < n_cols:
        raise IndexError(
            "fh table has shape {}, expected at least ({}, {})".format(df_fh.shape, n_rows, n_cols)
        )

    # ravel() walks the values row by row, so position i * n_cols + j of each
    # flattened array corresponds to cell (i, j) of the source table.
    df_reliances = pd.DataFrame({
        'id': np.repeat(np.arange(n_rows), n_cols),
        'HD1': df_h1.to_numpy().ravel(),
        'AI': np.tile(df_ai.iloc[0, :n_cols].to_numpy(), n_rows),
        'FHD': df_fh.iloc[:n_rows, :n_cols].to_numpy().ravel(),
    })

    # Missing values must be removed *before* the integer cast: casting NaN to
    # int raises, so the previous astype-then-dropna order could never drop anything.
    df_reliances = df_reliances.dropna().astype(int)

    df_reliances.to_csv(output_file_path, index=False, sep=',')
    print("create_reliances executed")
    return df_reliances

In [4]:
## Base name (without the .csv extension) of the input file and the working folder
file_path = 'groups'  # replace with your file name
basepath = './'

## Split the input file into h1.csv, conf-h1.csv, fh.csv and conf-fh.csv
split_csv(basepath + file_path + ".csv")

# Every comparison is described by three file names inside `basepath`:
# (multi-line file, single-line reference file, output file).
comparisons = [
    ('h1.csv', 'ai.csv', 'agreementh1ai.csv'),
    ('h1.csv', 'groundtruth.csv', 'accuratezze-h1.csv'),
    ('ai.csv', 'groundtruth.csv', 'accuratezze-ai.csv'),
    ('fh.csv', 'groundtruth.csv', 'accuratezze-fh.csv'),
]

for multiline_name, singleline_name, output_name in comparisons:
    multiline_file_path = basepath + multiline_name
    singleline_file_path = basepath + singleline_name
    output_file_path = basepath + output_name
    compare_csvs(multiline_file_path, singleline_file_path, output_file_path)

# Inputs of the final step: the three comparison files written by the loop above
h1_file_path = basepath + 'accuratezze-h1.csv'
ai_file_path = basepath + 'accuratezze-ai.csv'
fh_file_path = basepath + 'accuratezze-fh.csv'

# Output of the final step, named after the input file
output_file_path = basepath + file_path + '_reliances.csv'

# Build the long table and keep it for inspection in the following cells
rels = create_reliances(h1_file_path, ai_file_path, fh_file_path, output_file_path)

split_csv executed
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
compare_csvs executed
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
compare_csvs executed
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
compare_csvs executed
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')
compare_csvs executed


In [5]:
# Display the table returned by create_reliances; dropna() filters out any row
# that contains a missing value (the result is only shown, `rels` is not modified).
rels.dropna()

Unnamed: 0,id,HD1,AI,FHD
0,0,0,1,1
1,0,1,1,1
2,0,0,1,0
3,0,1,1,1
4,0,1,1,1
...,...,...,...,...
475,23,1,0,1
476,23,0,1,0
477,23,0,0,0
478,23,0,0,1
