In [1]:
import pandas as pd

## Read predictions

In [2]:
def read_predictions(file_path):
    """Load a predictions CSV and discard its 'Similarity score' column.

    Parameters
    ----------
    file_path
        Path (or any other source accepted by ``pd.read_csv``) of the
        predictions file.

    Returns
    -------
    pandas.DataFrame
        The file's contents without the 'Similarity score' column.

    Raises
    ------
    KeyError
        If the file has no 'Similarity score' column.
    """
    predictions = pd.read_csv(file_path)
    return predictions.drop(columns=['Similarity score'])

# Load every model's predictions from the "*_records.csv" files ...
bert_records, roberta_records, sgpt_records, xlnet_records = map(
    read_predictions,
    (
        'data/predictions/bert_records.csv',
        'data/predictions/roberta_records.csv',
        'data/predictions/sgpt_records.csv',
        'data/predictions/xlnet_records.csv',
    ),
)

# ... and from the "*_taxonomy.csv" files, in the same model order.
bert_taxonomy, roberta_taxonomy, sgpt_taxonomy, xlnet_taxonomy = map(
    read_predictions,
    (
        'data/predictions/bert_taxonomy.csv',
        'data/predictions/roberta_taxonomy.csv',
        'data/predictions/sgpt_taxonomy.csv',
        'data/predictions/xlnet_taxonomy.csv',
    ),
)

In [3]:
# Preview the first 10 rows of bert_records (read from data/predictions/bert_records.csv)
bert_records.head(10)

Unnamed: 0,Search Term,Record ID,Name,Description
0,depression,5693,Cleidemar Teani,Counselling and Psychotherapy
1,depression,1634,Beaubien Dumont,Art-therapist
2,depression,5147,Keith Marlowe,"Marital therapy, Trauma"
3,depression,3488,Montreal Centre for Anxiety and Depression,Provides therapy for people with anxiety or de...
4,depression,5174,Ms. Sara Saniee,"Counseling and psychotherapy for trauma PTSD,..."
5,depression,668,Ontario Fragile X Support Group,Support group for Fragile X.
6,depression,4315,Plantagenet Family Health Team,Family health team.
7,depression,4382,Counselling,"Counselling services for depression, anxiety, ..."
8,depression,5429,Julian D'Souza,"Counselling for Addictions, Depression, Anxiet..."
9,depression,3822,Crystal Arber,Provides PTSD Counselling using EMDRHelp for f...


## Combine predictions

In [4]:
# Merge the "*_records" predictions of the four models into a single table.
#
# Every (Search Term, Record ID) pair is kept once, using the row from the
# first model that produced it, and the 'model' column lists each model that
# returned the pair, in processing order.
#
# Rows are collected in plain Python containers and the frame is built once
# at the end. Calling pd.concat and re-filtering the growing frame for every
# input row is quadratic, and concatenating onto an empty frame is deprecated
# in recent pandas releases.
base_columns = ['Search Term', 'Record ID', 'Name', 'Description']
unique_rows = []    # first-seen row (as a dict) for each (search term, record id) pair
model = []          # model[i] holds the names of the models that predicted unique_rows[i]
row_position = {}   # (search term, record id) -> index into unique_rows / model

for (df, model_name) in [(bert_records, 'BERT'), (roberta_records, 'RoBERTa'), (sgpt_records, 'SGPT'), (xlnet_records, 'XLNet')]:
    for row in df.to_dict('records'):
        key = (row['Search Term'], row['Record ID'])
        position = row_position.get(key)

        if position is None:
            # First time this pair is seen: keep the row and start its model list
            row_position[key] = len(unique_rows)
            unique_rows.append(row)
            model.append([model_name])
        else:
            # Pair already recorded: only note that this model returned it too
            model[position].append(model_name)

result_df = pd.DataFrame(unique_rows)
# Keep the four core columns first, followed by any additional columns the
# inputs carry, in order of appearance. This also guarantees the core columns
# exist when there were no input rows at all.
extra_columns = [column for column in result_df.columns if column not in base_columns]
result_df = result_df.reindex(columns=base_columns + extra_columns)

result_df['model'] = model
result_df.to_csv('data/predictions/combined_records.csv', index=False)

In [5]:
# Merge the "*_taxonomy" predictions of the four models into a single table.
#
# Every (Search Term, Record ID) pair is kept once, using the row from the
# first model that produced it, and the 'model' column lists each model that
# returned the pair, in processing order.
#
# Rows are collected in plain Python containers and the frame is built once
# at the end. Calling pd.concat and re-filtering the growing frame for every
# input row is quadratic, and concatenating onto an empty frame is deprecated
# in recent pandas releases.
base_columns = ['Search Term', 'Record ID', 'Name', 'Description']
unique_rows = []    # first-seen row (as a dict) for each (search term, record id) pair
model = []          # model[i] holds the names of the models that predicted unique_rows[i]
row_position = {}   # (search term, record id) -> index into unique_rows / model

for (df, model_name) in [(bert_taxonomy, 'BERT'), (roberta_taxonomy, 'RoBERTa'), (sgpt_taxonomy, 'SGPT'), (xlnet_taxonomy, 'XLNet')]:
    for row in df.to_dict('records'):
        key = (row['Search Term'], row['Record ID'])
        position = row_position.get(key)

        if position is None:
            # First time this pair is seen: keep the row and start its model list
            row_position[key] = len(unique_rows)
            unique_rows.append(row)
            model.append([model_name])
        else:
            # Pair already recorded: only note that this model returned it too
            model[position].append(model_name)

result_df = pd.DataFrame(unique_rows)
# Keep the four core columns first, followed by any additional columns the
# inputs carry, in order of appearance. This also guarantees the core columns
# exist when there were no input rows at all.
extra_columns = [column for column in result_df.columns if column not in base_columns]
result_df = result_df.reindex(columns=base_columns + extra_columns)

result_df['model'] = model
result_df.to_csv('data/predictions/combined_taxonomy.csv', index=False)