In [3]:
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support
import pandas as pd

In [7]:
def describe_results(result):
    # Look at a sample evenly distributed among classes
    class_even_result = result.groupby('classification').head(56)

    # Assuming 'pred' and 'labels' are your input series
    precision, recall, f1, _ = precision_recall_fscore_support(
        class_even_result['classification'], 
        class_even_result['pred'], 
        average='weighted'
    )

    print(f'Precision: {precision:.2f}')
    print(f'Recall: {recall:.2f}')
    print(f'F1 Score: {f1:.2f}')

In [2]:
eval_data = pd.read_csv('data/eval/hand_annotated_pairs.csv')
eval_data

Unnamed: 0,Producer Name_x,Producer Name_y,Abbreviation Name_x,Abbreviation Name_y,classification
0,societe cooperative agricole de kouibly,societe cooperative agricole de kouibly,socak scoops,socas,0
1,societe cooperative soubreenne des producteurs...,societe cooperative soubreenne des producteurs...,scspa coop ca,scspa,1
2,societe cooperative agricole de dogbo,societe cooperative agricole de dogbo,socopadogbo coop-ca,socopadogbo,1
3,cooperative agricole des producteurs de divo,cooperative agricole des producteurs de divo,coopapd coop-ca,coopradi,0
4,cooperative agricole la tolerance de bonoufla,cooperative agricole la tolerance de bonoufla,scoops catb,catb,1
...,...,...,...,...,...
310,societe cooperative ewounbo tiassale,ste coop s ewounbo de tiassale,scoops ewounbo tiassale,ewounbo tiassale,1
311,societe cooperative agricole n'zassa de tiassale,ste coop agri n'zassa de tiassale,coop ca- scapen,scapen,1
312,societe cooperative agricole n'zassa de divo,ste coop agri n'zassa de tiassale,coop-ca scapen-divo,scapen,0
313,entreprise cooperative de gabiadji,entr coop de gabiadji,ecoga,ecoga_coop-ca,1


In [43]:
import semantic_similarity as ss

def evaluate_with_similarity(eval_data, similarity_fn, threshold=0.8):
    similarity_results = similarity_fn(
        df=eval_data, 
        column_1='Producer Name_x', 
        column_2='Producer Name_y', 
        sample_size=eval_data.shape[0], 
        new_col_name='similarity'
    )

    similarity_results['pred'] = similarity_results['similarity'] > threshold
    return tf_idf_similarity_results



In [31]:
semantic_similarity_results = evaluate_with_similarity(
    eval_data=eval_data, 
    similarity_fn=ss.process_semantic_similarity,
    threshold=0.88)
describe_results(semantic_similarity_results)

Using device: cpu
Calculating semantic similarity for 315 random rows...
Precision: 0.21
Recall: 0.38
F1 Score: 0.27


In [30]:
second_half_similarity_results = evaluate_with_similarity(
    eval_data=eval_data, 
    similarity_fn=ss.process_second_half_similarity,
    threshold=0.88)
describe_results(tf_idf_similarity_results)

Precision: 0.70
Recall: 0.70
F1 Score: 0.70


In [45]:
tf_idf_similarity_results = evaluate_with_similarity(
    eval_data=eval_data, 
    similarity_fn=ss.process_tf_idf,
    threshold=0.88)
describe_results(tf_idf_similarity_results)

Precision: 0.70
Recall: 0.70
F1 Score: 0.70


In [50]:
gpt_4o_mini_no_examples_result = pd.read_csv('data/outputs/gpt_4o_mini_no_examples_eval.csv')
describe_results(gpt_4o_mini_no_examples_result)

Precision: 0.76
Recall: 0.71
F1 Score: 0.69


In [51]:
gpt_35_turbo_no_examples_result = pd.read_csv('data/outputs/gpt_35_turbo_no_examples_eval.csv')
describe_results(gpt_35_turbo_no_examples_result)

Precision: 0.85
Recall: 0.79
F1 Score: 0.79


In [47]:
# tf_idf_similarity_results
wrong = tf_idf_similarity_results[tf_idf_similarity_results['classification'] != tf_idf_similarity_results['pred']]
print(wrong.shape)
wrong

(34, 7)


Unnamed: 0,Producer Name_x,Producer Name_y,Abbreviation Name_x,Abbreviation Name_y,classification,similarity,pred
3,cooperative agricole des producteurs de divo,cooperative agricole des producteurs de divo,coopapd coop-ca,coopradi,0,1.0,True
6,societe cooperative agricole de soubre,societe cooperative agricole de soubre,socopaso scoops,scasou-coop-ca,0,1.0,True
10,societe cooperative agricole sinikan,societe cooperative agricole sinikan,coopas,sinikan-scoopas,0,1.0,True
19,societe cooperative agricole de dogbo,societe cooperative agricole de dogbo,coop-ca cadogbo,socopadogbo,0,1.0,True
5,societe cooperative agricole sinikan,societe cooperative agricole sinikan,scoopas coop-ca,sinikan-scoopas,0,1.0,True
22,societe cooperative agricole moderne de divo,societe cooperative agricole moderne de divo,coop-ca scamdi,coopamdi,0,1.0,True
0,societe cooperative agricole de kouibly,societe cooperative agricole de kouibly,socak scoops,socas,0,1.0,True
53,societe cooperative agricole de bouafle,societe cooperative agricole de bouafle,so.ca.bo-scoops,socab coop-ca,0,1.0,True
12,societe cooperative agricole de guitry,societe cooperative agricole de guitry,coop-ca socoopgui,coop-ca-socoagui,0,1.0,True
18,societe cooperative agricole de guitry,societe cooperative agricole de guitry,socoopag coop-ca,coop-ca-socoagui,0,1.0,True


In [29]:
wrong = result[result['classification'] != result['pred']]
print(wrong.shape)
wrong

(40, 6)


Unnamed: 0,Producer Name_x,Producer Name_y,Abbreviation Name_x,Abbreviation Name_y,classification,pred
0,societe cooperative agricole de kouibly,societe cooperative agricole de kouibly,socak scoops,socas,0,True
4,cooperative agricole la tolerance de bonoufla,cooperative agricole la tolerance de bonoufla,scoops catb,catb,1,False
5,societe cooperative agricole sinikan,societe cooperative agricole sinikan,scoopas coop-ca,sinikan-scoopas,0,True
12,societe cooperative agricole de guitry,societe cooperative agricole de guitry,coop-ca socoopgui,coop-ca-socoagui,0,True
15,societe cooperative des producteurs agricole ...,societe cooperative des producteurs agricole d...,scoopras coop-ca,coopras,1,False
24,societe cooperative agricole de soubre,societe cooperative agricole de soubre,scasou,scasou-coop-ca,1,False
25,societe cooperative agricole source de guitry,societe cooperative agricole source de guitry,socopasg coop-ca,scoopasg,1,False
26,societe cooperative ivoirienne du negoce des p...,cooperative ivoirienne du negoce des produits ...,scinpa coop ca,scinpa,1,False
28,entreprise cooperative des producteurs agricol...,entreprise cooperative des producteurs agricol...,ecoopak coop-ca,ecoopak coop-ca,1,False
29,societe cooperative des producteurs agricoles ...,cooperative des producteurs agricoles bebitafo...,coop-ca cooprabed,coop-ca-cooprabed,1,False
