In [1]:
# dependencies
import yaml
import numpy as np
import pandas as pd
from fuzzywuzzy import fuzz

In [2]:
# support methods
def read_yaml(fname):
    """Load a YAML file and return its parsed contents.

    Args:
        fname: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, so non-ASCII strings in the file are read the same everywhere.
    with open(fname, 'r', encoding='utf-8') as f_handle:
        return yaml.safe_load(f_handle)


def make_pairs(tests):
    """Return every ordered pair drawn from ``tests``, self-pairs included.

    Args:
        tests: Iterable of items to pair up. It is copied into a list first,
            so one-shot iterators (e.g. generators) are paired correctly
            instead of being exhausted by the inner loop.

    Returns:
        A list of ``(a, b)`` tuples with one entry for every combination of
        two items, i.e. ``n ** 2`` tuples for ``n`` items. The first element
        varies slowest.
    """
    items = list(tests)
    return [(left, right) for left in items for right in items]


def make_scores(pair):
    """Score one pair of strings with several ``fuzz`` comparison functions.

    Args:
        pair: Indexable holding the two strings to compare at positions 0 and 1.

    Returns:
        A dict with one entry per scorer (keyed by the scorer's name), a
        ``"mean"`` entry holding ``np.mean`` of those scores, and the compared
        strings under ``"string_1"`` and ``"string_2"``.
    """
    first = pair[0]
    second = pair[1]
    scorer_names = (
        "ratio",
        "partial_ratio",
        "token_sort_ratio",
        "QRatio",
        "UQRatio",
        "WRatio",
        "UWRatio",
    )
    result = {name: getattr(fuzz, name)(first, second) for name in scorer_names}
    # The mean must be taken before the string fields are added to the dict.
    result["mean"] = np.mean(list(result.values()))
    result["string_1"] = first
    result["string_2"] = second
    return result

In [8]:
# main
# Load the test strings, score every pair produced by make_pairs, and tabulate
# the results with the compared strings and the mean score in the first columns.
tests = read_yaml("input/string_tests.yaml")
test_pairs = make_pairs(tests)
scores = [make_scores(pair) for pair in test_pairs]
# Build the frame straight from the list of records. Passing `columns` fixes
# the column order and still yields an (empty) frame when there are no records,
# where selecting the columns afterwards would raise a KeyError.
score_df = pd.DataFrame(
    scores,
    columns=[
        'string_1',
        'string_2',
        'mean',
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'QRatio',
        'UQRatio',
        'WRatio',
        'UWRatio',
    ],
)

In [9]:
# Show the score table as this cell's output.
score_df

Unnamed: 0,token_1,token_2,mean,ratio,partial_ratio,token_sort_ratio,QRatio,UQRatio,WRatio,UWRatio
0,alpha,alpha,100.000000,100,100,100,100,100,100,100
1,alpha,alba,64.571429,67,50,67,67,67,67,67
2,alpha,alvarez,51.428571,50,60,50,50,50,50,50
3,alpha,bravo,20.000000,20,20,20,20,20,20,20
4,alpha,jose,0.000000,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
59,iose,bravo,22.428571,22,25,22,22,22,22,22
60,iose,jose,75.000000,75,75,75,75,75,75,75
61,iose,jóse,53.000000,50,50,57,57,50,57,50
62,iose,j0se,50.000000,50,50,50,50,50,50,50


In [10]:
def write_yaml(yaml_file, data):
    """Serialize ``data`` to ``yaml_file`` as block-style YAML.

    Args:
        yaml_file: Path of the file to create or overwrite.
        data: Object to serialize with ``yaml.dump``.

    Returns:
        The integer ``1`` once the file has been written.
    """
    # Write UTF-8 explicitly and let the dumper emit non-ASCII characters
    # as-is, so the file does not depend on the platform default encoding and
    # accented strings stay readable instead of being escaped.
    with open(yaml_file, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True)
    # The `with` block has already closed the file; no explicit close is needed.
    print(f'{yaml_file} written successfully')
    return 1

In [13]:
# NOTE(review): this cell was left as the unfinished call `write_yaml(test_pairs`
# (no closing parenthesis, and the data passed where the file path belongs),
# which is a SyntaxError. The recorded output is the repr of `test_pairs`, so
# the cell displays that value. To persist the pairs instead, call
# write_yaml(<target path>, test_pairs) -- TODO confirm the intended path.
test_pairs

[('alpha', 'alpha'),
 ('alpha', 'alba'),
 ('alpha', 'alvarez'),
 ('alpha', 'bravo'),
 ('alpha', 'jose'),
 ('alpha', 'jóse'),
 ('alpha', 'j0se'),
 ('alpha', 'iose'),
 ('alba', 'alpha'),
 ('alba', 'alba'),
 ('alba', 'alvarez'),
 ('alba', 'bravo'),
 ('alba', 'jose'),
 ('alba', 'jóse'),
 ('alba', 'j0se'),
 ('alba', 'iose'),
 ('alvarez', 'alpha'),
 ('alvarez', 'alba'),
 ('alvarez', 'alvarez'),
 ('alvarez', 'bravo'),
 ('alvarez', 'jose'),
 ('alvarez', 'jóse'),
 ('alvarez', 'j0se'),
 ('alvarez', 'iose'),
 ('bravo', 'alpha'),
 ('bravo', 'alba'),
 ('bravo', 'alvarez'),
 ('bravo', 'bravo'),
 ('bravo', 'jose'),
 ('bravo', 'jóse'),
 ('bravo', 'j0se'),
 ('bravo', 'iose'),
 ('jose', 'alpha'),
 ('jose', 'alba'),
 ('jose', 'alvarez'),
 ('jose', 'bravo'),
 ('jose', 'jose'),
 ('jose', 'jóse'),
 ('jose', 'j0se'),
 ('jose', 'iose'),
 ('jóse', 'alpha'),
 ('jóse', 'alba'),
 ('jóse', 'alvarez'),
 ('jóse', 'bravo'),
 ('jóse', 'jose'),
 ('jóse', 'jóse'),
 ('jóse', 'j0se'),
 ('jóse', 'iose'),
 ('j0se', 'alpha')