In [8]:
# Installs
# pip install fuzzywuzzy
# pip install python-Levenshtein

In [24]:
# Libraries
from collections.abc import Iterable

import pandas as pd
from fuzzywuzzy import fuzz, process

In [None]:
# Load the contact-details export (swap in the path to your own data file)
df = pd.read_csv("chemnet_contactdetails.csv")
# Company names with the missing entries removed
names = df.loc[:, "Company Name"].dropna()

# Fuzzy Matching using Levenshtein Distance

In [20]:
def get_fuzzy_matches(
    company_name: str,
    name_list: Iterable[str],
    threshold: int = 85,
    limit: int = 5,
) -> list[tuple[str, int]]:
    """
    Find fuzzy matches for a given company name from a collection of company names.

    Candidates are scored with ``fuzz.ratio``. Only candidates whose score is at
    least ``threshold`` and strictly below 100 are kept, so entries that score as
    an exact match of the input company are excluded. Filtering happens *before*
    the result is capped, so exact matches and duplicate rows in ``name_list``
    cannot use up the available result slots.

    :param company_name: The company name to fuzzy match
    :param name_list: The company names to match against (e.g. a list or a pandas Series)
    :param threshold: The minimum similarity score, inclusive (default 85)
    :param limit: The maximum number of matches to return (default 5)
    :return: Matches ordered from highest to lowest score. Each entry starts with
        ``(name, score)``.
        NOTE(review): for dict-like inputs such as a pandas Series the library
        appears to append the entry's key as a third element -- confirm before
        unpacking entries as pairs.
    """
    # Nothing to compare against: return early instead of calling the matcher.
    try:
        if name_list is None or len(name_list) == 0:
            return []
    except TypeError:
        pass  # un-sized iterable (e.g. a generator): let the matcher consume it

    # Rank every candidate first (limit=None); capping inside process.extract
    # would let exact matches crowd out the near matches we actually want.
    ranked = process.extract(company_name, name_list, scorer=fuzz.ratio, limit=None)
    near_matches = [entry for entry in ranked if threshold <= entry[1] < 100]
    return near_matches[:limit]
    

In [17]:
# Choosing a company to match against.
# Use a positional lookup: `names` had its missing entries dropped, so its index
# labels can have gaps and label 0 is not guaranteed to exist.
# NOTE: the variable name is misspelled ("compnay"); it is kept unchanged in case
# other cells reference it.
example_compnay = names.iloc[0]
example_compnay

'Yancheng Hongtai Bioengineering Co.,Ltd.'

In [25]:
# Match against the example company chosen in the previous cell (rather than a
# pasted copy of its name), with a loose threshold to surface more candidates.
fuzzy_matches = get_fuzzy_matches(example_compnay, names, threshold=50)
print("Fuzzy Match List")
for match in fuzzy_matches:
    # Index rather than unpack: only the first two fields (name, score) are used.
    print(f" Match: {match[0]}, Similarity Score: {match[1]}")

Fuzzy Match List
 Match: Yancheng HuaDe (DanCheng) Biological Engineering Co.,Ltd., 'Similarity Score: 74
 Match: Yancheng HuaDe (DanCheng) Biological Engineering Co.,Ltd., 'Similarity Score: 74
 Match: Yancheng Sanhe biochemical co., ltd, 'Similarity Score: 65
