# Fuzzy Mapping

In [2]:
import pandas as pd
from fuzzywuzzy import process

# Load the sentiment records into a DataFrame. The path is relative to the
# current working directory; the 'politician' column is normalized below.
df = pd.read_csv('files/politician_sentiments.csv')

def normalize_politician_names(names: pd.Series, threshold: int = 80) -> pd.Series:
    """Collapse near-duplicate spellings of a name onto one canonical spelling.

    Distinct non-missing values are visited in order of first appearance.
    Each value that has not yet been placed in a group becomes the canonical
    spelling of a new group, and every still-ungrouped value whose
    ``process.extract`` score against it is at least ``threshold`` is mapped
    onto it. Because a value is assigned to exactly one group, a canonical
    spelling is never remapped later and no alias chains (B -> A, A -> C)
    can form, so the result is the same on every run for the same input.

    Args:
        names: Series of name strings. Missing values are ignored when
            building groups and are returned unchanged.
        threshold: Minimum fuzzy-match score (fuzzywuzzy scores run 0-100)
            for two spellings to be treated as the same name.

    Returns:
        A new Series with the same index as ``names`` in which every alias
        has been replaced by the canonical spelling of its group.
    """
    # unique() keeps first-appearance order, unlike set(), whose iteration
    # order for strings changes with hash randomization between runs.
    unique_names = names.dropna().unique().tolist()

    alias_to_canonical = {}
    assigned = set()  # every name already placed in some group
    for name in unique_names:
        if name in assigned:
            continue
        assigned.add(name)  # `name` is the canonical spelling of a new group
        for similar_name, score in process.extract(name, unique_names, limit=None):
            # Skipping already-assigned names also skips `name` itself.
            if score >= threshold and similar_name not in assigned:
                assigned.add(similar_name)
                alias_to_canonical[similar_name] = name

    if not alias_to_canonical:
        # Nothing to merge (also covers empty / all-missing input).
        return names.copy()
    # map() yields NaN for names that are not aliases; restore the originals.
    return names.map(alias_to_canonical).fillna(names)

def normalize_nicknames(names):
    """Replace known nickname or variant spellings with the preferred name.

    Only exact matches against the alias table below are rewritten; every
    other value is returned as-is.

    Args:
        names: Object exposing ``.map`` (a pandas Series at the call site).

    Returns:
        The result of ``names.map`` with each listed alias substituted.
    """
    preferred_by_alias = {
        'Apollo C. Quiboloy': 'Apollo Quiboloy',
        'Bongbong Marcos Jr.': 'Ferdinand Marcos Jr.',
        'Ferdinand Bongbong Marcos Jr.': 'Ferdinand Marcos Jr.',
    }
    # Fall back to the value itself when it is not a known alias.
    return names.map(lambda value: preferred_by_alias.get(value, value))

# Fuzzy-merge near-duplicate spellings first, then apply the explicit
# nickname table, and store the result back in the same column.
df['politician'] = (
    df['politician']
    .pipe(normalize_politician_names)
    .pipe(normalize_nicknames)
)
# Show the distinct names that remain, then write the normalized table
# without the index column.
print(df['politician'].unique())
df.to_csv('files/normalized_politicians.csv', index=False)


['Ferdinand R. Marcos' 'Rodrigo Duterte' 'Donald Trump' 'Kamala Harris'
 'Kiefer Ravena' 'LeBron James' 'Sara Duterte' 'Alejandro Tengco'
 'Joel Chua' 'Jonvic Remulla' 'Juan Ponce Enrile' 'Menardo Guevarra'
 'Winston Casio' 'Chiz Escudero' 'Ferdinand Marcos Jr.'
 'Israelito Torreon' 'J-Hope' 'Leni Robredo' 'Liam Payne' 'Lorraine Badoy'
 'Jonathan Andal' 'Luistro' 'Michael Poa' 'Nolasco Mempin'
 'Ernesto Dionisio Jr.' 'Gloria Camora' 'Alan Tanjuakio' 'Chris Sorongon'
 'Edward Fajarda' 'Gina Acosta' 'Jiezel Cone Asia' 'Juleita Villadelrey'
 'Kelvin Gerome Teñido' 'Lemuel Ortonio' 'Ma. Constancia-Lim'
 'Maria Laiza Pamittan-Frogoso' 'Maximo Alexis Tan' 'Norman Baloro'
 'Regina Rosa Tecson' 'Rolmar Basalan' 'Rosalynne Sanchez' 'Winnie Dayego'
 'Zuleika Lopez' 'Bam Aquino' 'Paolo Ortega' 'Camille Villar' 'Lito Lapid'
 'Tito Sotto' 'Analiza Tabujara-Soriano' 'Areiz Macaraig' 'Butch Borja'
 'Elmer Baldemoro' 'Jose Perez' 'Frank Mendoza' 'Gabriel Bordado Jr.'
 'Gayle Abonal-Gomez' 'Jess Albeus