<a href="https://colab.research.google.com/github/SDACCARE/DaccarettData/blob/main/individual_project_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
import pandas as pd
import unicodedata
import requests
from io import StringIO
from IPython.display import display, HTML
import re


# Helper to remove accents
def remove_accents(input_str):
    """Return ``input_str`` with accents (combining marks) removed.

    The text is decomposed with Unicode NFKD normalization, which splits an
    accented letter into its base character followed by combining marks; the
    combining marks are then discarded and the remainder is re-joined.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

# Normalize the names in the dataframe
def normalize_names(dataframe):
    """Strip accents from and lower-case every cell of ``dataframe``, in place.

    Each cell is converted to ``str``, passed through ``remove_accents`` and
    lower-cased. Missing cells (NaN/None) are stored as the empty string.

    Args:
        dataframe: pandas DataFrame whose columns are all overwritten with
            their normalized values.

    Returns:
        The same ``dataframe`` object, after modification.
    """
    def _normalize_cell(value):
        # str(NaN) is the literal text "nan"; left as-is it would be shown as
        # a name and would match searches such as "nan" or "an".
        if pd.isna(value):
            return ""
        return remove_accents(str(value)).lower()

    for column in dataframe.columns:
        dataframe[column] = dataframe[column].apply(_normalize_cell)
    return dataframe

# Load the data from a URL
def load_data_from_url(url, timeout=30):
    """Download a two-column CSV of names and return it normalized.

    The first two lines of the downloaded text are skipped and the remaining
    rows are read without a header into the columns ``FirstName`` and
    ``LastName``. The result is then passed through ``normalize_names``.

    Args:
        url: Address of the CSV file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        pandas DataFrame with normalized ``FirstName`` and ``LastName``
        columns.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing the error page as CSV.
    response.raise_for_status()
    csv_data = StringIO(response.text)
    data = pd.read_csv(csv_data, skiprows=2, header=None, names=['FirstName', 'LastName'])
    return normalize_names(data)

# highlight search term
def highlight_term(text, term):
    """Wrap every case-insensitive occurrence of ``term`` in ``text`` with
    ``<u><b>...</b></u>`` markup.

    ``term`` is treated as literal text, not as a regular expression, so
    characters such as ``.``, ``(`` or ``*`` are matched verbatim.

    Args:
        text: The string to search in.
        term: The literal substring to highlight.

    Returns:
        ``text`` with each occurrence of ``term`` wrapped in underline/bold
        tags; ``text`` unchanged when ``term`` is empty or does not occur.
    """
    if not term:
        # An empty pattern matches at every position and would insert empty
        # tag pairs between all characters.
        return text
    # re.escape keeps regex metacharacters in the user's term from being
    # interpreted (or from raising re.error on input like "(").
    return re.sub(re.escape(term), r"<u><b>\g<0></b></u>", text, flags=re.IGNORECASE)

# perform the search
def search_names(dataframe, search_term):
    """Return the rows of ``dataframe`` whose first or last name contains
    ``search_term``.

    Both the search term and the ``FirstName``/``LastName`` values are
    accent-stripped and lower-cased before the substring comparison.
    """
    needle = remove_accents(search_term).lower()

    def row_matches(row):
        # FirstName is tested before LastName; any() stops at the first hit.
        return any(
            needle in remove_accents(row[field]).lower()
            for field in ('FirstName', 'LastName')
        )

    mask = dataframe.apply(row_matches, axis=1)
    return dataframe[mask]

# Run the search program in Jupyter Notebook with HTML formatting for bold and underline
def run_search_program_with_html(url):
    """Interactively search the names loaded from ``url`` and display matches.

    The data is loaded once with ``load_data_from_url``. The user is then
    prompted repeatedly for a search term until they type ``done``
    (case-insensitive). For each term, matching rows are rendered as
    ``Last, First`` lines of HTML with the matched text underlined and bold,
    followed by the number of matches.

    Args:
        url: Address of the CSV file passed to ``load_data_from_url``.
    """
    data = load_data_from_url(url)

    while True:
        search_term = input("Enter a term and press enter, or 'done' when done: ").strip()
        if search_term.lower() == 'done':
            break
        if not search_term:
            # An empty term is a substring of every name; ask again instead
            # of listing the whole file.
            continue

        matches = search_names(data, search_term)
        num_matches = len(matches)

        if num_matches == 0:
            print("No matches found.")
        else:
            print(f"Found '{search_term}' in:")
            # The loaded names are accent-stripped, so the highlight must use
            # the accent-stripped term too; otherwise an accented query finds
            # rows but highlights nothing.
            highlight_key = remove_accents(search_term)
            html_lines = [
                f"{highlight_term(last.title(), highlight_key)}, "
                f"{highlight_term(first.title(), highlight_key)}<br>"
                for first, last in zip(matches['FirstName'], matches['LastName'])
            ]

            display(HTML("".join(html_lines)))
            print(f"{num_matches} {'match' if num_matches == 1 else 'matches'} total.")
        print()

# URL of the CSV file containing the names to search
csv_url = 'https://appserv3.montgomerycollege.edu/CMSC-206/names-raw-sheet.csv'
# Start the interactive search loop; it runs until the user types 'done'
run_search_program_with_html(csv_url)


Enter a term and press enter, or 'done' when done: Moll
Found 'Moll' in:


1 match total.

Enter a term and press enter, or 'done' when done: -
Found '-' in:


13 matches total.

Enter a term and press enter, or 'done' when done: jess
Found 'jess' in:


2 matches total.

Enter a term and press enter, or 'done' when done: ong
Found 'ong' in:


11 matches total.

Enter a term and press enter, or 'done' when done: sim
Found 'sim' in:


2 matches total.

Enter a term and press enter, or 'done' when done: ova
Found 'ova' in:


11 matches total.

Enter a term and press enter, or 'done' when done: aso
Found 'aso' in:


2 matches total.

Enter a term and press enter, or 'done' when done: aze
Found 'aze' in:


2 matches total.

Enter a term and press enter, or 'done' when done: van der kleij
Found 'van der kleij' in:


1 match total.

