In [16]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [17]:
def string_replace(string, str_to_replace=None, replace_with=None) -> str:
    """
    Replace every occurrence of a substring in a string.

    Args:
        string (str): The input string to be processed.
        str_to_replace (str): The substring to look for. Defaults to None.
        replace_with (str): The text substituted for each occurrence. Defaults to None.

    Returns:
        str: ``string`` with every occurrence of ``str_to_replace`` replaced by
        ``replace_with``. If either of them is None there is nothing to
        substitute and ``string`` is returned unchanged.

    Example:
    >>> string_replace("hello world", "o", "0")
    'hell0 w0rld'
    >>> string_replace("foo bar baz", "bar", "qux")
    'foo qux baz'
    >>> string_replace("hello world")
    'hello world'
    """
    # str.replace() raises TypeError when handed None, so the advertised
    # defaults used to crash; treat a missing argument as "no replacement".
    if str_to_replace is None or replace_with is None:
        return string
    return string.replace(str_to_replace, replace_with)

def read_write(file:str, list_to_replace=None, list_replace_with=None)-> None:
    """
    Read a file, replace the specified substrings in its lines, and overwrite the original file.

    The replacements are applied to each line one after another, in list order,
    so the output of an earlier replacement can be matched by a later one.
    Targets and replacements are paired positionally; if the two lists differ
    in length, the surplus entries of the longer list are ignored.

    The whole result is computed before the file is reopened for writing, so a
    failure while replacing (for example a non-string entry in one of the
    lists) leaves the original file untouched.

    Args:
        file (str): The path to the file to be read and overwritten.
        list_to_replace (list): Substrings to be replaced in each line of the file. None (the default) means "no substrings".
        list_replace_with (list): Replacement strings, matched by position with ``list_to_replace``. None (the default) means "no replacements".

    Returns:
        None: The function overwrites the original file and does not return a value.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
        TypeError: If an entry of either list is not a string.

    Note:
        The file is opened with the platform's default text encoding.

    Example:
        Suppose the file "example.txt" contains the following lines:
            The quick brown fox jumps over the lazy dog.
            She sells seashells by the seashore.
            Peter Piper picked a peck of pickled peppers.

        >>> read_write("example.txt", ["e", "o"], ["3", "0"])
        # The lines in "example.txt" will be replaced as follows:
        #   Th3 quick br0wn f0x jumps 0v3r th3 lazy d0g.
        #   Sh3 s3lls s3ash3lls by th3 s3ash0r3.
        #   P3t3r Pip3r pick3d a p3ck 0f pickl3d p3pp3rs.
    """
    # Build the (target, replacement) pairs once. zip() stops at the shorter
    # list, which existing callers rely on, so that behaviour is kept.
    targets = [] if list_to_replace is None else list_to_replace
    replacements = [] if list_replace_with is None else list_replace_with
    pairs = list(zip(targets, replacements))

    with open(file, 'r') as f:
        lines = f.readlines()

    # Do all the work *before* reopening in 'w' mode: opening for writing
    # truncates the file, so an exception after that point would lose data.
    new_lines = []
    for line in lines:
        for target, replacement in pairs:
            line = line.replace(target, replacement)
        new_lines.append(line)

    with open(file, 'w') as f:
        f.writelines(new_lines)


def prop_printer(file:str, n_semicolons:int) -> None:
    """
    Prints the percentage of lines in a file whose semicolon count differs from n_semicolons.

    The percentage is printed unrounded. An empty file has no lines to flag and
    is reported as 0.0 %.

    Args:
    - file (str): The name of the file to be processed.
    - n_semicolons (int): The number of semicolons each line is expected to contain.

    Returns:
    - None

    Raises:
    - FileNotFoundError: If the specified file is not found.

    Example:
    With 'my_file.txt' holding four lines, one of which does not contain
    exactly two semicolons:

    >>> prop_printer('my_file.txt', 2)
    Proportion of flagged lines for my_file.txt: 25.0 %
    """
    with open(file, 'r') as f:
        lines = f.readlines()
    # Only the number of offending lines is needed, so count them directly
    # instead of collecting them into a list.
    flagged = sum(1 for line in lines if line.count(';') != n_semicolons)
    # Guard the division: an empty file would otherwise raise ZeroDivisionError.
    proportion = flagged / len(lines) * 100 if lines else 0.0
    print(f"Proportion of flagged lines for {file}: {proportion} %")

def val_inval_lists(file: str, n_semicolons: int) -> tuple[list[str], list[str]]:
    """
    Split the lines of a file into conforming and non-conforming lines.

    A line conforms when it contains exactly ``n_semicolons`` semicolons.
    Lines are returned as read, including their trailing newline, and keep
    their original order within each list.

    Args:
    - file (str): The name of the file to be processed.
    - n_semicolons (int): The number of semicolons a conforming line contains.

    Returns:
    - A tuple ``(valid, invalid)``: the lines with exactly ``n_semicolons``
      semicolons, followed by all the other lines.

    Raises:
    - FileNotFoundError: If the specified file is not found.

    Example:
    >>> valid_list, invalid_list = val_inval_lists('my_file.txt', 2)
    >>> print(valid_list)
    ['valid line 1;valid line 2;\n', 'valid line 3;valid line 4;\n']
    >>> print(invalid_list)
    ['invalid line 1;\n', 'invalid line 2;\n', 'invalid line 3;\n']
    """
    conforming: list[str] = []
    rejected: list[str] = []
    with open(file, 'r') as handle:
        for row in handle:
            # Route each line to exactly one of the two buckets.
            bucket = conforming if row.count(';') == n_semicolons else rejected
            bucket.append(row)
    return conforming, rejected

def drop_nonconform_lines(file, n_semicolons:int)->None:
    """
    Rewrite a file in place, keeping only the lines with the expected semicolon count.

    Every line that does not contain exactly ``n_semicolons`` semicolons is
    removed; the remaining lines are written back unchanged and in their
    original order.

    Args:
    - file (str): The name of the file to be processed.
    - n_semicolons (int): The number of semicolons a line must contain to be kept.

    Returns:
    - None

    Raises:
    - FileNotFoundError: If the specified file is not found.

    Example:
    >>> drop_nonconform_lines('my_file.txt', 2)
    """
    with open(file, 'r') as source:
        rows = source.readlines()
    with open(file, 'w') as target:
        # Stream the surviving lines straight back into the same file.
        target.writelines(row for row in rows if row.count(';') == n_semicolons)

In [18]:
# Listing files cleaned in place by this cell, one per city.
paris = 'annuaire_paris.txt'
lyon = 'annuaire_lyon.txt'
marseille = 'annuaire_marseille.txt'

# Substrings to rewrite, in the order they are applied to every line.
chars_to_replace = [
    'ème', 'è', 'é', '€', 'm²',
    ' (75)', ' (69)', ' (13)',
    'Terrain', 'terrain', 'er',
]
# Replacement for each target above, matched by position (Lyon and Marseille).
replace_chars_with = [
    ';', 'e', 'e', 'eur', 'm2;',
    '', '', '',
    '', '', ';',
]
# Paris variant. It has 9 entries for 11 targets and read_write pairs the two
# lists with zip, so 'terrain' and 'er' are left untouched in the Paris file.
# NOTE(review): confirm the shorter list is intentional.
replace_chars_with_paris = [
    ';', 'e', 'e', 'eur', 'm2',
    '', '', '',
    ';',
]

# Normalise each file in place.
read_write(file=paris, list_to_replace=chars_to_replace, list_replace_with=replace_chars_with_paris)
read_write(file=lyon, list_to_replace=chars_to_replace, list_replace_with=replace_chars_with)
read_write(file=marseille, list_to_replace=chars_to_replace, list_replace_with=replace_chars_with)

# Report the share of lines that do not contain exactly two semicolons.
prop_printer(file=paris, n_semicolons=2)
prop_printer(file=lyon, n_semicolons=2)
prop_printer(file=marseille, n_semicolons=2)

Proportion of flagged lines for annuaire_paris.txt: 2.2712524334847504 %
Proportion of flagged lines for annuaire_lyon.txt: 5.350269438029253 %
Proportion of flagged lines for annuaire_marseille.txt: 11.421098517872712 %
