In [4]:
from datetime import datetime


def correct_date_format(date):
    try:
        # Attempt to parse the date in various formats
        parsed_date = datetime.strptime(date, "%d-%m-%Y")
        corrected_date = parsed_date.strftime("%Y-%m-%d")
        return corrected_date
    except ValueError:
        return None


def calculate_age(date_of_birth):
    try:
        # Calculate the age based on the date of birth
        dob = datetime.strptime(date_of_birth, "%Y-%m-%d")
        today = datetime.today()
        age = today.year - dob.year
        if today.month < dob.month or (today.month == dob.month and today.day < dob.day):
            age -= 1
        return age
    except ValueError:
        return None


def clean_data(dataset):
    cleaned_dataset = []
    seen_emails = set()

    for entry in dataset:
        name, age, dob, email = entry.split(',')

        # Check if the email has been encountered before
        if email in seen_emails:
            continue

        # Correct the date format
        corrected_dob = correct_date_format(dob)
        if corrected_dob is not None:
            dob = corrected_dob

        # Calculate the age if it is missing
        if age == "":
            age = calculate_age(dob)

        # Skip the entry if the date of birth is still invalid or age calculation failed
        if dob is None or age is None:
            continue

        # Add the cleaned entry to the dataset
        cleaned_dataset.append(f"{name},{age},{dob},{email}")
        seen_emails.add(email)

    return cleaned_dataset


def remove_entries_without_dob(dataset):
    cleaned_dataset = []
    for entry in dataset:
        name, age, dob, email = entry.split(',')
        if dob != '':
            cleaned_dataset.append(entry)
    return cleaned_dataset


title = "--- Hospital Database of Employees ---"

# Calculate the total width of the output
total_width = 60

# Calculate the left and right padding for center alignment
left_padding = (total_width - len(title)) // 2
right_padding = total_width - len(title) - left_padding

# Create the centered title string
centered_title = " " * left_padding + title.upper() + " " * right_padding

# Create the line separator
line_separator = "*" * total_width

# Print the line separator, centered title, and another line separator
print(line_separator)
print(centered_title)
print(line_separator)
print("\n\n")


# Read data from the file
file_path = "employee_data.txt"
with open(file_path, 'r') as file:
    data = [line.strip() for line in file.readlines()]

cleaned_data = clean_data(data)
dob_removed_data = remove_entries_without_dob(cleaned_data)

# Print the cleaned data as a table
table_headers = ["Name", "Age", "Date of Birth", "Email"]
table_data = [entry.split(',') for entry in dob_removed_data]

# Calculate the maximum width for each column
column_widths = [max(len(str(row[i])) for row in table_data) for i in range(len(table_headers))]

# Print the table headers
header_row = " | ".join(header.ljust(width) for header, width in zip(table_headers, column_widths))
print(header_row)

# Print the separator line
separator_line = "-+-".join("-" * width for width in column_widths)
print(separator_line)

# Print the table data
for row in table_data:
    if len(row) == len(table_headers):  # Check if the row has the expected number of values
        formatted_row = " | ".join(cell.ljust(width) for cell, width in zip(row, column_widths))
        print(formatted_row)

print("\n\n-----------------")
print("***Bar Chart***")
print("-----------------\n")

# Count the number of entries in each age group
age_groups = {
    "10-20": 0,
    "21-30": 0,
    "31-40": 0,
    "41-50": 0
}

for entry in dob_removed_data:
    _, age, _, _ = entry.split(',')
    if age != '' and age != 'Age':  # Skip the entry if age is 'Age'
        try:
            age = int(age)
            if 10 <= age <= 20:
                age_groups["10-20"] += 1
            elif 21 <= age <= 30:
                age_groups["21-30"] += 1
            elif 31 <= age <= 40:
                age_groups["31-40"] += 1
            elif 41 <= age <= 50:
                age_groups["41-50"] += 1
        except ValueError:
            continue


# Prepare the data for the bar chart
age_group_labels = list(age_groups.keys())
age_group_counts = list(age_groups.values())

# Find the maximum count for scaling the bar chart
max_count = max(age_group_counts)

# Define the ASCII symbols for representing each person
symbol = "*"

# Create the bar chart
for label, count in zip(age_group_labels, age_group_counts):
    scaled_count = int(count * 8 / max_count)  # Scale the count to fit within 40 characters
    bar = symbol * scaled_count
    print(f"{label} | {bar} ({count})")


# Save the updated database to a new file
updated_file_path = "Updated_database.txt"
with open(updated_file_path, 'w') as file:
    file.write(header_row + "\n")
    file.write(separator_line + "\n")
    for row in table_data:
        if len(row) == len(table_headers):  # Check if the row has the expected number of values
            formatted_row = " | ".join(cell.ljust(width) for cell, width in zip(row, column_widths))
            file.write(formatted_row + "\n")

print(f"\nUpdated database saved to {updated_file_path}.")


************************************************************
           --- HOSPITAL DATABASE OF EMPLOYEES ---           
************************************************************



Name           | Age | Date of Birth | Email              
---------------+----+------------+--------------------
John Smith     | 25 | 1998-05-12 | john@example.com   
Jane Doe       | 30 | 1992-09-21 | jane@example.com   
Emily Wilson   | 28 | 1985-12-01 | emily@example.com  
Michael Brown  | 28 | 1995-04-10 | michael@example.com
Sarah Anderson | 35 | 1980-10-25 | sarah@example.com  
David Miller   | 42 | 1978-06-03 | david@example.com  
Daniel Harris  | 33 | 1990-03-18 | daniel@example.com 
Alice Johnson  | 32 | 1990-07-15 | alice@example.com  


-----------------
***Bar Chart***
-----------------

10-20 |  (0)
21-30 | ******** (4)
31-40 | ****** (3)
41-50 | ** (1)

Updated database saved to Updated_database.txt.
