In [9]:
import pandas as pd

In [10]:
# Helper function to create CSV files for testing
def create_csv(content, file_name):
    """Write ``content`` verbatim to ``file_name``, replacing any existing file.

    Args:
        content: Full text of the CSV file (header line plus data rows).
        file_name: Path of the file to create or overwrite.
    """
    # pandas.read_csv decodes files as UTF-8 by default, so write with the
    # same encoding rather than the platform-dependent default; otherwise a
    # fixture containing non-ASCII names may not round-trip on every system.
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(content)

# Test cases
# Each fixture maps a CSV file name to its full text. Rows are joined
# explicitly instead of being written inside an indented triple-quoted string:
# that indentation would become part of every data row and show up as leading
# spaces in the first (username) field once the file is parsed.
test_cases = {
    # Regular test case
    'users.csv': "\n".join([
        "username,email",
        "GabrielDelfin,gabodelfin@tec.com",
        "Benji,benjamin@teachertec.com",
        "LuisPaulin,luigi@tec.com",
    ]),

    # Test case for an empty document (header only, no data rows)
    'empty_users.csv': "username,email",

    # Test case for multiple domains
    'multiple_domains_users.csv': "\n".join([
        "username,email",
        "luis,luis@tecqro.com",
        "gabriel,gabriel2@tecmty.com",
        "delfin,delfin@tecgdl.com",
        "paulin,paulin@tecqro.com",
        "benji,benji@tecgdl.com",
    ]),
}

# Write every fixture to disk under its own file name
for file_name in test_cases:
    content = test_cases[file_name]
    create_csv(content, file_name)

In [11]:
# Function to read CSV and extract data
def read_and_extract_data(file_path):
    """Load a users CSV and return its ``username`` and ``email`` columns.

    Header names and cell values are stripped of surrounding whitespace so
    that padded or indented rows do not leak spaces into the extracted data.
    Both columns are read as text; missing cells stay as NaN.

    Args:
        file_path: Path to a CSV file with a header row containing at least
            the columns ``username`` and ``email``.

    Returns:
        A ``(usernames, emails)`` tuple of pandas Series, one entry per data
        row (both empty when the file only contains the header).

    Raises:
        KeyError: If either required column is absent from the header.
    """
    # Read everything as text: usernames/emails are identifiers, not numbers,
    # and a uniform string dtype makes the whitespace clean-up below safe.
    df = pd.read_csv(file_path, dtype=str)

    # Tolerate padded header names such as "username, email"
    df.columns = df.columns.str.strip()

    missing = [name for name in ('username', 'email') if name not in df.columns]
    if missing:
        raise KeyError(
            f"{file_path} is missing required column(s): {', '.join(missing)}"
        )

    # Extract relevant columns, dropping padding around each value
    usernames = df['username'].str.strip()
    emails = df['email'].str.strip()

    return usernames, emails

# Function to process data
def process_data(usernames, emails):
    """Count users and tally how many email addresses belong to each domain.

    Args:
        usernames: pandas Series with one entry per user row.
        emails: pandas Series of email addresses; missing values are allowed.

    Returns:
        A ``(user_count, domain_counts)`` tuple where ``user_count`` is the
        number of rows in ``usernames`` and ``domain_counts`` is a pandas
        Series mapping each lower-cased domain to its number of occurrences,
        most frequent first. Addresses that are missing, contain no ``@`` or
        have nothing after it are left out of the tally.
    """
    # Count number of users
    user_count = len(usernames)

    # Drop missing addresses and force text first: a column with no valid
    # address is parsed as float NaN, on which the .str accessor would raise.
    cleaned = emails.dropna().astype(str).str.strip().str.lower()

    # The domain is whatever follows the LAST '@'; splitting from the right
    # keeps addresses with an '@' in the local part from being mis-attributed.
    # Entries without any '@' yield NaN here and are discarded.
    domains = cleaned.str.rsplit('@', n=1).str[1].dropna()
    domains = domains[domains != ""]

    # Analyze email domains
    domain_counts = domains.value_counts()

    return user_count, domain_counts

# Function to generate report
def generate_report(user_count, domain_counts, report_path):
    """Write a plain-text summary to ``report_path``, overwriting any old file.

    The report starts with the total user count, followed by a blank line,
    a section heading, and one ``domain: count`` line per entry yielded by
    ``domain_counts.items()``, in iteration order.
    """
    heading = (
        f"Total number of users: {user_count}\n"
        "\nEmail domain distribution:\n"
    )
    with open(report_path, 'w') as report:
        report.write(heading)
        report.writelines(
            f"{name}: {total}\n" for name, total in domain_counts.items()
        )

# Function to summarize the information
def summarize_report(input_csv, report_path):
    """Run the full pipeline for one CSV: read, aggregate, write the report.

    Prints a confirmation line naming ``input_csv`` once the report has been
    written to ``report_path``.
    """
    extracted = read_and_extract_data(input_csv)
    user_total, per_domain = process_data(*extracted)
    generate_report(user_total, per_domain, report_path)
    print(f"Report for {input_csv} generated successfully!")

In [12]:
# Pair every fixture CSV with the name of the report derived from it
report_names = {
    name: name.replace('.csv', '_report.txt') for name in test_cases
}

# Generate all reports first ...
for csv_file, report_file in report_names.items():
    summarize_report(csv_file, report_file)

# ... then echo each one back so the results can be checked by eye
for csv_file, report_file in report_names.items():
    with open(report_file, 'r') as file:
        print(
            "-------------------",
            f"Contents of {report_file}:",
            file.read(),
            "-------------------",
            sep="\n",
        )

Report for users.csv generated successfully!
Report for empty_users.csv generated successfully!
Report for multiple_domains_users.csv generated successfully!
-------------------
Contents of users_report.txt:
Total number of users: 3

Email domain distribution:
tec.com: 2
teachertec.com: 1

-------------------
-------------------
Contents of empty_users_report.txt:
Total number of users: 0

Email domain distribution:

-------------------
-------------------
Contents of multiple_domains_users_report.txt:
Total number of users: 5

Email domain distribution:
tecqro.com: 2
tecgdl.com: 2
tecmty.com: 1

-------------------
