In [15]:
import numpy as np
import pandas as pd

messy_data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Emily', 'Frank', 'Grace'],
    'age': [25, 30, np.nan, 'thirty-five', 22, 45, 'unknown'],
    'gender': ['Female', 'Male', 'Male', 'Male', 'Female', 'Other', 'Male'],
    'country': ['USA', 'Canada', 'Mexico', 'USA', 'Australia', 'Unknown', 'UK'],
    'monthly_salary': [50000, 75000, '60k', np.nan, 40000, 'unknown', 80000],
    'annual_income': ['60,000', '90,000', np.nan, '100,000', '40k', '75k', 'unknown']
}

# Import messy data into a Pandas DataFrame
messy_df = pd.DataFrame(messy_data)

# Handling non-numeric data types and missing values:
messy_df['age'] = pd.to_numeric(messy_df['age'].replace('thirty-five', 35), errors='coerce')
messy_df['age'].fillna(messy_df['age'].median(), inplace=True)
messy_df.fillna({'country': 'Unknown', 'gender': 'Other'}, inplace=True)

# Handling inconsistent annual income format:
messy_df['annual_income'] = pd.to_numeric(messy_df['annual_income'].str.replace('[^\d.]', '', regex=True), errors='coerce')
messy_df['annual_income'] *= 1000  # Multiply by 1000 to convert 'k' to 000

# Remove dependent column
messy_df.drop('monthly_salary', axis=1, inplace=True)

# Convert 'Other' to 'Non-binary'
messy_df['gender'].replace('Other', 'Non-binary', inplace=True)

print("ORIGINAL DATA:")
print(messy_data)
print()
print("CLEANED DATA:")
print(messy_df)


ORIGINAL DATA:
{'name': ['Alice', 'Bob', 'Charlie', 'David', 'Emily', 'Frank', 'Grace'], 'age': [25, 30, nan, 'thirty-five', 22, 45, 'unknown'], 'gender': ['Female', 'Male', 'Male', 'Male', 'Female', 'Other', 'Male'], 'country': ['USA', 'Canada', 'Mexico', 'USA', 'Australia', 'Unknown', 'UK'], 'monthly_salary': [50000, 75000, '60k', nan, 40000, 'unknown', 80000], 'annual_income': ['60,000', '90,000', nan, '100,000', '40k', '75k', 'unknown']}

CLEANED DATA:
      name   age      gender    country  annual_income
0    Alice  25.0      Female        USA     60000000.0
1      Bob  30.0        Male     Canada     90000000.0
2  Charlie  30.0        Male     Mexico            NaN
3    David  35.0        Male        USA    100000000.0
4    Emily  22.0      Female  Australia        40000.0
5    Frank  45.0  Non-binary    Unknown        75000.0
6    Grace  30.0        Male         UK            NaN
