### Bias & Fairness in Data: Distribution Check
**Description**: Load the Adult Income dataset and check for representation bias by analyzing the distribution of gender across different income levels.

In [None]:
# write your code from here

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Step 1: Dataset location and schema (adult.data ships without a header row)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = [
    "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
    "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
    "hours-per-week", "native-country", "income"
]


def load_adult_income(source=url) -> pd.DataFrame:
    """Load the Adult Income data and drop rows that contain missing values.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object). Defaults to the UCI repository URL.

    Returns:
        A DataFrame with the 15 named columns and no NaN values.
    """
    # The raw file separates fields with ", " and marks unknowns as "?".
    # ``skipinitialspace=True`` strips the leading blank before NA matching,
    # so the marker must be "?" (not " ?"); otherwise nothing is recognised
    # as missing and the dropna below silently removes no rows.
    frame = pd.read_csv(
        source,
        header=None,
        names=columns,
        na_values="?",
        skipinitialspace=True,
    )

    # Step 2: Clean data
    frame.dropna(inplace=True)
    return frame


def gender_distribution_by_income(frame: pd.DataFrame):
    """Tabulate gender counts and row-wise percentages per income level.

    Args:
        frame: DataFrame containing at least the ``income`` and ``sex`` columns.

    Returns:
        A ``(counts, percentages)`` pair of DataFrames indexed by income level
        with one column per gender. Each row of ``percentages`` sums to 100.
    """
    # fill_value=0 makes an absent income/sex combination a real zero count
    # instead of NaN, so it shows up as 0% rather than as a missing cell.
    counts = frame.groupby(["income", "sex"]).size().unstack(fill_value=0)
    row_totals = counts.sum(axis=1)
    percentages = counts.div(row_totals, axis=0) * 100
    return counts, percentages


def plot_gender_by_income(frame: pd.DataFrame) -> None:
    """Draw a grouped count plot of gender within each income category."""
    plt.figure(figsize=(8, 5))
    sns.countplot(data=frame, x="income", hue="sex")
    plt.title("Gender Representation Across Income Levels")
    plt.ylabel("Number of Individuals")
    plt.xlabel("Income Category")
    plt.legend(title="Gender")
    plt.tight_layout()
    plt.show()


# Runs when executed as a script or as a notebook cell (where __name__ is
# "__main__"); importing the functions elsewhere triggers no download or plot.
if __name__ == "__main__":
    df = load_adult_income()

    # Step 3: Analyze gender distribution across income levels
    gender_income_counts, gender_income_percentage = gender_distribution_by_income(df)

    print("🔍 Gender Distribution (%) across Income Levels:\n")
    print(gender_income_percentage.round(2))

    # Step 4: Visualize
    plot_gender_by_income(df)
