# Adult Dataset Analysis (adult.csv)

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder


## Load Dataset

In [None]:

# Column labels, following the UCI Adult dataset description.
# They are supplied explicitly because the file is read with header=None.
columns = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education_num",
    "marital_status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital_gain",
    "capital_loss",
    "hours_per_week",
    "native_country",
    "income",
]

# Read adult.csv:
#   - header=None / names=columns: no row is used as a header; the labels
#     above are applied instead.
#   - na_values="?": "?" entries are parsed as NaN.
#   - skipinitialspace=True: spaces following the delimiter are skipped.
adult = pd.read_csv(
    "/mnt/data/5e3dbb41-f3f4-4edd-8d67-ed46aee26ec8.csv",
    header=None,
    names=columns,
    na_values="?",
    skipinitialspace=True,
)

# Preview the first rows to sanity-check the parse.
adult.head()


## Data Exploration

In [None]:
# Print a concise summary of the frame: column dtypes and non-null counts.
adult.info()

In [None]:
# Summary statistics for every column; include='all' covers the non-numeric
# columns as well as the numeric ones.
adult.describe(include='all')

In [None]:
# Number of missing (NaN) entries in each column.
adult.isna().sum()

## Data Preprocessing

In [None]:

# Drop rows with missing values
adult.dropna(inplace=True)

# Encode categorical variables.
# Each object-dtype column gets its own LabelEncoder, kept in `encoders`
# (keyed by column name). Reusing a single encoder would overwrite its fitted
# classes on every column, leaving no way to map the integer codes back to
# the original labels, e.g. encoders["income"].inverse_transform([0, 1]).
encoders = {}
le = LabelEncoder()  # stays bound even if there are no object columns
for col in adult.select_dtypes(include="object").columns:
    le = LabelEncoder()
    adult[col] = le.fit_transform(adult[col])
    encoders[col] = le
# After the loop `le` is the encoder fitted on the last object column, which
# matches what the name referred to before the per-column encoders were kept.

adult.head()


## Data Visualization

In [None]:

# Bar chart of how many rows fall into each value of the income column.
income_ax = sns.countplot(data=adult, x="income")
income_ax.set_title("Income Distribution")
plt.show()


In [None]:

# Heatmap of the pairwise correlations between the numeric columns,
# with each cell annotated by its coefficient.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 6))
correlations = adult.corr(numeric_only=True)
sns.heatmap(correlations, annot=True, cmap="coolwarm", ax=heatmap_ax)
heatmap_ax.set_title("Correlation Heatmap")
plt.show()
