## Step 1: Import Libraries

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


## Step 2: Load and Explore Dataset

In [None]:

# Load the raw CSV into a DataFrame and take a first look at it.
# NOTE(review): presumably the UCI "Adult" census income data, going by the
# file name and the "income" column used later - confirm the source.
df = pd.read_csv("adult 3.csv")
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()


## Step 3: Data Cleaning

In [None]:

# Treat '?' placeholders as missing values and drop the affected rows.
# The marker is matched with a regex that tolerates surrounding whitespace,
# so both "?" and " ?" (the form produced when the CSV has a space after
# each comma) are caught. Matching only one exact spelling would silently
# leave the other in place and let '?' be encoded as a real category later.
df.replace(r"^\s*\?\s*$", np.nan, regex=True, inplace=True)
df.dropna(inplace=True)
# Re-number the rows so the index is contiguous after the drop.
df.reset_index(drop=True, inplace=True)


## Step 4: Encode Categorical Variables

In [None]:

# Integer-encode every string-typed column in place.
# Each column gets its own fitted LabelEncoder, stored in `encoders` under
# the column name. Refitting one shared encoder would discard every mapping
# except the last, making it impossible to decode predictions or to encode
# new rows consistently at inference time.
encoders = {}
for col in df.select_dtypes(include=["object"]).columns:
    # `le` is rebound on each pass, so after the loop it still refers to the
    # encoder fitted on the last categorical column.
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le


## Step 5: Split Data

In [None]:

# Separate the target column from the feature matrix.
y = df["income"]
X = df.drop(columns=["income"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Step 6: Train Random Forest Classifier

In [None]:

# Build a 100-tree random forest with a fixed seed, then fit it on the
# training split.
forest_params = {"n_estimators": 100, "random_state": 42}
rfc = RandomForestClassifier(**forest_params)
rfc.fit(X_train, y_train)


## Step 7: Evaluate the Model

In [None]:

# Predict on the held-out rows.
y_pred = rfc.predict(X_test)

# Compute all three metrics first, then print them under their labels.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", matrix)


## Step 8: (Optional) Save Model for Deployment

In [None]:

import joblib

# Persist the fitted classifier to disk so it can be reloaded later.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
model_path = "salary_predictor_model.pkl"
joblib.dump(rfc, model_path)
