In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Load the census data; every later step works from this frame.
data = pd.read_csv("adult11.csv")

# Numerical predictors fed to the classifier.
X = data[["age", "education-num", "hours-per-week", "capital-gain", "capital-loss"]]

# Target: income bracket encoded as 0 (<=50K) / 1 (>50K).
# The raw labels are normalised first: a value such as " >50K" or ">50K."
# does not match the mapping keys, so it would become NaN and its row would
# then be discarded silently by the dropna() below.
y = (
    data["salary"]
    .str.strip()
    .str.rstrip(".")
    .map({"<=50K": 0, ">50K": 1})
)

# Join features and target so a NaN in either one removes the row from both,
# keeping X and y aligned.
combined_data = pd.concat([X, y], axis=1).dropna()
X = combined_data.drop("salary", axis=1)
# NaN placeholders force the mapped column to float; once they are gone the
# class labels can be stored as integers again.
y = combined_data["salary"].astype(int)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling (important for KNN). The scaler is fitted on the training
# rows only and then reused for the test rows, so no test-set statistics
# leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the KNN model (5 nearest neighbours).
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

# Predict on test data
predictions = model.predict(X_test)

# Accuracy is the fraction of test rows predicted correctly. It is computed
# from `predictions` instead of calling model.score(), which would repeat
# the whole prediction pass over X_test.
accuracy = float((predictions == y_test.to_numpy()).mean())
print("Model Accuracy:", accuracy)

# Classify one hypothetical individual with the trained model.
# Values are listed in the same order as the training columns:
# age, education-num, hours-per-week, capital-gain, capital-loss.
# education-num 13 is assumed to correspond to 'Bachelors'.
person_values = [45, 13, 45, 0, 0]
new_person_features = pd.DataFrame([person_values], columns=X.columns)

# Apply the scaler fitted on the training data so the new row is on the
# same scale as the rows the model was trained on.
scaled_new_person = scaler.transform(new_person_features)

# predict() returns one label per input row; there is only one row here.
predicted_salary_code = model.predict(scaled_new_person)[0]

# Translate the numeric class back into the income bracket it encodes.
if predicted_salary_code == 1:
    predicted_salary_label = ">50K"
else:
    predicted_salary_label = "<=50K"

print("Predicted Salary for new individual:", predicted_salary_label)

Model Accuracy: 0.8064460232195461
Predicted Salary for new individual: >50K
