In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib

# Load the dataset, drop the unused 'fnlwgt' column, and discard every row
# that contains a '?' placeholder (treated as a missing value).
df = (
    pd.read_excel(r"C:\Users\Mr\Desktop\adult 3.xlsx")
    .drop(columns=['fnlwgt'])
    .replace('?', np.nan)
    .dropna()
    .copy()  # own the data so the column assignments below act on a plain frame
)

# Every object-typed column except the target is a categorical feature.
# list.remove raises ValueError if 'income' is not among the object columns.
categorical_cols = df.select_dtypes(include='object').columns.tolist()
categorical_cols.remove('income')

# Fit one LabelEncoder per categorical feature, then swap each column for its
# integer codes. The fitted encoders are kept so they can be saved below.
label_encoders = {name: LabelEncoder().fit(df[name]) for name in categorical_cols}
for name, encoder in label_encoders.items():
    df[name] = encoder.transform(df[name])

# Encode the target. LabelEncoder sorts its classes, so with the two labels
# '<=50K' and '>50K' the codes come out as 0 and 1 respectively.
target_le = LabelEncoder()
df['income'] = target_le.fit_transform(df['income'])

# Hold out 20% of the rows for evaluation; the seed fixes the split.
y = df['income']
X = df.drop(columns='income')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the classifier (fit returns the estimator itself).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Save the model and both kinds of encoder with joblib.
for filename, artifact in (
    ("salary_model.pkl", model),
    ("encoders.pkl", label_encoders),
    ("target_encoder.pkl", target_le),
):
    joblib.dump(artifact, filename)

accuracy = model.score(X_test, y_test)
print("✅ Model trained and saved with accuracy:", accuracy)


✅ Model trained and saved with accuracy: 0.8467661691542289


In [3]:
import gradio as gr
import pandas as pd
import joblib
import numpy as np

# Load the trained model, the per-column feature encoders, and the target encoder.
# NOTE: joblib.load unpickles arbitrary Python objects, so only load files you
# produced yourself or otherwise trust.
model, label_encoders, target_encoder = (
    joblib.load(artifact_path)
    for artifact_path in ("salary_model.pkl", "encoders.pkl", "target_encoder.pkl")
)

# Feature column names, listed in the same order in which predict_salary
# fills its input row.
input_features = [
    "age",
    "workclass",
    "education",
    "educational-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "gender",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
]

def predict_salary(age, workclass, education, edu_num, marital, occupation, relationship, race, gender, gain, loss, hours, country):
    """Predict the income label for one person from the form values.

    The arguments arrive positionally, in the same order as the interface
    inputs. ``age``, ``edu_num``, ``gain``, ``loss`` and ``hours`` are
    converted with ``int()`` (fractions are truncated); every column that has
    an entry in ``label_encoders`` is converted to its integer code.

    Returns:
        str: ``"🧾 Predicted Income: <label>"`` on success. When a field is
        empty, a numeric field cannot be converted to an integer, or a
        category is not known to its encoder, a ``"⚠️ ..."`` message
        describing the problem is returned instead of raising, so the UI shows
        something actionable rather than a bare error.
    """
    input_dict = {
        "age": age,
        "workclass": workclass,
        "education": education,
        "educational-num": edu_num,
        "marital-status": marital,
        "occupation": occupation,
        "relationship": relationship,
        "race": race,
        "gender": gender,
        "capital-gain": gain,
        "capital-loss": loss,
        "hours-per-week": hours,
        "native-country": country
    }

    # An empty Number box or an unselected Dropdown reaches us as None;
    # int(None) / transform([None]) would otherwise raise.
    missing = [name for name, value in input_dict.items() if value is None or value == ""]
    if missing:
        return "⚠️ Please provide a value for: " + ", ".join(missing)

    # Numeric fields: int() truncates floats and rejects NaN / infinity.
    for name in ("age", "educational-num", "capital-gain", "capital-loss", "hours-per-week"):
        try:
            input_dict[name] = int(input_dict[name])
        except (TypeError, ValueError, OverflowError):
            return f"⚠️ '{name}' must be a whole number."

    # Encode categorical inputs
    for col, le in label_encoders.items():
        # transform() raises ValueError on labels it was not fitted on.
        if input_dict[col] not in le.classes_.tolist():
            return f"⚠️ Unknown value for '{col}': {input_dict[col]!r}"
        input_dict[col] = le.transform([input_dict[col]])[0]

    # Create input DataFrame; columns= pins the column order to input_features
    # rather than relying on the dict's insertion order.
    input_df = pd.DataFrame([input_dict], columns=input_features)
    prediction = model.predict(input_df)[0]
    income_label = target_encoder.inverse_transform([prediction])[0]

    return f"🧾 Predicted Income: {income_label}"

# Gradio interface
def _category_dropdown(column, label):
    """Build a dropdown whose choices are the classes of the encoder for `column`."""
    return gr.Dropdown(choices=label_encoders[column].classes_.tolist(), label=label)


# Components are listed in the positional order of predict_salary's parameters.
input_components = [
    gr.Number(label="Age"),
    _category_dropdown('workclass', "Workclass"),
    _category_dropdown('education', "Education"),
    gr.Number(label="Educational Number"),
    _category_dropdown('marital-status', "Marital Status"),
    _category_dropdown('occupation', "Occupation"),
    _category_dropdown('relationship', "Relationship"),
    _category_dropdown('race', "Race"),
    _category_dropdown('gender', "Gender"),
    gr.Number(label="Capital Gain"),
    gr.Number(label="Capital Loss"),
    gr.Number(label="Hours Per Week"),
    _category_dropdown('native-country', "Native Country"),
]

iface = gr.Interface(
    fn=predict_salary,
    inputs=input_components,
    outputs=gr.Text(label="Prediction Result"),
    title="Employee Salary Predictor",
    description="Enter the employee details to predict if income is <=50K or >50K."
)

# share=True asks Gradio for a public share link in addition to the local URL.
iface.launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://522902798d31f34b3a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


