In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.20.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import gradio as gr
import os

# Load and preprocess the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
           'marital-status', 'occupation', 'relationship', 'race',
           'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'target']

# skipinitialspace=True strips the blank that follows every comma, so the
# file's missing-value marker reaches the parser as '?' (not ' ?').
# na_values has to use that stripped form, otherwise nothing is flagged as
# missing and the dropna() below is a no-op.
df = pd.read_csv(url, header=None, names=columns, na_values='?', skipinitialspace=True)
df = df.dropna()

# Binary target: 1 for '>50K', 0 for everything else.
df['target'] = (df['target'] == '>50K').astype(int)

# One-hot encode every remaining text column. drop_first=True removes the
# first category of each feature; that category is represented by all of the
# feature's dummy columns being 0.
categorical_columns = df.select_dtypes(include=['object']).columns
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Split the dataset
X = df.drop('target', axis=1)
y = df['target']

# Train the Random Forest Classifier (fixed seed for reproducible fits)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# Define log file path
log_file_path = "income_prediction_log.csv"

# Check if the log file exists and write header if it doesn't.
# Header layout: 'prediction' followed by the model's feature columns, in order.
if not os.path.isfile(log_file_path):
    with open(log_file_path, 'w') as f:
        f.write(','.join(X.columns.insert(0, 'prediction')) + '\n')

# Function to make predictions and log them
def predict_income(age, workclass, fnlwgt, education, education_num, marital_status, occupation, relationship, race, sex, capital_gain, capital_loss, hours_per_week, native_country):
    """Predict the income bracket for one person and append the call to the log.

    The numeric arguments are copied into the matching feature columns. Each
    categorical argument switches on its one-hot column
    ``"<feature>_<value>"`` when that column exists in ``X.columns``.

    A categorical value without a dummy column (a category that was dropped
    during encoding, an unknown value, or ``None``) leaves all of that
    feature's dummy columns at 0.

    Returns:
        "Income > $50K" if the model predicts class 1, otherwise
        "Income <= $50K".

    Side effects:
        Appends one comma-separated line to ``log_file_path``: the prediction
        label followed by the feature values in ``X.columns`` order.
    """
    features = {col: 0 for col in X.columns}  # Initialize all to 0

    # Numeric inputs map directly onto their columns.
    features["age"] = age
    features["fnlwgt"] = fnlwgt
    features["education-num"] = education_num
    features["capital-gain"] = capital_gain
    features["capital-loss"] = capital_loss
    features["hours-per-week"] = hours_per_week

    # Categorical inputs, keyed by the prefix used in the dummy column names.
    categorical_inputs = {
        "workclass": workclass,
        "education": education,
        "marital-status": marital_status,
        "occupation": occupation,
        "relationship": relationship,
        "race": race,
        "sex": sex,
        "native-country": native_country,
    }
    for prefix, value in categorical_inputs.items():
        dummy_column = f"{prefix}_{value}"
        # Only touch columns the model was trained on. Adding a new key here
        # would not affect the prediction but would append a stray value to
        # the logged row and misalign it with the CSV header.
        if dummy_column in features:
            features[dummy_column] = 1

    # Create a DataFrame for the input features
    input_data = pd.DataFrame([features], columns=X.columns)

    # Make the prediction
    prediction = model.predict(input_data)[0]
    prediction_label = "Income > $50K" if prediction == 1 else "Income <= $50K"

    # Log the input data and prediction to CSV, in the same column order as
    # the header (prediction first, then X.columns).
    with open(log_file_path, 'a') as f:
        log_line = [prediction_label] + [features[col] for col in X.columns]
        f.write(','.join(map(str, log_line)) + '\n')

    return prediction_label

# Define the input features.
# Order matters: Gradio passes these positionally to predict_income.
# Every dropdown value must match a raw category string from the Adult
# dataset exactly, because predict_income builds the one-hot column name as
# "<feature>_<value>"; a value that does not match is silently encoded as
# all zeros for that feature.
input_features = [
    gr.Slider(minimum=0, maximum=100, label="Age"),
    gr.Dropdown(choices=["Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
                         "Local-gov", "State-gov", "Without-pay", "Never-worked"], label="Work Class"),
    gr.Number(label="Final Weight"),
    # Education levels as they appear in the dataset (note the grouped
    # "1st-4th" and "5th-6th" levels).
    gr.Dropdown(choices=["Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
                         "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th",
                         "Masters", "1st-4th", "10th", "Doctorate", "5th-6th",
                         "Preschool"], label="Education"),
    gr.Slider(minimum=1, maximum=16, label="Education Number"),
    gr.Dropdown(choices=["Married-civ-spouse", "Divorced", "Never-married", "Separated",
                         "Widowed", "Married-spouse-absent", "Married-AF-spouse"], label="Marital Status"),
    gr.Dropdown(choices=["Tech-support", "Craft-repair", "Other-service", "Sales",
                         "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
                         "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
                         "Transport-moving", "Priv-house-serv", "Protective-serv",
                         "Armed-Forces"], label="Occupation"),
    gr.Dropdown(choices=["Wife", "Own-child", "Husband", "Not-in-family",
                         "Other-relative", "Unmarried"], label="Relationship"),
    gr.Dropdown(choices=["White", "Asian-Pac-Islander", "Amer-Indian-Eskimo",
                         "Other", "Black"], label=" Race"),
    gr.Dropdown(choices=["Female", "Male"], label="Sex"),
    gr.Slider(minimum=0, maximum=100000, label="Capital Gain"),
    gr.Slider(minimum=0, maximum=5000, label="Capital Loss"),
    gr.Slider(minimum=1, maximum=99, label="Hours per Week"),
    # "Trinadad&Tobago" is the dataset's own spelling; it must be kept so the
    # value matches the trained one-hot column.
    gr.Dropdown(choices=["United-States", "Cambodia", "England", "Puerto-Rico",
                         "Canada", "Germany", "Outlying-US(Guam-USVI-etc)",
                         "India", "Japan", "Greece", "South", "China",
                         "Cuba", "Iran", "Honduras", "Philippines", "Italy",
                         "Poland", "Jamaica", "Vietnam", "Mexico", "Portugal",
                         "Ireland", "France", "Dominican-Republic", "Laos",
                         "Ecuador", "Taiwan", "Haiti", "Columbia", "Hungary",
                         "Guatemala", "Nicaragua", "Scotland", "Thailand",
                         "Yugoslavia", "El-Salvador", "Trinadad&Tobago", "Peru",
                         "Hong", "Holand-Netherlands"], label="Native Country")
]

# Single text box that shows the label returned by predict_income.
output = gr.Textbox(label="Income Prediction")

# Build the Gradio app first, then start it as a separate step.
demo = gr.Interface(
    fn=predict_income,
    inputs=input_features,
    outputs=output,
    title="Income Analysis Using Machine Learning Algorithm",
    description="Your Future Analyzed: Income Prediction Made easier",
)
demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4567fc5476546f85a7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


