In [1]:
!pip install -q gradio imbalanced-learn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from imblearn.over_sampling import RandomOverSampler
warnings.filterwarnings('ignore')
from google.colab import files
uploaded = files.upload()
filename = list(uploaded.keys())[0]
data = pd.read_csv(filename)
data.drop_duplicates(inplace=True)
data['Attrition'] = data['Attrition'].replace({'No': 0, 'Yes': 1})
data['OverTime'] = data['OverTime'].map({'No': 0, 'Yes': 1})
data['Gender'] = data['Gender'].map({'Male': 0, 'Female': 1})
encoding_cols = ['BusinessTravel','Department','EducationField','JobRole']
label_encoders = {}
for col in encoding_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le
X = data.drop(['Attrition'], axis=1)
y = data['Attrition'].values
X = pd.get_dummies(X, drop_first=True)
ros = RandomOverSampler(random_state=42)
X_res, y_res = ros.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
logreg = LogisticRegression(max_iter=500)
logreg.fit(X_train, y_train)
print("Model trained successfully!")
print("Accuracy:", accuracy_score(y_test, logreg.predict(X_test)))
def predict_attrition(BusinessTravel, Department, JobRole, Gender, OverTime,
                      Age, DistanceFromHome, EnvironmentSatisfaction,
                      JobSatisfaction, MonthlyIncome, NumCompaniesWorked,
                      PercentSalaryHike, PerformanceRating,TotalWorkingYears,
                      TrainingTimesLastYear, WorkLifeBalance, YearsAtCompany):
    input_data = pd.DataFrame({
        'BusinessTravel': [label_encoders['BusinessTravel'].transform([BusinessTravel])[0]],
        'Department': [label_encoders['Department'].transform([Department])[0]],
        'JobRole': [label_encoders['JobRole'].transform([JobRole])[0]],
        'Gender': [0 if Gender == 'Male' else 1],
        'OverTime': [0 if OverTime == 'No' else 1],
        'Age': [Age],
        'DistanceFromHome': [DistanceFromHome],
        'EnvironmentSatisfaction': [EnvironmentSatisfaction],
        'JobSatisfaction': [JobSatisfaction],
        'MonthlyIncome': [MonthlyIncome],
        'NumCompaniesWorked': [NumCompaniesWorked],
        'PercentSalaryHike': [PercentSalaryHike],
        'PerformanceRating': [PerformanceRating],
        'TotalWorkingYears': [TotalWorkingYears],
        'TrainingTimesLastYear': [TrainingTimesLastYear],
        'WorkLifeBalance': [WorkLifeBalance],
        'YearsAtCompany': [YearsAtCompany]
    })
    input_data = pd.get_dummies(input_data, drop_first=True).reindex(columns=X.columns, fill_value=0)
    pred = logreg.predict(input_data)[0]
    prob = logreg.predict_proba(input_data)[0][1]
    result = "Likely to Leave " if pred == 1 else "Likely to Stay "
    return f"{result} (Probability: {prob:.2f})"
categorical_choices = {
    "BusinessTravel": label_encoders['BusinessTravel'].classes_.tolist(),
    "Department": label_encoders['Department'].classes_.tolist(),
    "EducationField": label_encoders['EducationField'].classes_.tolist(),
    "JobRole": label_encoders['JobRole'].classes_.tolist(),
}
demo = gr.Interface(
    fn=predict_attrition,
    inputs=[
        gr.Dropdown(categorical_choices['BusinessTravel'], label="BusinessTravel"),
        gr.Dropdown(categorical_choices['Department'], label="Department"),
        gr.Dropdown(categorical_choices['JobRole'], label="JobRole"),
        gr.Dropdown(['Male', 'Female'], label="Gender"),
        gr.Dropdown(['No', 'Yes'], label="OverTime"),
        gr.Number(label="Age"),
        gr.Number(label="DistanceFromHome"),
        gr.Number(label="EnvironmentSatisfaction"),
        gr.Number(label="JobSatisfaction"),
        gr.Number(label="MonthlyIncome"),
        gr.Number(label="NumCompaniesWorked"),
        gr.Number(label="PercentSalaryHike"),
        gr.Number(label="PerformanceRating"),
        gr.Number(label="TotalWorkingYears"),
        gr.Number(label="TrainingTimesLastYear"),
        gr.Number(label="WorkLifeBalance"),
        gr.Number(label="YearsAtCompany"),
    ],
    outputs="text",
    title="Employee Attrition Prediction",
    description="Predict whether an employee is likely to leave or stay based on input attributes."
)
demo.launch()


Saving Employee_Attrition.csv to Employee_Attrition.csv
Model trained successfully!
Accuracy: 0.6558704453441295
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://112ef18d86449af063.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


