In [4]:
# Install required libraries (skip if already installed)
# !pip install pandas scikit-learn matplotlib seaborn plotly dash

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import plotly.express as px
from dash import Dash, html, dcc, Input, Output

# Load dataset
df = pd.read_csv("Employee Attrition Analysis.csv")

# Drop irrelevant columns (rebinding instead of mutating in place; same result)
df = df.drop(columns=["EmployeeNumber", "Over18", "StandardHours", "EmployeeCount"])

# Encode categorical variables.
# The one LabelEncoder is refit on every column, so once the loop finishes `le`
# only remembers the last column it saw. Record each column's fitted classes so
# the integer codes can be decoded later: label_classes[col][code] is the
# original category value for that code.
le = LabelEncoder()
label_classes = {}
for col in df.select_dtypes(include='object').columns:
    df[col] = le.fit_transform(df[col])
    label_classes[col] = le.classes_

# Split data
X = df.drop("Attrition", axis=1)
# 1 = Yes, 0 = No -- assumes the raw column holds exactly "Yes"/"No";
# label_classes["Attrition"] shows the actual code order.
y = df["Attrition"]
# NOTE(review): the split is not stratified. If the Attrition classes are
# imbalanced, consider passing stratify=y so train and test keep the same class
# ratio. Left unchanged here because it would alter the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model (fixed seed so the fitted forest and its importances are repeatable)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out 20%
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Feature importance: rank every feature by the forest's importance score and
# keep the ten highest for the dashboard.
importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
top_features = importances.head(10)

# --------- Build Dashboard ---------

app = Dash()

# The dropdown only offers the model's top-ranked features, and which features
# make that list depends on the fitted model. Keep 'Age' as the initial
# selection when it is offered; otherwise start on the highest-ranked feature
# so the dropdown never opens on a value missing from its own options.
if 'Age' in top_features.index:
    default_feature = 'Age'
else:
    default_feature = top_features.index[0]

app.layout = html.Div([
    html.H1("HR Dashboard: Employee Attrition Insights", style={'textAlign': 'center'}),

    # Feature selector that drives the 'feature-graph' box plot.
    dcc.Dropdown(
        id='feature-dropdown',
        options=[{'label': col, 'value': col} for col in top_features.index],
        value=default_feature,
        # An empty selection has no meaningful plot, so do not allow clearing.
        clearable=False,
        style={'width': '50%', 'margin': 'auto'}
    ),

    # Filled in by the callback registered on 'feature-dropdown'.
    dcc.Graph(id='feature-graph'),

    html.H3("Top Factors Influencing Attrition", style={'textAlign': 'center'}),
    # Static chart: built once when the layout is defined, not via a callback.
    dcc.Graph(
        figure=px.bar(top_features, title="Top 10 Important Features")
    )
])

@app.callback(
    Output('feature-graph', 'figure'),
    Input('feature-dropdown', 'value')
)
def update_graph(selected_feature):
    """Redraw the box plot of the chosen feature, grouped by attrition class.

    Args:
        selected_feature: Column name chosen in the 'feature-dropdown'
            component, or None when the dropdown has been cleared.

    Returns:
        A Plotly Express box-plot figure with ``Attrition`` on the x-axis and
        the selected column on the y-axis, coloured by ``Attrition``.

    Raises:
        PreventUpdate: If nothing is selected, so the graph keeps showing its
            previous figure instead of plotting a missing column.
    """
    # Imported locally so this block does not depend on the file-level imports.
    from dash.exceptions import PreventUpdate

    # A cleared dropdown delivers None; there is no column to plot in that case.
    if not selected_feature:
        raise PreventUpdate

    fig = px.box(df, x="Attrition", y=selected_feature, color="Attrition",
                 labels={"Attrition": "Attrition (0=No, 1=Yes)"})
    return fig

Accuracy: 0.8809523809523809
[[254   1]
 [ 34   5]]
              precision    recall  f1-score   support

           0       0.88      1.00      0.94       255
           1       0.83      0.13      0.22        39

    accuracy                           0.88       294
   macro avg       0.86      0.56      0.58       294
weighted avg       0.88      0.88      0.84       294

