In [1]:
# IMPORT LIBRARIES
import os

import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
from databricks import sql

In [2]:
# CONNECTION TO DATA BRICKS
# Connection settings are read from the environment so no credential lives in
# the notebook. Host and HTTP path fall back to the workspace values this
# notebook has always used; the access token has no fallback, so a missing
# DATABRICKS_TOKEN raises KeyError before any connection is attempted.
# NOTE(review): a personal access token used to be hard-coded in this cell;
# treat it as leaked and revoke it in the workspace.
DATABRICKS_SERVER_HOSTNAME = os.environ.get(
    "DATABRICKS_SERVER_HOSTNAME", "dbc-e7a12703-b692.cloud.databricks.com"
)
DATABRICKS_HTTP_PATH = os.environ.get(
    "DATABRICKS_HTTP_PATH", "/sql/1.0/warehouses/2fdd68999763e1f8"
)
DATABRICKS_TOKEN = os.environ["DATABRICKS_TOKEN"]

# Fact table joined to each dimension to resolve ids into display names.
# LEFT JOINs keep fact rows whose dimension id has no match, so the resolved
# name columns (Department, Status, ...) can be null downstream.
# NOTE(review): LIMIT without ORDER BY does not pin which 1000 rows come back.
# NOTE(review): is_current / *_effectivity_date suggest versioned history rows;
# confirm whether the metrics should filter on f.is_current.
query = """
    SELECT
        f.employee_id,
        f.full_name,
        d.name  AS Department,
        j.name  AS Job_Title,
        f.hire_date AS Hire_Date,
        l.name  AS Location,
        f.performance_rating AS Performance_Rating,
        f.experience_years AS Experience_Years,
        s.name  AS Status,
        w.name  AS Work_Mode,
        f.annual_salary AS Salary_INR,
        jl.name AS Job_Level,
        f.ingestion_timestamp,
        f.data_hash,
        f.start_effectivity_date,
        f.end_effectivity_date,
        f.is_current
    FROM fact_table_gold_hr_data AS f
        LEFT JOIN dim_department_gold AS d ON f.department_id = d.id
        LEFT JOIN dim_job_title_gold  AS j ON f.job_title_id  = j.id
        LEFT JOIN dim_location_gold   AS l ON f.location_id   = l.id
        LEFT JOIN dim_status_gold     AS s ON f.status_id     = s.id
        LEFT JOIN dim_work_mode_gold  AS w ON f.work_mode_id  = w.id
        LEFT JOIN dim_job_level_gold  AS jl ON f.job_level_id = jl.id
    LIMIT 1000
"""

# The query runs exactly once, inside the context manager, so the connection is
# closed as soon as the result has been loaded into the DataFrame.
with sql.connect(
    server_hostname=DATABRICKS_SERVER_HOSTNAME,
    http_path=DATABRICKS_HTTP_PATH,
    access_token=DATABRICKS_TOKEN,
) as connection:
    df = pd.read_sql(query, connection)


In [3]:
# Convert date column
# errors='coerce' turns unparseable hire dates into NaT instead of raising.
df['Hire_Date'] = pd.to_datetime(df['Hire_Date'], errors='coerce')

# Status values (compared after lower-casing) that count as an employee exit.
EXIT_STATUSES = ['resigned', 'terminated', 'left']

# Calculate turnover metrics
# A null Status does not match any exit status, so such rows count as "not exited".
total_employees = len(df)
exited = int(df['Status'].str.lower().isin(EXIT_STATUSES).sum())
# Guard the division: an empty query result would otherwise raise ZeroDivisionError.
overall_turnover = (exited / total_employees) * 100 if total_employees else 0.0

# Turnover by department
# Percentage of rows in each department whose status is an exit status.
# groupby drops rows with a null Department by default, so those rows are
# included in overall_turnover but not in this per-department table.
turnover_by_dept = (
    df.groupby('Department')['Status']
      .apply(lambda x: (x.str.lower().isin(EXIT_STATUSES).sum() / len(x)) * 100)
      .reset_index(name='TurnoverRate')
)



In [4]:
# DASH APP LAYOUT
app = Dash(__name__)
app.title = "Employee Analytics Dashboard"

# Dropdown entries, one per department present in the data. Nulls are dropped
# first: the query's LEFT JOIN can leave Department empty, and sorting a mix of
# strings and nulls raises TypeError.
department_options = [
    {'label': dept, 'value': dept}
    for dept in sorted(df['Department'].dropna().unique())
]

# Page structure: heading, headline turnover figure, optional department
# filter, tab selector, and a container whose children are produced by the
# callback bound to the 'tab-content' id.
app.layout = html.Div([
    html.H2("Employee Analytics Dashboard", style={'textAlign': 'center'}),

    # The headline figure is formatted once, when the layout is built.
    html.Div([
        html.P(f"Overall Turnover Rate: {overall_turnover:.1f}%", style={'fontSize': '18px'}),
    ], style={'textAlign': 'center'}),

    html.Div([
        dcc.Dropdown(
            id='dept-dropdown',
            options=department_options,
            value=None,  # no department selected initially
            placeholder='Select Department (optional)'
        )
    ], style={'width': '40%', 'margin': 'auto'}),

    dcc.Tabs(id='tabs', value='turnover', children=[
        dcc.Tab(label='Turnover Analysis', value='turnover'),
        dcc.Tab(label='Salary Analysis', value='salary'),
        dcc.Tab(label='Experience vs Performance', value='exp_perf'),
    ]),
    html.Div(id='tab-content', style={'padding': '20px'})
])

In [5]:
# Status values (compared after lower-casing) that count as an employee exit.
_EXIT_STATUSES = ['resigned', 'terminated', 'left']

# Columns averaged per group for the experience/performance scatter.
_SCATTER_METRICS = ['Experience_Years', 'Performance_Rating', 'Salary_INR']


def _turnover_rate(statuses):
    """Return the percentage of values in ``statuses`` that are exit statuses."""
    return (statuses.str.lower().isin(_EXIT_STATUSES).sum() / len(statuses)) * 100


def _turnover_figure(selected_dept):
    """Build the turnover bar chart: per job level for one department, else per department."""
    if selected_dept:
        dept_rows = df[df['Department'] == selected_dept]
        rates = (
            dept_rows.groupby('Job_Level')['Status']
            .apply(_turnover_rate)
            .reset_index(name='TurnoverRate')
        )
        x_col = 'Job_Level'
        title = f'Turnover Rate by Job Level – {selected_dept}'
    else:
        # Reuse the per-department table computed once at notebook level.
        rates = turnover_by_dept
        x_col = 'Department'
        title = 'Turnover Rate by Department'

    fig = px.bar(
        rates, x=x_col, y='TurnoverRate',
        title=title,
        text=rates['TurnoverRate'].map('{:.1f}%'.format)
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(yaxis_title='Turnover Rate (%)', xaxis_title='')
    return fig


def _salary_figure(selected_dept):
    """Build the salary chart: salaries by hire date for one department, else department averages."""
    if selected_dept:
        dept_rows = df[df['Department'] == selected_dept]
        fig = px.line(
            dept_rows.sort_values('Hire_Date'),
            x='Hire_Date', y='Salary_INR',
            title=f'Salary Trend Over Time – {selected_dept}',
            markers=True
        )
    else:
        avg_salary = df.groupby('Department', as_index=False)['Salary_INR'].mean()
        fig = px.bar(
            avg_salary, x='Department', y='Salary_INR',
            title='Average Salary by Department',
            text=avg_salary['Salary_INR'].map('₹{:,.0f}'.format)
        )
        fig.update_traces(textposition='outside')

    # The plotted column is Salary_INR, so label it in rupees rather than USD.
    fig.update_layout(
        yaxis_title='Salary (INR)',
        xaxis_title='',
        yaxis_tickprefix="₹",
        yaxis_tickformat=",",
    )
    return fig


def _experience_figure(selected_dept):
    """Build the experience-vs-performance scatter of per-group averages, sized by average salary."""
    if selected_dept:
        rows = df[df['Department'] == selected_dept]
        group_cols = ['Job_Title', 'Job_Level']
        color_col = 'Job_Level'
        title = f'Experience vs Performance by Job Title – {selected_dept}'
        hover = {'Job_Title': True}
    else:
        rows = df
        group_cols = ['Department', 'Job_Title']
        color_col = 'Department'
        title = 'Experience vs Performance by Job Title (All Departments)'
        hover = {'Job_Title': True, 'Department': True}

    grouped = (
        rows.groupby(group_cols, as_index=False)
        .agg({metric: 'mean' for metric in _SCATTER_METRICS})
        # A group whose average is NaN cannot be positioned, and Plotly rejects
        # NaN marker sizes, so drop incomplete groups instead of failing the tab.
        .dropna(subset=_SCATTER_METRICS)
    )
    hover.update({
        'Experience_Years': ':.1f',
        'Performance_Rating': ':.1f',
        'Salary_INR': ':,.0f',  # plain thousands separator; the value is INR, not "$"
    })

    fig = px.scatter(
        grouped,
        x='Experience_Years',
        y='Performance_Rating',
        color=color_col,
        size='Salary_INR',
        title=title,
        hover_data=hover
    )
    fig.update_layout(
        xaxis_title='Average Years of Experience',
        yaxis_title='Average Performance Rating (1–5)',
        plot_bgcolor='white'
    )
    return fig


@app.callback(
    Output('tab-content', 'children'),
    [Input('tabs', 'value'),
     Input('dept-dropdown', 'value')]
)
def render_tab_content(tab, selected_dept):
    """Render the content of the selected tab, optionally filtered to one department.

    Args:
        tab: Value of the active tab: 'turnover', 'salary' or 'exp_perf'.
        selected_dept: Department chosen in the dropdown; a falsy value
            (e.g. None) means all departments.

    Returns:
        A ``dcc.Graph`` holding the figure for the tab, or None for a tab
        value this callback does not recognise.
    """
    # --- TURNOVER TAB ---
    if tab == 'turnover':
        return dcc.Graph(figure=_turnover_figure(selected_dept))

    # --- SALARY TAB ---
    if tab == 'salary':
        return dcc.Graph(figure=_salary_figure(selected_dept))

    # --- EXPERIENCE VS PERFORMANCE TAB ---
    if tab == 'exp_perf':
        return dcc.Graph(figure=_experience_figure(selected_dept))

    # Unknown tab value: nothing to render.
    return None


In [7]:
# RUN SERVER
# Start the Dash app only when this code is executed as the main module.
# With jupyter_mode='external' the app is reached at its own URL on port 8053.
if __name__ == "__main__":
    app.run(
        port=8053,
        jupyter_mode='external',
        debug=True,
    )

Dash app running on http://127.0.0.1:8053/
