# In [None]:
import streamlit as st
import pandas as pd
import joblib
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Page-level settings: browser tab title and the wide layout.
st.set_page_config(
    page_title="Employee Performance Dashboard",
    layout="wide",
)

@st.cache_resource
def load_model():
    """Return the object deserialized from ``random_forest_model.pkl``.

    The file is read with ``joblib.load``; the call is wrapped in
    ``st.cache_resource``.
    """
    model_path = "random_forest_model.pkl"
    return joblib.load(model_path)

@st.cache_resource
def load_label_encoders():
    """Return the object deserialized from ``label_encoders.pkl``.

    The file is read with ``joblib.load``; the call is wrapped in
    ``st.cache_resource``. The rest of the script indexes the result by
    column name (e.g. ``label_encoders['Department']``).
    """
    encoders_path = "label_encoders.pkl"
    return joblib.load(encoders_path)

@st.cache_data
def load_data():
    """Read the employee CSV into a DataFrame.

    The call is wrapped in ``st.cache_data``; the file is parsed with
    ``pd.read_csv`` using its default options.
    """
    csv_path = "Extended_Employee_Performance_and_Productivity_Data (1).csv"
    return pd.read_csv(csv_path)

# Fetch the model, the encoders and the dataset through the cached loaders.
model = load_model()
label_encoders = load_label_encoders()
df = load_data()

# Page heading, followed by the three tabs that the sections below fill in.
st.title("Employee Performance Dashboard")
tab_titles = ["Visualisasi", "Analisis Interaktif", "Prediksi Interaktif"]
tab1, tab2, tab3 = st.tabs(tab_titles)

# --- TAB 1: VISUALIZATION ---
# Every figure below is closed right after it is rendered: pyplot keeps each
# figure it creates registered until it is closed, and Streamlit reruns the
# script on each interaction, so unclosed figures would keep piling up.
with tab1:
    st.header("Visualisasi Performa Karyawan")

    # Summary metrics: row count, mean score and highest score.
    col1, col2, col3 = st.columns(3)
    col1.metric("Jumlah Karyawan", len(df))
    col2.metric("Rata-rata Skor", round(df['Performance_Score'].mean(), 2))
    col3.metric("Performa Maksimum", df['Performance_Score'].max())

    st.divider()

    # Bar chart + pie chart of the score distribution.
    st.subheader("Distribusi Performance Score")
    col_a, col_b = st.columns(2)

    # One colour per distinct score; defined once here because both charts
    # below use the same palette.
    bar_palette = sns.color_palette("Blues", len(df['Performance_Score'].unique()))

    with col_a:
        fig1, ax1 = plt.subplots(figsize=(5, 4))
        sns.countplot(data=df, x="Performance_Score", palette=bar_palette, ax=ax1)
        ax1.set_title("Jumlah Karyawan per Skor Performa")
        st.pyplot(fig1)
        plt.close(fig1)

    with col_b:
        fig2, ax2 = plt.subplots(figsize=(4, 4))
        # sort_index() orders the slices by score value.
        df['Performance_Score'].value_counts().sort_index().plot.pie(
            autopct='%1.1f%%', startangle=90, ax=ax2, colors=bar_palette)
        ax2.set_ylabel("")
        ax2.set_title("Proporsi Kategori Performa", fontsize=8)
        st.pyplot(fig2)
        plt.close(fig2)

    st.divider()

    # Performance trend over time.
    st.subheader("Tren Skor Performa berdasarkan Waktu")
    mode = st.radio("Pilih Mode Tampilan", ["Rata-rata Bulanan", "Per Divisi (Top 3)"])

    # Bucket every hire date to the first day of its month so rows can be
    # grouped per month.
    df['Hire_Date'] = pd.to_datetime(df['Hire_Date'])
    df['Hire_Month'] = df['Hire_Date'].dt.to_period("M").dt.to_timestamp()

    col_f, col_g = st.columns(2)

    with col_f:
        if mode == "Rata-rata Bulanan":
            # Mean score of all employees per hire month.
            line_data = df.groupby('Hire_Month')['Performance_Score'].mean().reset_index()
            fig4, ax4 = plt.subplots(figsize=(6, 4))
            sns.lineplot(data=line_data, x='Hire_Month', y='Performance_Score', ax=ax4)
            ax4.set_title("Rata-rata Skor Per Bulan")
            ax4.set_xlabel("Bulan")
            ax4.set_ylabel("Skor")
            ax4.xaxis.set_major_locator(plt.MaxNLocator(5))
            st.pyplot(fig4)
            plt.close(fig4)
        else:
            # The three departments with the most rows.
            top_dept = df['Department'].value_counts().head(3).index
            dept_means = {
                dept: df[df['Department'] == dept].groupby('Hire_Month')['Performance_Score'].mean()
                for dept in top_dept
            }
            # Department whose monthly means average highest; it is drawn
            # thicker and fully opaque, the others thinner and faded.
            max_dept = max(dept_means, key=lambda k: dept_means[k].mean())

            fig5, ax5 = plt.subplots(figsize=(6, 4))
            for dept in top_dept:
                temp = dept_means[dept].reset_index()
                sns.lineplot(
                    data=temp,
                    x='Hire_Month',
                    y='Performance_Score',
                    label=dept,
                    ax=ax5,
                    linewidth=2 if dept == max_dept else 1,
                    alpha=1 if dept == max_dept else 0.3
                )
            ax5.set_title("Top 3 Divisi")
            ax5.set_xlabel("Bulan")
            ax5.set_ylabel("Skor")
            ax5.legend(title="Divisi")
            ax5.xaxis.set_major_locator(plt.MaxNLocator(5))
            st.pyplot(fig5)
            plt.close(fig5)

    with col_g:
        # Selected numeric feature plotted against each score.
        st.subheader("Analisis Fitur Numerik terhadap Skor")
        col_e = st.selectbox("Pilih Fitur Numerik:", [
            'Monthly_Salary', 'Work_Hours_Per_Week', 'Projects_Handled',
            'Overtime_Hours', 'Sick_Days', 'Training_Hours',
            'Employee_Satisfaction_Score'])

        fig3, ax3 = plt.subplots(figsize=(6, 4))
        sns.barplot(data=df, x="Performance_Score", y=col_e, palette="mako", ax=ax3)
        ax3.set_title(f"Rata-rata {col_e} per Skor Performa")
        ax3.set_xlabel("Skor")
        ax3.set_ylabel(col_e)
        st.pyplot(fig3)
        plt.close(fig3)

    st.divider()


# --- TAB 2: INTERACTIVE ANALYSIS ---
with tab2:
    st.header("Analisis Berdasarkan Filter")

    def _clamp_range(preferred, low, high):
        """Return the ``preferred`` (start, end) pair with both ends forced into [low, high]."""
        start = min(max(preferred[0], low), high)
        end = min(max(preferred[1], low), high)
        return (start, end)

    def _within(column, bounds):
        """Return a boolean mask of rows whose ``column`` value lies in ``bounds`` (inclusive)."""
        return (df[column] >= bounds[0]) & (df[column] <= bounds[1])

    # Numeric range filters. Where the slider limits come from the data, the
    # preferred default range is clamped into those limits, because a default
    # outside the slider's min/max is rejected by st.slider.
    age_min, age_max = int(df['Age'].min()), int(df['Age'].max())
    age_range = st.slider(
        "Usia", age_min, age_max, _clamp_range((25, 40), age_min, age_max))

    salary_min = float(df['Monthly_Salary'].min())
    salary_max = float(df['Monthly_Salary'].max())
    salary_range = st.slider(
        "Rentang Gaji Bulanan", salary_min, salary_max,
        _clamp_range((3850.0, 9000.0), salary_min, salary_max))

    years_min = int(df['Years_At_Company'].min())
    years_max = int(df['Years_At_Company'].max())
    years_at_company_range = st.slider(
        "Rentang Tahun di Perusahaan", years_min, years_max,
        _clamp_range((0, 10), years_min, years_max))

    satisfaction_range = st.slider("Rentang Skor Kepuasan Karyawan", 1.0, 5.0, (2.0, 4.5))
    remote_freq_range = st.slider("Rentang Frekuensi Kerja Remote", 0, 100, (0, 100))

    # Categorical filters; every category is selected by default.
    education_options = st.multiselect("Pilih Tingkat Pendidikan", options=df['Education_Level'].unique(), default=list(df['Education_Level'].unique()))
    jobtitle_options = st.multiselect("Pilih Jabatan", options=df['Job_Title'].unique(), default=list(df['Job_Title'].unique()))

    # Keep only the rows that satisfy every filter at once.
    filtered_df = df[
        _within('Age', age_range) &
        _within('Monthly_Salary', salary_range) &
        _within('Years_At_Company', years_at_company_range) &
        _within('Employee_Satisfaction_Score', satisfaction_range) &
        _within('Remote_Work_Frequency', remote_freq_range) &
        (df['Education_Level'].isin(education_options)) &
        (df['Job_Title'].isin(jobtitle_options))
    ]

    st.dataframe(filtered_df, use_container_width=True)

    # Box plots of salary and satisfaction per performance score.
    st.subheader("Distribusi Fitur Berdasarkan Performance Score")

    if filtered_df.empty:
        # With no matching rows there is nothing to plot; say so instead of
        # handing seaborn an empty frame.
        st.info("Tidak ada data yang cocok dengan filter yang dipilih.")
    else:
        box_left, box_right = st.columns(2)

        with box_left:
            fig_a, ax_a = plt.subplots()
            sns.boxplot(data=filtered_df, x="Performance_Score", y="Monthly_Salary", palette="pastel", ax=ax_a)
            ax_a.set_title("Distribusi Gaji per Skor Performa")
            st.pyplot(fig_a)
            # Close the figure so it does not stay registered in pyplot
            # after it has been rendered.
            plt.close(fig_a)

        with box_right:
            fig_b, ax_b = plt.subplots()
            sns.boxplot(data=filtered_df, x="Performance_Score", y="Employee_Satisfaction_Score", palette="Set2", ax=ax_b)
            ax_b.set_title("Distribusi Kepuasan per Skor Performa")
            st.pyplot(fig_b)
            plt.close(fig_b)


# --- TAB 3: INTERACTIVE PREDICTION ---
with tab3:
    st.header("Prediksi Performance Score Berdasarkan Input")

    def _encode(column, value):
        """Return the encoded form of ``value`` from the encoder stored under ``column``."""
        return label_encoders[column].transform([value])[0]

    # User inputs. Categorical options are taken from each encoder's
    # ``classes_`` so the chosen value is always one the encoder knows.
    department = st.selectbox("Department", label_encoders['Department'].classes_)
    gender = st.selectbox("Gender", label_encoders['Gender'].classes_)
    age = st.slider("Age", 22, 60, 40)
    job_title = st.selectbox("Job Title", label_encoders['Job_Title'].classes_)
    # NOTE(review): hire_date is collected here but is not part of input_dict
    # below — confirm whether the model should receive it or the widget
    # should be removed.
    hire_date = st.date_input("Hire Date", datetime(2020, 1, 1))
    years_at_company = st.slider("Years at Company", 0, 10, 4)
    education = st.selectbox("Education Level", label_encoders['Education_Level'].classes_)
    monthly_salary = st.number_input("Monthly Salary", 3850.0, 9000.0, 6400.0)
    work_hours = st.slider("Work Hours Per Week", 30, 60, 45)
    projects = st.slider("Projects Handled", 0, 49, 24)
    overtime = st.slider("Overtime Hours", 0, 29, 15)
    sick_days = st.slider("Sick Days", 0, 14, 7)
    remote_freq = st.slider("Remote Work Frequency", 0, 100, 50)
    team_size = st.slider("Team Size", 1, 19, 10)
    training_hours = st.slider("Training Hours", 0, 99, 49)
    promotions = st.slider("Promotions", 0, 2, 1)
    satisfaction = st.slider("Employee Satisfaction Score", 1.0, 5.0, 3.0)
    # Previously a hard-coded ['False', 'True'] string list; using the
    # encoder's own classes keeps this consistent with the other categorical
    # inputs and avoids passing the encoder a label it was not fitted on.
    resigned = st.selectbox("Resigned?", label_encoders['Resigned'].classes_)

    # Single-row feature mapping; key order is kept exactly as before because
    # it determines the column order of the frame passed to the model.
    input_dict = {
        'Department': _encode('Department', department),
        'Gender': _encode('Gender', gender),
        'Age': age,
        'Job_Title': _encode('Job_Title', job_title),
        'Years_At_Company': years_at_company,
        'Education_Level': _encode('Education_Level', education),
        'Monthly_Salary': monthly_salary,
        'Work_Hours_Per_Week': work_hours,
        'Projects_Handled': projects,
        'Overtime_Hours': overtime,
        'Sick_Days': sick_days,
        'Remote_Work_Frequency': remote_freq,
        'Team_Size': team_size,
        'Training_Hours': training_hours,
        'Promotions': promotions,
        'Employee_Satisfaction_Score': satisfaction,
        'Resigned': _encode('Resigned', resigned)
    }

    input_df = pd.DataFrame([input_dict])

    # Predict only when the button is pressed; show the first (only) result.
    if st.button("Prediksi Performance Score"):
        pred = model.predict(input_df)[0]
        st.success(f"Prediksi Performance Score: **{pred}**")
