In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report


# -------- STEP 1: Load data --------
df = pd.read_csv("employee_performance.csv")

# -------- STEP 2: Clean & Preprocess --------
# Presumably a row identifier rather than a feature (verify against the
# dataset); errors="ignore" keeps the script working if the column is absent.
df = df.drop(columns=["Employee_ID"], errors="ignore")

# Rows without a target value cannot be used for supervised training.
df = df.dropna(subset=["Performance_Score"])
# Fill numeric gaps with the column mean first; whatever is still missing
# afterwards belongs to non-numeric columns and gets the "Unknown" category.
df = df.fillna(df.mean(numeric_only=True))
df = df.fillna("Unknown")

# Encode categorical columns.
# Every non-numeric feature is encoded (not just a hard-coded list), because
# GaussianNB cannot fit on string values and a missing hard-coded column would
# raise KeyError. The target is skipped here so that STEP 4 still sees its
# original dtype.
for col in df.columns:
    if col == "Performance_Score":
        continue
    if not pd.api.types.is_numeric_dtype(df[col]):
        # Cast to str so columns mixing strings with other Python objects
        # (e.g. bools next to the "Unknown" filler) are uniformly encodable.
        df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# -------- STEP 3: Split features and target --------
X = df.drop("Performance_Score", axis=1)
y = df["Performance_Score"]

# -------- STEP 4: Discretise target & train/test split --------
# A numeric score is binned into three quantile-based classes; a target that
# is already non-numeric is used as-is.
if pd.api.types.is_numeric_dtype(y):
    y = pd.qcut(y, q=3, labels=["Low", "Medium", "High"])

# 80/20 split with a fixed seed so the run is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -------- STEP 5: Train Bayesian Classifier --------
model = GaussianNB()
model.fit(X_train, y_train)

# -------- STEP 6: Predict & Evaluate --------
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.398989898989899

Classification Report:
               precision    recall  f1-score   support

        High       0.41      0.24      0.30        72
         Low       0.40      0.84      0.55        70
      Medium       0.27      0.05      0.09        56

    accuracy                           0.40       198
   macro avg       0.36      0.38      0.31       198
weighted avg       0.37      0.40      0.33       198

