<a href="https://colab.research.google.com/github/Tausiq17/mlda/blob/main/T4_Performance_Metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    mean_squared_error,
    r2_score
)

# -------------------------------
# Step 1: read the employee data
# -------------------------------
# The CSV path is relative to the notebook's working directory.
data = pd.read_csv("Employee_Details.csv")  # renamed from HR_comma_sep.csv

# Show the heading and the first rows in one call; output is one item per line.
print("Dataset Preview:", data.head(), sep="\n")

# ===============================
# 2. Encode Categorical Variables
# ===============================
# Salary is an ordered category, so it gets explicit ordinal codes.
# LabelEncoder numbers classes in sorted (alphabetical) order, which would
# produce high=0, low=1, medium=2 -- not the low < medium < high ordering
# intended here -- and scikit-learn documents it as an encoder for target
# labels rather than input features.
SALARY_CODES = {"low": 1, "medium": 2, "high": 3}

salary_encoded = data["salary"].map(SALARY_CODES)
if salary_encoded.isna().any():
    # Fail loudly instead of letting NaN codes reach the models.
    unexpected = data.loc[salary_encoded.isna(), "salary"].unique().tolist()
    raise ValueError(f"Unexpected salary values: {unexpected}")
data["salary"] = salary_encoded.astype("int64")

print("\nAfter Encoding Salary:")
print(data.head())

# -------------------------------
# Step 3: choose predictors and label
# -------------------------------
# Predictor columns, spelled exactly as they appear in the CSV header
# (including "average_montly_hours"); "Department" is not used.
feature_columns = [
    "satisfaction_level",
    "last_evaluation",
    "number_project",
    "average_montly_hours",
    "time_spend_company",
    "Work_accident",
    "promotion_last_5years",
    "salary",
]
X = data[feature_columns]

# Label column that both models below are fitted against.
y = data["left"]

# -------------------------------
# Step 4: hold out a test set
# -------------------------------
# 20% of the rows go to the test partition; the fixed seed keeps the
# split repeatable between runs.
split_options = {"test_size": 0.2, "random_state": 42}
x_train, x_test, y_train, y_test = train_test_split(X, y, **split_options)

# -------------------------------
# Step 5: decision tree classifier
# -------------------------------
# Entropy is the split criterion; the seed keeps tree construction repeatable.
# fit() returns the estimator itself, so construction and training are chained.
dt = DecisionTreeClassifier(criterion="entropy", random_state=42).fit(x_train, y_train)
y_dt_pred = dt.predict(x_test)

# Score the held-out predictions.
accuracy_dt = accuracy_score(y_test, y_dt_pred)
dt_report = classification_report(y_test, y_dt_pred)
dt_confusion = confusion_matrix(y_test, y_dt_pred)

print("\nDecision Tree Accuracy:", accuracy_dt)
print("Classification Report:\n", dt_report)
print("Confusion Matrix:\n", dt_confusion)

# -------------------------------
# Step 6: linear regression
# -------------------------------
# NOTE(review): this fits a regressor on the same `left` label the classifier
# used, so the MSE and R² below score continuous predictions -- confirm that
# this is the intended demonstration of regression metrics.
lrr = LinearRegression().fit(x_train, y_train)
y_lrr_pred = lrr.predict(x_test)

mse = mean_squared_error(y_test, y_lrr_pred)
r2 = r2_score(y_test, y_lrr_pred)

# Report both metrics, one per line.
for label, value in (("\nLinear Regression MSE:", mse), ("Linear Regression R²:", r2)):
    print(label, value)


Dataset Preview:
   satisfaction_level  last_evaluation  number_project  average_montly_hours  \
0                0.38             0.53               2                   157   
1                0.80             0.86               5                   262   
2                0.11             0.88               7                   272   
3                0.72             0.87               5                   223   
4                0.37             0.52               2                   159   

   time_spend_company  Work_accident  left  promotion_last_5years Department  \
0                   3              0     1                      0      sales   
1                   6              0     1                      0      sales   
2                   4              0     1                      0      sales   
3                   5              0     1                      0      sales   
4                   3              0     1                      0      sales   

   salary  
0     low