In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the preprocessed dataset. The untouched frame keeps its own name so the
# engineered frame below can be rebuilt without re-reading the file.
raw_df = pd.read_csv("../Dataset/after_preprocessing.csv")

# Feature engineering as one non-mutating chain (no inplace edits):
#   Experience          - 'Years at Company' plus 'Company Tenure'
#   High Income         - 1 where 'Monthly Income' is above 0.5, otherwise 0
#   Promotion_Frequency - 'Number of Promotions' per ('Years at Company' + 1)
# The three source columns are dropped once the derived ones exist.
# NOTE(review): the 0.5 cutoff presumably targets a min-max scaled income
# column - confirm against the preprocessing step.
# NOTE(review): the "+ 1" looks like a guard against division by zero; that
# only holds if 'Years at Company' can never be -1 - confirm.
df = raw_df.assign(
    **{
        'Experience': raw_df['Years at Company'] + raw_df['Company Tenure'],
        # Vectorized comparison instead of a per-row Python lambda; missing
        # values compare False and therefore map to 0, exactly as before.
        'High Income': raw_df['Monthly Income'].gt(0.5).astype('int64'),
        'Promotion_Frequency': (
            raw_df['Number of Promotions'] / (raw_df['Years at Company'] + 1)
        ),
    }
).drop(columns=['Company Tenure', 'Years at Company', 'Monthly Income'])


KNN Accuracy: 0.6711
Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.64      0.65      6971
           1       0.69      0.70      0.69      7812

    accuracy                           0.67     14783
   macro avg       0.67      0.67      0.67     14783
weighted avg       0.67      0.67      0.67     14783

Confusion Matrix:
[[4490 2481]
 [2381 5431]]


In [17]:
# Separate the target column ('Attrition') from the feature columns.
y = df['Attrition']
X = df.drop('Attrition', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits so nothing from the test rows feeds
# into the scaler.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# KNN

In [19]:
# k-nearest-neighbours model with k=29, fitted on the training split and
# used to predict the held-out rows.
knn = KNeighborsClassifier(n_neighbors=29).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Hold-out accuracy first, then per-class metrics and the confusion matrix.
accuracy = accuracy_score(y_test, y_pred_knn)
print(f"KNN Accuracy: {accuracy:.4f}")
print("Classification Report:", classification_report(y_test, y_pred_knn), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred_knn), sep="\n")

KNN Accuracy: 0.7070
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.69      0.69      6971
           1       0.72      0.72      0.72      7812

    accuracy                           0.71     14783
   macro avg       0.71      0.71      0.71     14783
weighted avg       0.71      0.71      0.71     14783

Confusion Matrix:
[[4799 2172]
 [2159 5653]]


# Logistic Regression

In [24]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with library defaults apart from the fixed seed,
# fitted on the training split and used to predict the held-out rows.
log_reg = LogisticRegression(random_state=42).fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

# Hold-out accuracy first, then per-class metrics and the confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print(f"Logistic Regression Accuracy: {accuracy:.4f}")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

Logistic Regression Accuracy: 0.7139
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.69      0.70      6971
           1       0.73      0.73      0.73      7812

    accuracy                           0.71     14783
   macro avg       0.71      0.71      0.71     14783
weighted avg       0.71      0.71      0.71     14783

Confusion Matrix:
[[4826 2145]
 [2085 5727]]


# SVM

In [25]:
from sklearn.svm import SVC
# Support vector classifier with an RBF kernel; other kernels worth trying
# are 'linear', 'poly' and 'sigmoid'.
# NOTE(review): per the scikit-learn docs, random_state is ignored by SVC
# unless probability=True - confirm for the installed version.
svm_model = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Hold-out accuracy first, then per-class metrics and the confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print(f"SVM Accuracy: {accuracy:.4f}")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

SVM Accuracy: 0.7337
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.71      0.72      6971
           1       0.75      0.75      0.75      7812

    accuracy                           0.73     14783
   macro avg       0.73      0.73      0.73     14783
weighted avg       0.73      0.73      0.73     14783

Confusion Matrix:
[[4979 1992]
 [1945 5867]]
