In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression


In [None]:
# Read the HR attrition CSV (path is relative to the notebook's working
# directory) into the working DataFrame `df`.
csv_path = "WA_Fn-UseC_-HR-Employee-Attrition.csv"
df = pd.read_csv(csv_path)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Preview the first five rows to eyeball column names and sample values.
df.head(n=5)

In [None]:
# Print the per-column summary (dtype and non-null count) of the frame.
df.info()

In [None]:
# Missing-value count per column (`isna` is the same check as `isnull`).
df.isna().sum()


In [None]:
# Number of rows that exactly repeat an earlier row; the first occurrence of
# each duplicated row is not counted.
df.duplicated(keep='first').sum()

In [None]:
# Frequency of each Attrition label, most common first.
attrition_labels = df['Attrition']
attrition_labels.value_counts()


In [None]:
# Bar chart of how many rows carry each Attrition label.
attrition_ax = sns.countplot(data=df, x='Attrition')
attrition_ax.set_title("Attrition Count")
plt.show()


In [None]:
# Row counts per Gender, with one bar per Attrition label side by side.
gender_ax = sns.countplot(data=df, x='Gender', hue='Attrition')
gender_ax.set_title("Attrition vs Gender")
plt.show()


In [None]:
# 30-bin histogram of MonthlyIncome with a kernel-density curve overlaid.
income_ax = sns.histplot(df['MonthlyIncome'], kde=True, bins=30)
income_ax.set_title("Monthly Income Distribution")
plt.show()


In [None]:
# Row counts per JobSatisfaction level, with one bar per Attrition label.
satisfaction_ax = sns.countplot(data=df, x='JobSatisfaction', hue='Attrition')
satisfaction_ax.set_title("Job Satisfaction vs Attrition")
plt.show()


In [None]:
# Encode the target as integers: 'Yes' -> 1, 'No' -> 0.
attrition_codes = {'Yes': 1, 'No': 0}

# `Series.map` turns every value that is not a key of the dict into NaN, so
# running this cell a second time would wipe out the already-encoded 0/1
# column. Skip the mapping when the column already holds only 0/1.
if not df['Attrition'].isin(list(attrition_codes.values())).all():
    encoded_attrition = df['Attrition'].map(attrition_codes)

    # Fail loudly on labels the mapping does not know about instead of letting
    # NaN targets surface later as an obscure error at model-fitting time.
    unmapped_labels = df.loc[encoded_attrition.isna(), 'Attrition'].unique()
    if len(unmapped_labels) > 0:
        raise ValueError(
            f"Unexpected Attrition labels: {unmapped_labels.tolist()}"
        )

    df['Attrition'] = encoded_attrition.astype('int64')


In [None]:
# One-hot encode the remaining categorical (text) feature columns.
#
# LabelEncoder is intended for target labels. Applied to features it assigns
# arbitrary integer codes, which a linear model then reads as an ordering and
# magnitude that nominal categories do not have. Indicator columns avoid that.
#
# * `columns=None` (the default) makes get_dummies encode only object / string /
#   category columns, so numeric columns -- including the 0/1 Attrition target
#   produced by the earlier cell -- are left untouched, and re-running this
#   cell is a no-op.
# * `drop_first=True` drops one level per feature so the indicators are not
#   perfectly collinear; a column with a single distinct value therefore
#   contributes no columns at all.
df = pd.get_dummies(df, drop_first=True, dtype=int)

# Guard against out-of-order execution: if the target had still been text it
# would have been split into indicator columns above.
assert 'Attrition' in df.columns, "encode the Attrition target before this cell"


In [None]:
# Separate the target column (y) from the predictor columns (X).
target_col = 'Attrition'
y = df[target_col]
X = df.drop(columns=target_col)


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible. `stratify=y` keeps the proportion of each Attrition class the
# same in the train and test sets, so the test metrics are not distorted by an
# unlucky split of whichever class is rarer.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Standardise the features. The scaler learns its statistics from the training
# rows only and the same fitted transform is then applied to the test rows.
# NOTE: X_train / X_test are overwritten here, so re-run the split cell before
# re-running this one; otherwise already-scaled data gets scaled again.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Fit a logistic-regression classifier with the library's default settings on
# the training data. The trailing expression displays the fitted estimator.
model = LogisticRegression().fit(X_train, y_train)
model


In [None]:
# Predicted class labels for the held-out test rows.
y_pred = model.predict(X_test)


In [None]:
# Fraction of test rows whose predicted label matches the actual label.
# NOTE(review): if the Attrition classes are imbalanced (see the value_counts
# cell), read this together with the classification report below.
accuracy_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Confusion matrix drawn as an annotated heatmap of integer counts:
# rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)
cm_ax = sns.heatmap(cm, fmt='d', annot=True)
cm_ax.set(xlabel="Predicted", ylabel="Actual")
plt.show()


In [None]:
# Per-class precision / recall / F1 table; printed because the report is a
# pre-formatted plain-text string.
report_text = classification_report(y_test, y_pred)
print(report_text)
