In [None]:
import os

import pandas as pd

# Location of the raw employee-attrition CSV.
# The default is the original hard-coded Windows path, so existing runs are
# unchanged; set the EMPLOYEE_ATTRITION_CSV environment variable to load the
# file from somewhere else without editing the notebook.
DATA_PATH = os.environ.get(
    "EMPLOYEE_ATTRITION_CSV",
    "C:\\Users\\DELL\\Downloads\\employee_attrition.csv",
)

# Load the dataset and report its dimensions and column labels.
df = pd.read_csv(DATA_PATH, encoding="utf-8")
print("Shape of dataset:", df.shape)
print("Columns:\n", df.columns)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
# Print a concise summary of the frame: column names, non-null counts, dtypes
# and memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); with default
# arguments pandas reports these for the numeric columns.
df.describe()

In [None]:
# Data-quality check: null count per column and number of fully duplicated rows.
missing_per_column = df.isna().sum()
duplicate_row_count = df.duplicated().sum()

print("Missing values:", missing_per_column, sep="\n")
print("\n Number of duplicate rows:", duplicate_row_count)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column of `df` in place.
# NOTE(review): this also encodes the target column if it is stored as
# object dtype — confirm that is intended.
categorical_cols = df.select_dtypes(include=['object']).columns

# One fitted encoder per column, keyed by column name. Re-fitting a single
# shared encoder would keep only the last column's classes, making it
# impossible to decode the other columns afterwards
# (e.g. label_encoders[col].inverse_transform(df[col])).
# Caveat: re-running this cell after the columns are already encoded finds no
# object columns and resets this dict to empty; reload `df` first.
label_encoders = {}

# `lab` is bound up front so the name still exists when there are no object
# columns; after the loop it refers to the encoder fitted on the last column.
lab = LabelEncoder()
for col in categorical_cols:
    lab = LabelEncoder()
    df[col] = lab.fit_transform(df[col])
    label_encoders[col] = lab

In [None]:
# Look at the first five rows again now that the object columns have been
# replaced by integer codes.
df.head(n=5)

In [None]:
# Show the dtype of every column; columns that were object dtype before the
# label-encoding step should now report an integer dtype.
df.dtypes

In [None]:
# Target vector: the attrition label column.
y = df['Attrition']

# Feature matrix: everything except the target and three excluded columns.
# NOTE(review): the reason for excluding Age, Years_At_Company and Salary is
# not stated anywhere in the notebook — confirm this is deliberate.
excluded_cols = ['Attrition', 'Age', 'Years_At_Company', 'Salary']
one_hot_cols = ['Department', 'Overtime']

# One-hot encode the two listed columns, dropping the first level of each so
# the indicator columns are not perfectly collinear.
X = pd.get_dummies(
    df.drop(columns=excluded_cols),
    columns=one_hot_cols,
    drop_first=True,
)

display(X.head(), y.head())

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
# NOTE(review): the split is not stratified. If the Attrition classes are
# imbalanced, passing stratify=y would keep the class ratio equal in both
# partitions — confirm the class balance before changing.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the size of each partition.
for split_name, split_part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{split_name} shape:", split_part.shape)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline classifier: a 100-tree random forest with a fixed seed, trained on
# the training partition.
baseline_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**baseline_params)

# fit() returns the estimator itself, so the cell output is the fitted model.
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the fitted baseline model on the held-out test partition.
y_pred = model.predict(X_test)

# Precision, recall and F1 are averaged across classes, weighted by each
# class's support. zero_division=0 matches the settings used by the
# hyperparameter sweep later in the notebook, so both sets of numbers are
# computed the same way; a class that is never predicted contributes 0
# instead of raising an undefined-metric warning.
# Note: support-weighted recall is mathematically equal to accuracy, so those
# two printed values will always coincide.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Small manual sweep over forest size and tree depth (None = unlimited depth).
n_estimators_values = [50, 100, 150]
max_depth_values = [None, 5, 10]

# Every (n_estimators, max_depth) pair, with tree count varying slowest.
param_grid = [
    (n_trees, depth)
    for n_trees in n_estimators_values
    for depth in max_depth_values
]

# Shared averaging options for precision / recall / F1.
weighted = {'average': 'weighted', 'zero_division': 0}

# NOTE(review): each configuration is scored on the test partition, so
# choosing the best row from this table tunes hyperparameters on test data;
# consider cross-validating on the training partition instead.
# NOTE(review): `model`, `y_pred` and the metric variables are rebound on
# every iteration, so after this cell they refer to the last configuration
# (150 trees, max_depth=10), not to the best one.
results = []
for n_estimators, max_depth in param_grid:
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, **weighted)
    recall = recall_score(y_test, y_pred, **weighted)
    f1 = f1_score(y_test, y_pred, **weighted)

    results.append(dict(
        n_estimators=n_estimators,
        max_depth=max_depth,
        accuracy=accuracy,
        precision=precision,
        recall=recall,
        f1_score=f1,
    ))

# One row per configuration, in sweep order.
results_df = pd.DataFrame(results)
display(results_df)