In [24]:
from google.colab import files
import pandas as pd
import io
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Upload the CSV file
uploaded = files.upload()
file_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[file_name]))

# Split into features and label
X = df.drop('Attrition', axis=1)  # Replace 'Attrition' with your target column
y = df['Attrition']

# Split train-test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Separate numeric and categorical columns
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

# Create transformers
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Combine into a preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Build pipeline
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Train the model
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# ----------------------------
# User Input for Prediction
# ----------------------------

print("\n--- Predict Employee Attrition ---")

# Collect user input for each feature
user_input = {}
print("\nPlease enter the following details:")

for col in numeric_features + categorical_features:
    val = input(f"{col}: ")
    if col in numeric_features:
        user_input[col] = float(val)
    else:
        user_input[col] = val

# Create DataFrame for single prediction
input_df = pd.DataFrame([user_input])

# Predict
prediction = clf.predict(input_df)[0]
print(f"\nPredicted Attrition: {prediction}")


Saving 6. Predict Employee Attrition.csv to 6. Predict Employee Attrition (19).csv

Classification Report:

              precision    recall  f1-score   support

          No       0.88      0.99      0.93       255
         Yes       0.67      0.15      0.25        39

    accuracy                           0.88       294
   macro avg       0.78      0.57      0.59       294
weighted avg       0.86      0.88      0.84       294


--- Predict Employee Attrition ---

Please enter the following details:
Age: 41
DailyRate: 1102
DistanceFromHome: 1
Education: 2
EmployeeCount: 1
EmployeeNumber: 1
EnvironmentSatisfaction: 2
HourlyRate: 94
JobInvolvement: 3
JobLevel: 2
JobSatisfaction: 4
MonthlyIncome: 5993
MonthlyRate: 19479
NumCompaniesWorked: 8
PercentSalaryHike: 11
PerformanceRating: 3
RelationshipSatisfaction: 1
StandardHours: 80
StockOptionLevel: 0
TotalWorkingYears: 8
TrainingTimesLastYear: 0
WorkLifeBalance: 1
YearsAtCompany: 6
YearsInCurrentRole: 4
YearsSinceLastPromotion: 0
YearsWi