# Employee Performance Prediction using Machine Learning

**Author**: Shivam Verma  
**Branch**: CSE  
**College**: GNIOT, Greater Noida  


This notebook predicts the productivity of garment-factory workers by training and comparing three regression models: Linear Regression, Random Forest, and XGBoost.

## 1. Import Libraries

In [None]:
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

## 2. Load Dataset

In [None]:
# Read the raw records from the CSV (path is relative to the notebook's
# working directory) and preview the first rows.
csv_path = 'garments_worker_productivity.csv'
df = pd.read_csv(csv_path)
df.head()

## 3. Preprocess Data

In [None]:
# Preprocessing: drop incomplete rows, derive `month` from `date`, and
# integer-encode the categorical columns.
#
# NOTE(review): dropna() removes every row that has a NaN in *any* column.
# If a single column is sparsely populated this can discard a large share of
# the data -- compare len(df) before and after to confirm that is acceptable.
df = (
    df.dropna()
      # Keep only the month of the date; the raw `date` column is then dropped.
      .assign(month=lambda frame: pd.to_datetime(frame['date']).dt.month)
      .drop(columns='date')
)

# Label Encoding
# Use one encoder per column. Re-fitting a single shared LabelEncoder
# overwrites its `classes_` on each iteration, so the mappings for all but
# the last column would be lost and could not be inverted or re-applied to
# new data at prediction time.
label_encoders = {}
for col in ['department', 'day', 'quarter']:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Backward compatibility: the original cell left `le` fitted on the last
# encoded column ('quarter').
le = label_encoders['quarter']

df.head()

## 4. Split Data

In [None]:
# Target is `actual_productivity`; every remaining column is a feature.
y = df['actual_productivity']
X = df.drop(columns=['actual_productivity'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 5. Train Models

In [None]:
# Fit the three candidate regressors on the training split.
# The stochastic models are seeded with the same value used for the
# train/test split (42) so that a fresh "Restart & Run All" reproduces the
# same fitted models and therefore the same evaluation numbers.

# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)

# Random Forest
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

# XGBoost
xgb = XGBRegressor(random_state=42)
xgb.fit(X_train, y_train)

## 6. Evaluate Models

In [None]:
def evaluate(name, y_true, y_pred):
    """Print a short regression report for one model.

    Writes the model label followed by the mean absolute error, the mean
    squared error and the R2 score of `y_pred` against `y_true`, one per
    line. Nothing is returned.
    """
    print(f'{name}')
    report = (
        ('MAE:', mean_absolute_error),
        ('MSE:', mean_squared_error),
        ('R2:', r2_score),
    )
    for label, metric in report:
        print(label, metric(y_true, y_pred))

# Report test-set metrics for each fitted model, in training order.
for model_label, fitted_model in (
    ('Linear Regression', lr),
    ('Random Forest', rf),
    ('XGBoost', xgb),
):
    evaluate(model_label, y_test, fitted_model.predict(X_test))

## 7. Save Best Model

In [None]:
# Persist the model that actually performed best instead of always saving
# XGBoost: rank the fitted candidates by R2 on the held-out test split and
# pickle the winner.
# NOTE(review): selecting on the test split makes the reported test score
# slightly optimistic; use a separate validation split if that matters.
candidates = {
    'Linear Regression': lr,
    'Random Forest': rf,
    'XGBoost': xgb,
}
test_r2 = {
    label: r2_score(y_test, model.predict(X_test))
    for label, model in candidates.items()
}
best_name = max(test_r2, key=test_r2.get)
print(f'Best model: {best_name} (R2 = {test_r2[best_name]:.4f})')

with open('best_model.pkl', 'wb') as f:
    pickle.dump(candidates[best_name], f)