# In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the employee dataset from the working directory and preview it.
data = pd.read_csv("Employee.csv")
print("First 5 rows:\n", data.head())

# Encode categorical columns as integer codes.
#
# One LabelEncoder is fitted per column and stored in `encoders`, so every
# column's label -> integer mapping remains available afterwards (e.g. via
# `encoders["City"].classes_` or `.inverse_transform(...)`). Re-fitting a
# single shared encoder would discard the mapping of every column except the
# last one processed.
#
# NOTE(review): the integer codes follow the sorted order of the labels; for
# columns without a natural order this imposes an arbitrary ranking on a
# linear model -- confirm that this is acceptable for the analysis.
categorical_cols = ["Education", "City", "Gender", "EverBenched"]
encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le
# After the loop `le` refers to the encoder fitted on the last column in
# `categorical_cols`, so code that relies on `le` keeps working.

print("\nAfter Encoding:\n", data.head())

# Target: the column the model is trained to predict.
y = data["LeaveOrNot"]

# Features (independent variables): every remaining column used as model input.
feature_columns = [
    "Education",
    "JoiningYear",
    "City",
    "PaymentTier",
    "Age",
    "Gender",
    "EverBenched",
    "ExperienceInCurrentDomain",
]
X = data[feature_columns]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

# Fit an ordinary least-squares model on the training split and predict the
# held-out rows.
# NOTE(review): "LeaveOrNot" looks like a yes/no label -- confirm that a
# regression model (rather than a classifier) is what is intended here.
lrr = LinearRegression().fit(x_train, y_train)
y_lrr_pred = lrr.predict(x_test)

# Score the predictions against the true test targets.
mse, r2 = (
    mean_squared_error(y_test, y_lrr_pred),
    r2_score(y_test, y_lrr_pred),
)

print("\nLinear Regression MSE:", mse)
print("Linear Regression R2:", r2)
