# Import Required Libraries
Import the necessary libraries for data manipulation, visualization, and machine learning.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Load and Preprocess the Dataset
Load the dataset, encode the `Attrition` column as 1 (`Yes`) / 0 (`No`), and keep only the employees who are staying (`Attrition == 0`).

In [None]:
# Read the raw employee records into memory.
df = pd.read_csv('employee_dataset_with_future_salaries.csv')

# Encode Attrition numerically: 'Yes' -> 1, 'No' -> 0.
# Series.map turns any label outside this mapping into NaN.
df['Attrition'] = df['Attrition'].map({'Yes': 1, 'No': 0})

# Keep the employees who are staying (Attrition code 0). The explicit copy
# makes df_stayers independent of df, so later edits do not write back.
df_stayers = df.loc[df['Attrition'].eq(0)].copy()

# Define Features and Target
Define the target variable, drop the columns that should not be used as features, and one-hot encode the remaining categorical features.

In [None]:
# Column the models will learn to predict.
target = 'FutureSalary_PerformanceBased'

# Columns kept out of the feature matrix. The target itself is listed last so
# it cannot end up among the predictors; Attrition is constant (0) within
# df_stayers because of the earlier filter.
# NOTE(review): FutureSalary_Fixed is presumably an alternative salary target
# and is excluded to avoid leakage -- confirm against the dataset description.
exclude_cols = [
    'EmployeeCount',
    'Over18',
    'StandardHours',
    'EmployeeNumber',
    'Attrition',
    'FutureSalary_Fixed',
    target,
]

# Target vector, taken from the stayers subset.
y = df_stayers[target]

# Feature matrix: drop the excluded columns, then one-hot encode. With
# drop_first=True the first level of each encoded column is omitted.
X = pd.get_dummies(df_stayers.drop(columns=exclude_cols), drop_first=True)

# Train-Test Split
Split the data into training (80%) and testing (20%) sets, using a fixed random seed so the split is reproducible.

In [None]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Train Regression Models
Train several regression models, report each model's R² and RMSE on the test set, and plot actual versus predicted values for each model.

In [None]:
# Regression models.
# Ridge, Lasso and the RBF-kernel SVR are sensitive to the scale of their
# inputs, so each is wrapped in a pipeline that standardizes the features.
# The scaler is fitted inside the pipeline, i.e. on the training split only,
# so no test-set statistics leak into training. Random Forest is left
# unwrapped because tree splits do not depend on feature scale.
models = {
    "Random Forest": RandomForestRegressor(random_state=42),
    "Ridge Regression": make_pipeline(StandardScaler(), Ridge(alpha=1.0)),
    "Lasso Regression": make_pipeline(StandardScaler(), Lasso(alpha=0.1)),
    # SVR's default C and epsilon are only sensible for a target of roughly
    # unit scale. The target is therefore standardized for fitting, and the
    # predictions are mapped back to the original salary scale automatically.
    "Support Vector Regressor": TransformedTargetRegressor(
        regressor=make_pipeline(StandardScaler(), SVR(kernel='rbf')),
        transformer=StandardScaler(),
    ),
}

# One DataFrame of (Model, Actual, Predicted) per model, in training order.
results = []

# End points of the "perfect prediction" diagonal; they depend only on
# y_test, so compute them once instead of on every iteration.
diagonal = [y_test.min(), y_test.max()]

for name, model in models.items():
    # Fit on the training split and predict the held-out split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Metrics on the held-out split, in the original target units.
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    print(f"\n{name}")
    print(f"R² Score: {r2:.4f}")
    print(f"RMSE: {rmse:.2f}")

    # Store results; the frame keeps y_test's index, and the scalar model
    # name is broadcast to every row.
    results.append(pd.DataFrame({
        'Model': name,
        'Actual': y_test,
        'Predicted': y_pred,
    }))

    # Plot actual vs. predicted with the y = x reference line.
    plt.figure(figsize=(6, 4))
    sns.scatterplot(x=y_test, y=y_pred, alpha=0.6)
    plt.plot(diagonal, diagonal, 'r--')
    plt.xlabel('Actual Future Salary')
    plt.ylabel('Predicted Future Salary')
    plt.title(f'{name} - Actual vs Predicted')
    plt.tight_layout()
    plt.show()

# Save Predictions
Combine all predictions and save them to a CSV file.