In [2]:
# Scenario: Predicting Employee Salary Based on Multiple Factors

# A company wants to predict employee salary based on several important factors:

# Years of Experience

# Education Level (1 = Bachelor, 2 = Master, 3 = PhD)

# Number of Skills Known

# Performance Rating (1 to 5)

# Since salary depends on multiple variables, the company uses Multiple Linear Regression.

# Salary = b0 + b1*(Experience) + b2*(EducationLevel) + b3*(Skills) + b4*(Performance). Use the link below for the dataset.

# Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Step 2: Load dataset
# NOTE(review): absolute path (looks like a Colab upload location); the CSV must
# exist at this exact path before the cell is run.
df = pd.read_csv("/content/multil_salary_pred - Sheet1 (1).csv")

# Preview the first rows to sanity-check the columns and values.
print(df.head())

# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit an extra "None" line.
df.info()

# Step 3: Choose the predictors and the target
# Independent variables: the four employee attributes used as model inputs.
X = df.loc[:, ["Experience_years", "Education_Level", "Skills_Count", "Performance_Rating"]]

# Dependent variable: the salary column the model learns to predict.
y = df.loc[:, "Salary_lpa"]

# Step 4: Split dataset
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train Linear Regression model
# fit() returns the fitted estimator, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Step 6: Model parameters
print("Intercept (b0):", model.intercept_)
print("Coefficients:")
# Pair each feature name with its learned weight; coef_ is indexed in the same
# order as the columns of X that the model was trained on.
for position, feature in enumerate(X.columns):
    print(feature, ":", model.coef_[position])

# Step 7: Predict on test data
y_pred = model.predict(X_test)

# Print the true targets and the predictions one after the other so they can be
# compared by eye; the Series is converted to a plain array to drop the index.
print("Actual values:", y_test.to_numpy())
print("Predicted values:", y_pred)

# Step 8: Evaluate model
# Both metrics compare the held-out targets against the model's predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Absolute Error:", mae)
print("R2 Score:", r2)

# Step 9: Predict salary for a new employee
# The model was fitted on a DataFrame, so it remembers the feature names. Passing
# a bare NumPy array makes scikit-learn warn that X lacks valid feature names and
# relies on positional order only. Building a one-row DataFrame keyed by column
# name, and ordered by X.columns, keeps the value-to-feature mapping explicit.
new_employee = pd.DataFrame(
    [{
        "Experience_years": 5,
        "Education_Level": 2,
        "Skills_Count": 7,
        "Performance_Rating": 4,
    }],
    columns=X.columns,
)
predicted_salary = model.predict(new_employee)

print("Predicted Salary (LPA):", predicted_salary[0])

   Experience_years  Education_Level  Skills_Count  Performance_Rating  \
0                 1                1             3                   3   
1                 2                1             4                   3   
2                 3                2             5                   4   
3                 4                2             6                   4   
4                 5                2             7                   5   

   Salary_lpa  
0         4.0  
1         5.0  
2         7.0  
3         8.0  
4        10.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Experience_years    12 non-null     int64  
 1   Education_Level     12 non-null     int64  
 2   Skills_Count        12 non-null     int64  
 3   Performance_Rating  12 non-null     int64  
 4   Salary_lpa          12 non-null     float64
dtypes: float6

