# Employee Salary Prediction using Machine Learning
This project predicts employee salaries based on features like experience, education, job role, location, and industry.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

## Load Sample Dataset

In [None]:
# Sample dataset, written one employee per row and then transposed into the
# column-oriented dict that the DataFrame constructor consumes.
columns = ('experience', 'education', 'job_role', 'location', 'industry', 'salary')
records = [
    (1, 'Bachelors', 'Developer', 'Delhi', 'Tech', 30000),
    (3, 'Masters', 'Manager', 'Mumbai', 'Finance', 50000),
    (5, 'PhD', 'Data Scientist', 'Bangalore', 'Healthcare', 90000),
    (7, 'Bachelors', 'Developer', 'Hyderabad', 'Tech', 60000),
    (9, 'Masters', 'Manager', 'Chennai', 'Finance', 70000),
]
data = {name: list(values) for name, values in zip(columns, zip(*records))}

# Build the DataFrame and preview its first rows
df = pd.DataFrame(data)
df.head()

## Data Preprocessing (Encoding Categorical Features)

In [None]:
# One-hot encode the text-valued columns. drop_first=True removes the first
# level of each feature, leaving k-1 indicator columns for k distinct values.
categorical_columns = ['education', 'job_role', 'location', 'industry']
df_encoded = pd.get_dummies(
    df,
    columns=categorical_columns,
    drop_first=True,
)

## Split Data into Training and Test Sets

In [None]:
# The salary column is the regression target; every other encoded column is a feature.
target_column = 'salary'
X = df_encoded.drop(columns=[target_column])
y = df_encoded[target_column]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Train Linear Regression Model

In [None]:
# Fit an ordinary least-squares linear regression on the training split
model = LinearRegression()
model.fit(X=X_train, y=y_train)

## Model Evaluation

In [None]:
# Predict salaries for the held-out rows and score the predictions
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)

# R² is 1 - SS_res / SS_tot, and SS_tot (the spread of y_test around its mean)
# is zero when the test set has fewer than two rows. scikit-learn's r2_score
# then warns that the score is not well-defined and returns NaN, so report
# that case explicitly instead of printing an unexplained "nan".
if len(y_test) < 2:
    r2 = float("nan")
    print("R² Score: undefined (test set has fewer than 2 samples)")
else:
    r2 = r2_score(y_test, y_pred)
    print("R² Score:", r2)
print("Mean Squared Error:", mse)

## Visualization: Actual vs Predicted Salary

In [None]:
# Scatter the predictions against the true salaries on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)

# Dashed red diagonal marks perfect predictions (predicted == actual),
# spanning the full salary range of the dataset.
salary_range = [y.min(), y.max()]
ax.plot(salary_range, salary_range, 'r--')

ax.set_xlabel("Actual Salary")
ax.set_ylabel("Predicted Salary")
ax.set_title("Actual vs Predicted Salary")
ax.grid(True)
plt.show()

## Conclusion
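This notebook demonstrates a simple linear regression approach to salary prediction. Because the sample dataset has only five rows (so the 20% test split holds a single row), the reported metrics are illustrative only; a realistic evaluation needs a much larger dataset.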