# Salary Predictor Model

Build a Linear Regression model to predict employee salary based on years of experience.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import os

## 1. Load Data

In [None]:
# Load the salary dataset from disk and preview its first rows.
df = pd.read_csv('dataset/salary_data.csv')
print(df.head())

# The double-bracket selection keeps X as a DataFrame (2-D); y is a 1-D Series.
feature_columns = ['YearsExperience']
target_column = 'Salary($)'
X = df[feature_columns]
y = df[target_column]

## 2. Visualize Data

Creating a scatter plot to confirm the linear relationship.

In [None]:
# Scatter plot of salary against experience, drawn through the Axes object
# rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, color='blue')
ax.set_title('Salary vs Experience')
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary ($)')
ax.grid(True)
plt.show()

## 3. Build Model

Split the data into Training (80%) and Test (20%) sets and train the Linear Regression model.

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear regression on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

## 4. Evaluation

Calculate the RMSE and R² score on the test set.

In [None]:
# Predict on the held-out test set.
y_pred = model.predict(X_test)

# RMSE is the square root of the mean squared error, so it is expressed in
# the same units as the target.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'RMSE: {rmse:.2f}')
print(f'R^2 Score: {r2:.4f}')

## 5. Visual Proof

Actual vs. predicted plot: points on the dashed diagonal are perfect predictions.

In [None]:
# Common bounds so the reference diagonal spans both actual and predicted values.
min_val = min(y_test.min(), y_pred.min())
max_val = max(y_test.max(), y_pred.max())
diagonal = [min_val, max_val]

plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_pred, label='Data Points', alpha=0.7, color='blue')
# Points lying on the y = x line are predictions equal to the actual salary.
plt.plot(diagonal, diagonal, linestyle='--', color='red', label='Perfect Prediction')
plt.xlabel('Actual Salaries ($)')
plt.ylabel('Predicted Salaries ($)')
plt.title('Actual vs Predicted Salaries')
plt.grid(True)
plt.legend()
plt.show()