## Salary Prediction (Linear Regression)

In [49]:
# Libraries and Functions
import pickle

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

In [61]:
# Load the salary dataset from the YBIFoundation GitHub repository.
# on_bad_lines='warn' still skips malformed rows but reports each one, so
# records dropped from the training data do not go unnoticed.
salary = pd.read_csv(
    'https://github.com/YBIFoundation/Dataset/raw/main/Salary%20Data.csv',
    on_bad_lines='warn',
)
salary

Unnamed: 0,Experience Years,Salary
0,1.1,39343
1,1.2,42774
2,1.3,46205
3,1.5,37731
4,2.0,43525
5,2.2,39891
6,2.5,48266
7,2.9,56642
8,3.0,60150
9,3.2,54445


In [63]:
# Preview the first five rows to confirm the columns loaded as expected.
salary.head(n=5)

Unnamed: 0,Experience Years,Salary
0,1.1,39343
1,1.2,42774
2,1.3,46205
3,1.5,37731
4,2.0,43525


In [67]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
salary.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Experience Years,Salary
count,40.0,40.0
mean,5.1525,74743.625
std,2.663715,25947.122885
min,1.1,37731.0
25%,3.2,56878.25
50%,4.6,64472.5
75%,6.875,95023.25
max,10.5,122391.0


In [131]:
# Step 3: Separate the input feature (x) from the target (y)
x = salary.loc[:, ['Experience Years']]  # list selector keeps x a DataFrame (2-D)
y = salary.loc[:, 'Salary']              # single label yields a Series (1-D)

In [132]:
# Step 4: Split the data into training and test sets.
# No test_size is passed, so the library default proportion is used; the fixed
# random_state makes the shuffle reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=2529,
)

In [77]:
# Step 5: Train the model
model = LinearRegression()  # linear regressor created with its default settings
model.fit(X=x_train, y=y_train)  # learn the intercept and slope from the training split

In [81]:
# Intercept of the fitted line: the predicted salary at zero years of experience.
model.intercept_

27010.963630394675

In [84]:
# Slope of the fitted line: predicted salary change per additional year of experience.
model.coef_

array([9343.3671387])

In [97]:
# Step 6: Predictions
# Predict salaries for the held-out test split. An assignment alone shows
# nothing, so the bare name on the last line makes the cell display the values.
y_pred = model.predict(x_test)
y_pred

array([ 90545.86017355,  59712.74861584, 106429.58430934,  64384.43218519,
        69056.11575454, 123247.645159  ,  84939.83989033,  63450.09547132,
        65318.76889906,  61581.42204358])

In [101]:
# Step 7: Evaluation — mean absolute percentage error on the test split,
# returned as a fraction (multiply by 100 for a percentage).
mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)


0.06230870755908321

In [103]:
# mean_absolute_percentage_error is already imported in the first cell of the
# notebook, so it is not imported again here.
mape = mean_absolute_percentage_error(y_test, y_pred)
print("MAPE:", mape)
print("MAPE (%):", mape * 100)  # Convert the fraction to a percentage


MAPE: 0.06230870755908321
MAPE (%): 6.230870755908321


## Display The Predicted Salary

In [107]:
# Flatten the test features and the predictions to 1-D arrays so they can be paired up.
x_test_array = x_test.to_numpy().flatten()
y_pred_array = y_pred.flatten()

# One line per test sample: years of experience next to the predicted salary.
for years, predicted in zip(x_test_array, y_pred_array):
    print(f"Experience: {years} years → Predicted Salary: ₹{predicted:,.2f}")


Experience: 6.8 years → Predicted Salary: ₹90,545.86
Experience: 3.5 years → Predicted Salary: ₹59,712.75
Experience: 8.5 years → Predicted Salary: ₹106,429.58
Experience: 4.0 years → Predicted Salary: ₹64,384.43
Experience: 4.5 years → Predicted Salary: ₹69,056.12
Experience: 10.3 years → Predicted Salary: ₹123,247.65
Experience: 6.2 years → Predicted Salary: ₹84,939.84
Experience: 3.9 years → Predicted Salary: ₹63,450.10
Experience: 4.1 years → Predicted Salary: ₹65,318.77
Experience: 3.7 years → Predicted Salary: ₹61,581.42


## Display Actual vs. Predicted Salary

In [110]:
# Collect the test inputs, the true salaries and the predictions as 1-D NumPy arrays.
x_test_array = x_test.to_numpy().flatten()
y_test_array = y_test.to_numpy().flatten()
y_pred_array = y_pred.flatten()

# Side-by-side table of actual vs. predicted salary for each test sample.
results_df = pd.DataFrame({
    'Experience (years)': x_test_array,
    'Actual Salary (₹)': y_test_array,
    'Predicted Salary (₹)': y_pred_array
})

# Last expression of the cell, so the notebook renders it with its rich
# DataFrame display instead of plain printed text.
results_df


   Experience (years)  Actual Salary (₹)  Predicted Salary (₹)
0                 6.8              91738          90545.860174
1                 3.5              60000          59712.748616
2                 8.5             111620         106429.584309
3                 4.0              56957          64384.432185
4                 4.5              61111          69056.115755
5                10.3             122391         123247.645159
6                 6.2              91000          84939.839890
7                 3.9              63218          63450.095471
8                 4.1              57081          65318.768899
9                 3.7              57189          61581.422044


## ✅ Save Results to CSV

In [117]:
# Write the results table to disk; index=False leaves the row index out of the file.
results_df.to_csv(path_or_buf="salary_predictions.csv", index=False)
print("File saved successfully as salary_predictions.csv")

File saved successfully as salary_predictions.csv


## Save the Trained Model

In [129]:
# Serialize the fitted model to disk so it can be reloaded later without retraining.
# NOTE: only unpickle this file from a trusted source — pickle.load can execute
# arbitrary code.
with open('salary.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
print("✅ Model successfully saved as 'salary.pkl'")

✅ Model successfully saved as 'salary.pkl'
