# Import The Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Loading Our Dataset

In [2]:
# Read the salary CSV from its GitHub-hosted URL into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/sukhioo7/dataset/refs/heads/main/Salary_Data.csv?raw=True",
)

In [3]:
# Preview the first five rows to sanity-check the load.
data.head(n=5)

Unnamed: 0,YearsExperience,Salary
0,1.1,39343.0
1,1.3,46205.0
2,1.5,37731.0
3,2.0,43525.0
4,2.2,39891.0


### Checking The Null Values

In [None]:
# Count missing values per column (isna is the primary name for isnull).
data.isna().sum()

### Checking The Info Of Our Data

In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts, and dtypes.
data.info()

## Plotting The Scatterplot

In [None]:
# Scatter salary against years of experience to eyeball whether the
# relationship looks linear before fitting a regression.
fig, ax = plt.subplots(figsize=(15, 7))
sns.scatterplot(data=data, x='YearsExperience', y='Salary', ax=ax)

# Labels spelled consistently with the final "Actual vs. Predicted" figure.
ax.set_title('Years of Experience V/S Salary')
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary')
plt.show()

## Plotting The BoxPlot

In [None]:
# Box plot of salary; points beyond the whiskers would flag potential outliers.
plt.figure(figsize=(12, 7))
sns.boxplot(y=data['Salary']).set_title('Salary Boxplot Outlier Detection')
plt.show()

## Plotting The Histplot

In [None]:
# Histogram of salary (10 bins) with a KDE curve overlaid.
plt.figure(figsize=(13, 7))
sns.histplot(data['Salary'], bins=10, kde=True).set(
    title="Salary Distribution",
    xlabel="Salary",
    ylabel="Frequency",
)
plt.show()

# Splitting Our Data Into Training And Testing

In [None]:
# Feature matrix: selected with a list so it stays a 2-D DataFrame.
X = data.loc[:, ['YearsExperience']]
# Target vector: a 1-D Series of salaries.
Y = data.loc[:, 'Salary']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=101,
)

# Applying Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares model; the intercept term is fitted (library default,
# spelled out here for the reader).
linear_regression = LinearRegression(fit_intercept=True)

## Fitting The Model

In [None]:
# Fit the model on the training split only; the test split stays unseen.
linear_regression.fit(X=X_train, y=Y_train)

# Making Predictions

In [None]:
# Predict salaries for the held-out test features.
prediction = linear_regression.predict(X=X_test)
print("Predicted Salaries:", prediction)

In [None]:
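# Uncomment the next line to display the actual test-set salaries for comparison with the predictions:
# Y_test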

## Evaluate The Model

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

# Score the predictions against the held-out targets.
mae = mean_absolute_error(Y_test,prediction)
mse = mean_squared_error(Y_test,prediction)
# RMSE is in the same units as Salary, so it is easier to interpret than MSE.
# It was previously computed and discarded; keep it and report it.
rmse = np.sqrt(mse)
r2 = r2_score(Y_test,prediction)


print("Mean absolute error:",mae)
print("Mean squared error:",mse)
print("Root mean squared error:",rmse)
print("R-squared score:",r2)

# Checking Performance Of The Model

In [None]:
# Overlay the model's predictions (red line) on the held-out observations
# (blue points) to judge the fit visually.
plt.figure(figsize=(12, 7))
sns.scatterplot(
    x=X_test['YearsExperience'], y=Y_test, label="Actual Data", color="blue"
)
sns.lineplot(
    x=X_test['YearsExperience'], y=prediction, label="Predicted Line", color="red"
)

# Both seaborn calls draw on the current axes, so label it once via gca().
plt.gca().set(
    xlabel="Years of Experience",
    ylabel="Salary",
    title="Actual vs. Predicted Salaries (Linear Regression)",
)
plt.legend()
plt.show()