## Import Required libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler , MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score , mean_squared_error

## Loading Data

In [5]:
data = pd.read_csv('DATA/Salary.csv')
data

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891
5,2.9,56642
6,3.0,60150
7,3.2,54445
8,3.2,64445
9,3.7,57189


# Performing EDA (exploratory data analysis)

## Checking null value is present or not

In [6]:
data.isnull().sum()

YearsExperience    0
Salary             0
dtype: int64

In [7]:
data.describe()

Unnamed: 0,YearsExperience,Salary
count,35.0,35.0
mean,6.308571,83945.6
std,3.61861,32162.673003
min,1.1,37731.0
25%,3.45,57019.0
50%,5.3,81363.0
75%,9.25,113223.5
max,13.5,139465.0


## Visualizing data

## Preparing data

In [8]:
X = data.drop('Salary',axis=1)
y = data['Salary']

In [9]:
X.shape , y.shape

((35, 1), (35,))

## Spliting data into train and test

In [10]:
X_train , X_test , Y_train , Y_test = train_test_split(X,y,random_state=101,test_size=0.2)
X_train.shape , X_test.shape , Y_train.shape , Y_test.shape

((28, 1), (7, 1), (28,), (7,))

## Defining LinearRegression Model

In [11]:
lr = LinearRegression()
lr.fit(X_train, Y_train)

LinearRegression()

## Testing model

In [12]:
pred = lr.predict(X_test)
pred

array([ 80430.70927962,  56616.04598277,  88368.93037856, 147464.57633739,
       127178.01130675, 112183.5936754 , 107773.47084266])

In [13]:
Y_test

18     81363
7      54445
20     91738
34    139465
30    127345
26    116969
25    105582
Name: Salary, dtype: int64

## Checking Actual data , Predicted data and difference between the Actual and Predicted data

In [14]:
diff = Y_test - pred

In [15]:
pd.DataFrame(np.c_[Y_test , pred , diff] , columns=['Actual','Predicted','Difference'])

Unnamed: 0,Actual,Predicted,Difference
0,81363.0,80430.70928,932.29072
1,54445.0,56616.045983,-2171.045983
2,91738.0,88368.930379,3369.069621
3,139465.0,147464.576337,-7999.576337
4,127345.0,127178.011307,166.988693
5,116969.0,112183.593675,4785.406325
6,105582.0,107773.470843,-2191.470843


## Visualizing Model, that how it is performing on training data

## Visualizing Model, that how it is performing on testing data

## Evaluating

In [16]:
lr.score(X_test , Y_test)

0.9785870620257094

In [17]:
rmse = np.sqrt(mean_squared_error(Y_test,pred))
r2 = r2_score(Y_test,pred)

In [18]:
rmse , r2

(3939.8513697101125, 0.9785870620257094)