In [8]:
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [9]:
url = 'https://drive.google.com/file/d/1SpN7-kXajDtiy6tYFQHtTCUwFXIEcql-/view?usp=share_link'
path = 'https://drive.google.com/uc?export=downlaod&id='+url.split('/')[-2]
df = pd.read_csv(path)
df.head(4)


Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99


In [101]:
le = LabelEncoder()
df["Area_encoded"] = le.fit_transform(df["Area"])

In [102]:
df["Area_encoded"].head(3)

0    1
1    0
2    2
Name: Area_encoded, dtype: int32

In [104]:
df.head(5)

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Area_encoded
0,114523.61,136897.8,471784.1,Dhaka,192261.83,1
1,162597.7,151377.59,443898.53,Ctg,191792.06,0
2,153441.51,101145.55,407934.54,Rangpur,191050.39,2
3,144372.41,118671.85,383199.62,Dhaka,182901.99,1
4,142107.34,91391.77,366168.42,Rangpur,166187.94,2


In [100]:
df2 = df.drop("Area", axis = 1)
df2.head(4)

Unnamed: 0,Marketing Spend,Administration,Transport,Profit,Area_encoded
0,114523.61,136897.8,471784.1,192261.83,1
1,162597.7,151377.59,443898.53,191792.06,0
2,153441.51,101145.55,407934.54,191050.39,2
3,144372.41,118671.85,383199.62,182901.99,1


In [105]:
x = df2.drop("Profit", axis = 1)
y = df2["Profit"]

print("shape of x:",x.shape)
print("shape of y:",y.shape)


shape of x: (50, 4)
shape of y: (50,)


In [106]:
x.head(4)

Unnamed: 0,Marketing Spend,Administration,Transport,Area_encoded
0,114523.61,136897.8,471784.1,1
1,162597.7,151377.59,443898.53,0
2,153441.51,101145.55,407934.54,2
3,144372.41,118671.85,383199.62,1


In [107]:
y.head(4)

0    192261.83
1    191792.06
2    191050.39
3    182901.99
Name: Profit, dtype: float64

### Data Splitting

In [108]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.5)

In [109]:
print("shape of x_train: ",x_train.shape)
print("shape of x_test: ",x_test.shape)
print("shape of y_train: ",y_train.shape)
print("shape of y_test: ",y_test.shape)

shape of x_train:  (25, 4)
shape of x_test:  (25, 4)
shape of y_train:  (25,)
shape of y_test:  (25,)


## Feature Scaling

In [110]:
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

### Model Generating

In [111]:
lr = LinearRegression()
lr.fit(x_train,y_train)
lr.coef_

array([31262.65345161,  2600.39792766, 11882.30484453,  -713.18626385])

In [112]:
lr.intercept_

116473.48280000001

In [113]:
x_test

array([[ 1.34506269, -1.27605149,  1.05508794,  1.01913032],
       [-0.51828427, -0.83740033, -0.17936617,  1.01913032],
       [ 0.45364753, -1.26105441,  0.10665198, -1.29707495],
       [-1.27693992, -2.23278368, -0.41862371, -0.13897232],
       [-0.30075106,  1.04172954, -1.05507559, -0.13897232],
       [ 0.26168792, -0.41933311,  0.47466598,  1.01913032],
       [-1.71214682,  0.37974597, -1.92786819, -1.29707495],
       [-1.23502862,  1.10843801, -1.69704209, -1.29707495],
       [-0.71336504,  1.21702351, -0.21062506, -1.29707495],
       [-1.70048549, -2.76690786, -1.92786819, -0.13897232],
       [-0.37843588, -0.36422816, -1.18547623,  1.01913032],
       [ 1.18388701,  0.82239156, -0.88743529, -1.29707495],
       [-1.20355046, -1.09564369, -0.72219125, -1.29707495],
       [ 0.48034821, -0.55401044, -0.06103035,  1.01913032],
       [-1.37856613,  0.07725441, -1.63839246, -0.13897232],
       [-0.72222962, -1.51460922, -0.25363832, -0.13897232],
       [-0.07166839, -0.

In [118]:
x_test


array([[ 1.34506269, -1.27605149,  1.05508794,  1.01913032],
       [-0.51828427, -0.83740033, -0.17936617,  1.01913032],
       [ 0.45364753, -1.26105441,  0.10665198, -1.29707495],
       [-1.27693992, -2.23278368, -0.41862371, -0.13897232],
       [-0.30075106,  1.04172954, -1.05507559, -0.13897232],
       [ 0.26168792, -0.41933311,  0.47466598,  1.01913032],
       [-1.71214682,  0.37974597, -1.92786819, -1.29707495],
       [-1.23502862,  1.10843801, -1.69704209, -1.29707495],
       [-0.71336504,  1.21702351, -0.21062506, -1.29707495],
       [-1.70048549, -2.76690786, -1.92786819, -0.13897232],
       [-0.37843588, -0.36422816, -1.18547623,  1.01913032],
       [ 1.18388701,  0.82239156, -0.88743529, -1.29707495],
       [-1.20355046, -1.09564369, -0.72219125, -1.29707495],
       [ 0.48034821, -0.55401044, -0.06103035,  1.01913032],
       [-1.37856613,  0.07725441, -1.63839246, -0.13897232],
       [-0.72222962, -1.51460922, -0.25363832, -0.13897232],
       [-0.07166839, -0.

In [115]:
y_predict = lr.predict(x_test)
y_predict

array([167015.51674744,  95234.8540612 , 129568.79233333,  65871.72516846,
        97342.5014217 , 128477.40471374,  41952.25927177,  61505.87537514,
        95758.8891703 ,  33308.32836517,  88882.41517659, 146003.75694569,
        68341.95173365, 128597.78398951,  54207.97446376,  87041.38695069,
       120040.02278228,  99201.48241914, 124656.26321421, 103016.52747532,
       133085.73025007, 151809.28377484, 161464.29290552, 153662.29617918,
        40880.97955415])

In [116]:
y_test

4     166187.94
33     96778.92
11    144259.40
38     81229.06
29    101004.64
18    124266.90
47     42559.73
44     65200.33
34     96712.80
48     35673.41
30     99937.59
6     156122.51
42     71498.49
10    146121.95
43     69758.98
35     96479.51
20    118474.03
25    107404.34
21    111313.02
24    108552.04
17    125370.37
9     149759.96
5     156991.12
8     152211.77
45     64926.08
Name: Profit, dtype: float64

In [117]:
print("\nThe Accuracy is : ", lr.score(x_test,y_test)*100,"% .")


The Accuracy is :  92.84911979869887 % .


### Visulization