# linear_regression

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
%matplotlib inline

In [6]:
df=pd.read_csv("car data.csv")
df.head()

Unnamed: 0,Speed,Car_age,Experience,Risk
0,200,15,3.0,85
1,90,17,6.6,20
2,165,12,5.0,93
3,110,20,13.0,60
4,140,30,4.0,82


In [7]:
df.Experience

0      3.0
1      6.6
2      5.0
3     13.0
4      4.0
5      6.6
6      8.0
7     10.0
8      9.0
9      7.0
10    14.0
11    10.0
12     5.0
13    14.0
14     6.0
15     5.5
16     6.0
17     9.0
18    13.0
19     5.0
20     7.8
21     1.0
22     5.5
23     8.0
Name: Experience, dtype: float64

In [8]:
df.Risk

0     85
1     20
2     93
3     60
4     82
5     10
6     90
7     56
8     78
9     48
10    98
11    49
12    78
13    48
14    46
15    67
16    89
17    73
18    39
19    91
20    78
21    46
22    71
23    50
Name: Risk, dtype: int64

# Create Linear Object

In [9]:
reg = linear_model.LinearRegression()

# Fit model

In [10]:
reg.fit(df[['Speed', 'Car_age','Experience']], df.Risk)

LinearRegression()

# Predict the Risk

In [11]:
reg.predict([[160, 10, 5]])

array([66.3475895])

In [12]:
reg.coef_

array([ 0.01532006,  0.35727535, -0.80609781])

In [13]:
reg.intercept_

64.35411515073807

# Save Model To a File Using Python Pickle

In [14]:
model = linear_model.LinearRegression()
model.fit(df[['Speed', 'Car_age','Experience']], df.Risk)

LinearRegression()

In [15]:
import pickle

In [16]:
with open('model_pickle','wb') as file:
    pickle.dump(model,file)

In [17]:
with open('model_pickle','rb') as file:
    mp = pickle.load(file)

In [18]:
mp.coef_

array([ 0.01532006,  0.35727535, -0.80609781])

In [19]:
mp.intercept_

64.35411515073807

In [20]:
mp.predict([[160, 10, 5]])

array([66.3475895])

# Categorical Variables and One Hot Encoding

In [27]:
df2=pd.read_csv("car sells.csv")
df2.head()

Unnamed: 0,Car_Model,Mileage,price,Age
0,BMW,12500,25000,6
1,BMW,14000,28000,8
2,BMW,56000,25400,7
3,BMW,25000,34000,5
4,AUDI,12000,25400,9


# Using sklearn OneHotEncoder

In [28]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

In [29]:
dfle = df2
dfle.Car_Model = le.fit_transform(dfle.Car_Model)
dfle

Unnamed: 0,Car_Model,Mileage,price,Age
0,1,12500,25000,6
1,1,14000,28000,8
2,1,56000,25400,7
3,1,25000,34000,5
4,0,12000,25400,9
5,0,14000,41000,10
6,0,15800,25400,8
7,0,14630,22000,5
8,0,15870,28000,4
9,2,15230,20000,7


# Making feature

In [30]:
X = dfle[['Car_Model','Mileage','Age' ]].values
X

array([[    1, 12500,     6],
       [    1, 14000,     8],
       [    1, 56000,     7],
       [    1, 25000,     5],
       [    0, 12000,     9],
       [    0, 14000,    10],
       [    0, 15800,     8],
       [    0, 14630,     5],
       [    0, 15870,     4],
       [    2, 15230,     7],
       [    2, 17890,     6],
       [    2, 12540,     8],
       [    2, 14985,     7],
       [    3, 13659,     5],
       [    3, 15236,     9],
       [    3, 17854,    10],
       [    3, 16325,     8],
       [    3, 15632,     5],
       [    3, 18456,     4],
       [    4, 14785,     7],
       [    4, 15496,     6],
       [    4, 17594,     8],
       [    4, 14985,     7],
       [    4, 13659,     5],
       [    4, 15236,     9]], dtype=int64)

In [31]:
y = dfle.price.values
y

array([25000, 28000, 25400, 34000, 25400, 41000, 25400, 22000, 28000,
       20000, 25400, 24000, 25400, 40000, 23000, 25400, 28000, 28300,
       25400, 32000, 28000, 31000, 25400, 29000, 25400], dtype=int64)

# Simply prediction

In [50]:
model = linear_model.LinearRegression()
model.fit(dfle[['Car_Model', 'Mileage','Age']], dfle.price)

LinearRegression()

In [51]:
model.predict([[0,12500,6]])

array([27652.996843])

# Train test splitting

In [52]:
from sklearn.model_selection import train_test_split
train_set_X,test_set_X,train_set_y,test_set_y=train_test_split(X,y,test_size=0.3,shuffle=True)
train_set_X.shape

(17, 7)

# Generating the model and train regression model

In [53]:
from sklearn.linear_model import LinearRegression
r=LinearRegression()
r.fit(train_set_X,train_set_y)

LinearRegression()

In [54]:
y_predict=r.predict(test_set_X) #that is predict the price 
y_predict

array([23864.5380753 , 26125.96449436, 28357.77104173, 26970.23873622,
       21529.50010267, 33086.48370213, 20959.45416344, 28284.27989762])

# Check Coefficients, Intercept

In [55]:
r.coef_

array([-1.83135309e+03,  8.51766957e+02, -3.78518237e+03,  2.96906782e+03,
        1.79570068e+03, -2.77294048e-02, -1.24169514e+03])

In [56]:
r.intercept_

35595.97035890645

In [58]:
y_pred=r.intercept_+r.coef_+test_set_X
y_pred

array([[33764.61727315, 36447.73731564, 31811.7879929 , 38565.03817442,
        37391.67103843, 53485.9426295 , 34360.27521469],
       [33764.61727315, 36448.73731564, 31810.7879929 , 38565.03817442,
        37391.67103843, 49595.9426295 , 34362.27521469],
       [33765.61727315, 36447.73731564, 31810.7879929 , 38565.03817442,
        37391.67103843, 51465.9426295 , 34358.27521469],
       [33764.61727315, 36447.73731564, 31810.7879929 , 38565.03817442,
        37392.67103843, 53189.9426295 , 34362.27521469],
       [33764.61727315, 36447.73731564, 31811.7879929 , 38565.03817442,
        37391.67103843, 48135.9426295 , 34362.27521469],
       [33764.61727315, 36447.73731564, 31810.7879929 , 38566.03817442,
        37391.67103843, 54051.9426295 , 34358.27521469],
       [33765.61727315, 36447.73731564, 31810.7879929 , 38565.03817442,
        37391.67103843, 49595.9426295 , 34364.27521469],
       [33764.61727315, 36447.73731564, 31810.7879929 , 38565.03817442,
        37392.67103843, 5