# Multiple Linear Regression (Memprediksi jarak yang dapat ditempuh mobil dengan banyak prediktor)

## Melakukan import semua library yang dibutuhkan

In [48]:
import pandas as pd
import numpy as np
import sklearn.model_selection as ms
import sklearn.linear_model as lm
from sklearn.linear_model import LinearRegression

## Menyiapkan Data

In [49]:
# Load bensin2.csv from the 'ML DATA' folder (path is relative to the notebook's
# working directory) and show the resulting frame as the cell output.
df2 = pd.read_csv('ML DATA/bensin2.csv')
df2

Unnamed: 0,Liter,Penumpang,Bagasi,Kecepatan,RPM,Suhu,Kilometer
0,20,1,0,42,3000,29,142
1,25,2,15,50,4000,36,177
2,20,4,10,50,3000,24,144
3,30,5,25,50,3000,18,203
4,40,1,40,50,3200,30,273
...,...,...,...,...,...,...,...
60,31,3,0,50,3000,30,203
61,37,1,0,50,3000,22,241
62,23,2,10,50,3000,30,142
63,36,2,0,50,3000,20,241


In [50]:
# Summary statistics for the numeric columns. RPM is absent from the output
# because it is still a text column at this point; it is converted to float
# in the cleaning cell further down.
df2.describe()

Unnamed: 0,Liter,Penumpang,Bagasi,Kecepatan,Suhu,Kilometer
count,65.0,65.0,65.0,65.0,65.0,65.0
mean,26.446154,2.384615,8.538462,49.876923,28.738462,165.307692
std,7.424686,1.558167,13.101857,0.992278,3.378595,68.571113
min,6.0,1.0,0.0,42.0,18.0,1.0
25%,23.0,1.0,0.0,50.0,29.0,142.0
50%,25.0,2.0,0.0,50.0,30.0,174.0
75%,30.0,4.0,15.0,50.0,30.0,211.0
max,45.0,6.0,60.0,50.0,36.0,278.0


*   Menghapus karakter backslash (`\`) pada kolom RPM untuk membersihkan data
*   Mengubah tipe data pada kolom RPM menjadi float













In [51]:
# Strip stray backslash characters from RPM and convert the column to float.
# The explicit astype(str) makes this cell safe to re-run: once RPM is already
# float64 the `.str` accessor would otherwise raise, whereas a float survives
# the str -> float round trip unchanged (3000.0 -> '3000.0' -> 3000.0).
df2['RPM'] = (
    df2['RPM']
    .astype(str)
    .str.replace('\\', '', regex=False)  # '\\' is a single literal backslash
    .astype(float)
)

## Melihat korelasi antar data pada data frame



*   Tampak bahwa korelasi 'Bagasi' terhadap 'Kilometer' (-0.023) adalah yang paling mendekati nol (bagasi hampir tidak memiliki hubungan linear dengan jarak kilometer yang ditempuh)
*   Maka dari itu, variabel 'Bagasi' tidak perlu disertakan pada proses pembuatan model





In [52]:
# Pairwise Pearson correlation between all columns. The 'Kilometer' row/column
# shows how strongly each candidate predictor is linearly related to the target.
df2.corr(method='pearson')

Unnamed: 0,Liter,Penumpang,Bagasi,Kecepatan,RPM,Suhu,Kilometer
Liter,1.0,-0.087997,0.09756,0.10937,-0.046441,0.07947,0.700815
Penumpang,-0.087997,1.0,0.081542,0.111942,-0.069143,-0.039954,-0.034175
Bagasi,0.09756,0.081542,1.0,0.082096,0.154245,-0.049363,-0.023318
Kecepatan,0.10937,0.111942,0.082096,1.0,0.02187,-0.009752,0.042819
RPM,-0.046441,-0.069143,0.154245,0.02187,1.0,0.019557,0.085907
Suhu,0.07947,-0.039954,-0.049363,-0.009752,0.019557,1.0,0.136657
Kilometer,0.700815,-0.034175,-0.023318,0.042819,0.085907,0.136657,1.0


## Melihat informasi dari dataset

*   Ternyata datasetnya bersih dan tidak ada missing value




In [65]:
# Column dtypes and non-null counts, used to check for missing values.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours, so the shown output comes from a later run than the cells
# around it - re-run the notebook top to bottom to confirm it still holds.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65 entries, 0 to 64
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Liter      65 non-null     int64  
 1   Penumpang  65 non-null     int64  
 2   Bagasi     65 non-null     int64  
 3   Kecepatan  65 non-null     int64  
 4   RPM        65 non-null     float64
 5   Suhu       65 non-null     int64  
 6   Kilometer  65 non-null     int64  
dtypes: float64(1), int64(6)
memory usage: 3.7 KB


## Melatih Model

*   90% dataset diatur untuk training dataset




In [53]:
# Predictors used by the model; the order here is the order of model1.coef_.
# NOTE(review): 'RPM' is left out as well as 'Bagasi', although its correlation
# with 'Kilometer' is higher than that of 'Penumpang' or 'Kecepatan' - confirm
# this is intended.
feature_cols = ['Liter', 'Penumpang', 'Suhu', 'Kecepatan']
X = df2[feature_cols]

# Target kept as a one-column DataFrame, so predictions and coefficients are 2-D.
y = df2[['Kilometer']]

# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = ms.train_test_split(
    X, y, test_size=0.1, random_state=0
)

# fit() returns the estimator itself, so construction and training can be chained.
model1 = lm.LinearRegression().fit(X_train, y_train)
model1

## Melihat nilai intercept dan slope

In [54]:
# Show the fitted parameters. Both are arrays rather than scalars because the
# target was passed as a one-column DataFrame. The slopes follow the column
# order of X: Liter, Penumpang, Suhu, Kecepatan.
print('intercept= ', model1.intercept_)
print('slope= ', model1.coef_)

intercept=  [71.89241946]
slope=  [[ 6.54072143  2.99204736  2.45423995 -3.21123461]]


## Melakukan prediksi dengan predictor (50 liter bensin, 5 penumpang, suhu 25 derajat, dan rata-rata kecepatan 70 km/h)

In [55]:
# One hypothetical trip, with columns in the same order as the training features.
# NOTE(review): Kecepatan=70 is above the maximum seen in the describe() output
# (50), so this prediction extrapolates beyond the observed data.
data1 = pd.DataFrame({
    'Liter': [50],
    'Penumpang': [5],
    'Suhu': [25],
    'Kecepatan': [70],
})
hasil = model1.predict(data1)
print('Hasil prediksi: ', hasil)

Hasil prediksi:  [[250.45830327]]


## Melihat perbandingan hasil prediksi dan nilai asli

In [56]:
# Predict the target for the held-out test rows so the values can be compared
# with y_test. This rebinds `hasil`, which previously held the single-row
# prediction from the cell above.
hasil = model1.predict(X_test)
hasil

array([[ 66.96464374],
       [136.74253048],
       [152.00577758],
       [141.37857467],
       [108.94740324],
       [249.57879157],
       [174.35461751]])

In [57]:
# Actual 'Kilometer' values of the test rows, in the same row order as the
# predictions made from X_test.
print(y_test)

    Kilometer
45        102
29        167
43        177
62        142
34        144
33        278
31        211


## Scoring model

In [58]:
# Coefficient of determination (R^2) of the model on the held-out test split.
# The split contains only 7 rows (see the prediction output above), so this
# score is a rough estimate.
r2 = model1.score(X_test, y_test)
print('R2= ', r2)

R2=  0.68229909916362
