<a href="https://colab.research.google.com/github/TaysTyas/Bakudan/blob/master/Model_Prediksi_Berat_Badan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Modelling Prediksi Berat Badan

###Import Library

In [160]:
#import library
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import warnings
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error,mean_absolute_error
warnings.filterwarnings('ignore')
import joblib

In [161]:
#load dataset
dataset  = "https://raw.githubusercontent.com/TaysTyas/Bakudan/master/weight-height.csv"
data = pd.read_csv(dataset)

In [162]:
#menampilkan 5 dataset teratas
data.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [163]:
#cek tipe data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  10000 non-null  object 
 1   Height  10000 non-null  float64
 2   Weight  10000 non-null  float64
dtypes: float64(2), object(1)
memory usage: 234.5+ KB


###Data Preprocessing

In [164]:
#membuat label
label_enc = LabelEncoder()
#Male = 1, Female = 0
data.Gender = label_enc.fit_transform(data.Gender)

In [165]:
data.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [166]:
#menampilkan baris dan column pada dataset
data

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.042470
4,1,69.881796,206.349801
...,...,...,...
9995,0,66.172652,136.777454
9996,0,67.067155,170.867906
9997,0,63.867992,128.475319
9998,0,69.034243,163.852461


In [167]:
#Mengubah data Height dari inch ke cm
def inch_to_cm(x):
    return x*2.54

#Mengubah data Weight dari pounds ke kg
def pounds_to_kg(x):
  return x*0.45359237

def convert(data):
  data["Height"] = data['Height'].apply(inch_to_cm)
  data["Weight"] = data["Weight"].apply(pounds_to_kg)
  return data
data

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.042470
4,1,69.881796,206.349801
...,...,...,...
9995,0,66.172652,136.777454
9996,0,67.067155,170.867906
9997,0,63.867992,128.475319
9998,0,69.034243,163.852461


In [168]:
#menghapus colum Weight karena weight adalah output hasil prediksinya.
X = data.drop("Weight", axis=1)
y = data["Weight"]

In [169]:
X

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796
...,...,...
9995,0,66.172652
9996,0,67.067155
9997,0,63.867992
9998,0,69.034243


In [170]:
y

0       241.893563
1       162.310473
2       212.740856
3       220.042470
4       206.349801
           ...    
9995    136.777454
9996    170.867906
9997    128.475319
9998    163.852461
9999    113.649103
Name: Weight, Length: 10000, dtype: float64

In [171]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  10000 non-null  int64  
 1   Height  10000 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 156.4 KB


###Modeling

In [172]:
list_akurasi = []

####Linear Regression

In [173]:
#Training Model
log_regr = LinearRegression()
X_train, X_test, y_train, y_test = train_test_split(X, y)

modelLR = log_regr.fit(X_train, y_train)
y_pred = log_regr.predict(X_test)

accuracy = modelLR.score(X_test, y_test)
akurasiLR = accuracy * 100
list_akurasi.append(akurasiLR)

print(f"Akurasi Model menggunakan Algoritma Linear Regression : {akurasiLR}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))


Akurasi Model menggunakan Algoritma Linear Regression : 91.01383610847338%

MAE  : 7.6193
MAPE : 0.0487
MSE  : 92.7641
RMSE : 9.6314


####KNN

In [174]:
#Training Model
modelKNN = KNeighborsRegressor(n_neighbors=10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

modelKNN = modelKNN.fit(X_train, y_train)
y_pred = modelKNN.predict(X_test)

accuracy = modelKNN.score(X_test, y_test)
akurasiKNN = accuracy * 100
list_akurasi.append(akurasiKNN)

print(f"Akurasi Model menggunakan Algoritma KNN : {akurasiKNN}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model menggunakan Algoritma KNN : 89.66718188491957%

MAE  : 8.3094
MAPE : 0.0535
MSE  : 107.5312
RMSE : 10.3697


####Random Forest

In [175]:
#Training Model
modelRF = RandomForestRegressor()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

modelRF = modelRF.fit(X_train, y_train)
y_pred = modelRF.predict(X_test)

accuracy = modelRF.score(X_test, y_test)
akurasiRF = accuracy * 100
list_akurasi.append(akurasiRF)

print(f"Akurasi Model menggunakan Algoritma Random Forest : {akurasiRF}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model menggunakan Algoritma Random Forest : 85.32092407618796%

MAE  : 9.6333
MAPE : 0.0622
MSE  : 147.8720
RMSE : 12.1603


####SVR

In [176]:
#Training Model
modelSVR = SVR(kernel='linear')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

modelSVM = modelSVR.fit(X_train, y_train)
y_pred = modelSVR.predict(X_test)

accuracy = modelSVR.score(X_test, y_test)
akurasiSVR = accuracy * 100
list_akurasi.append(akurasiSVR)

print(f"Akurasi Model menggunakan Algoritma SVM : {akurasiSVR}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model menggunakan Algoritma SVM : 90.08110027700323%

MAE  : 7.9149
MAPE : 0.0510
MSE  : 97.2218
RMSE : 9.8601


####Decision Tree

In [177]:
#Training Model
modelDT = DecisionTreeRegressor()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

modelDT = modelDT.fit(X_train, y_train)
y_pred = modelDT.predict(X_test)

accuracy = modelDT.score(X_test, y_test)
akurasiDT = accuracy * 100
list_akurasi.append(akurasiDT)

print(f"Akurasi Model menggunakan Algoritma Decision Tree : {akurasiDT}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model menggunakan Algoritma Decision Tree : 80.70490620614827%

MAE  : 11.4653
MAPE : 0.0736
MSE  : 202.1309
RMSE : 14.2173


####Polynomial Regression

In [178]:
#Training Model
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

modelPR = LinearRegression()
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size = 0.3)

model = modelPR.fit(X_train, y_train)
y_pred = modelPR.predict(X_test)

accuracy = modelPR.score(X_test, y_test)
akurasiPR = accuracy * 100
list_akurasi.append(akurasiPR)

print(f"Akurasi Model menggunakan Algoritma Polynomial Regression : {akurasiPR}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model menggunakan Algoritma Polynomial Regression : 90.34933856594532%

MAE  : 7.8005
MAPE : 0.0505
MSE  : 97.3916
RMSE : 9.8687


###Memilih model dengan akurasi terbaik untuk di deploy

In [179]:
list_akurasi.sort()
#Melihat Algoritma mana yang menghasilkan akurasi model Tertinggi
if list_akurasi[5] == akurasiLR:
  joblib.dump((modelLR), "weight-prediction-using-linear-regression.pkl")
  print("Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah Linear Regression\n")

elif list_akurasi[5] == akurasiPR:
  joblib.dump((modelPR), "weight-prediction-using-polynomial-regression.pkl")
  print("Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah Polynomial Regression\n")
  
elif list_akurasi[5] == akurasiSVR:
  joblib.dump((modelSVR), "weight-prediction-using-svr.pkl")
  print("Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah SVR\n")
  
elif list_akurasi[5] == akurasiDT:
  joblib.dump((modelDT), "weight-prediction-using-decision-tree.pkl")
  print("Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah Decision Tree\n")
  
elif list_akurasi[0] == akurasiRF:
  joblib.dump((modelRF), "weight-prediction-using-random-forest.pkl")
  print("Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah Random Forest\n")
  
else:
  joblib.dump((modelKNN), "weight-prediction-using-knn.pkl")
  print("Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah KNN\n")


Algoritma yang menghasilkan AKURASI MODEL TERBAIK adalah Linear Regression

