<a href="https://colab.research.google.com/github/TaysTyas/Bakudan/blob/testmodel/Model_Prediksi_Berat_Badan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Modelling Prediksi Berat Badan

###Import Library

In [2]:
#import library
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import warnings
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error,mean_absolute_error
warnings.filterwarnings('ignore')
import joblib

In [3]:
#load dataset
dataset  = "https://raw.githubusercontent.com/TaysTyas/Bakudan/master/weight-height.csv"
data = pd.read_csv(dataset)

In [4]:
#menampilkan 5 dataset teratas
data.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [5]:
#cek tipe data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  10000 non-null  object 
 1   Height  10000 non-null  float64
 2   Weight  10000 non-null  float64
dtypes: float64(2), object(1)
memory usage: 234.5+ KB


###Data Preprocessing

In [6]:
#membuat label
label_enc = LabelEncoder()
#Male = 1, Female = 0
data.Gender = label_enc.fit_transform(data.Gender)

In [7]:
data.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [8]:
#menampilkan baris dan column pada dataset
data

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.042470
4,1,69.881796,206.349801
...,...,...,...
9995,0,66.172652,136.777454
9996,0,67.067155,170.867906
9997,0,63.867992,128.475319
9998,0,69.034243,163.852461


In [9]:
#Mengubah data Height dari inch ke cm
def inch_to_cm(x):
    return x*2.54

#Mengubah data Weight dari pounds ke kg
def pounds_to_kg(x):
  return x*0.45359237

def convert(data):
  data["Height"] = data['Height'].apply(inch_to_cm)
  data["Weight"] = data["Weight"].apply(pounds_to_kg)
  return data
data

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.042470
4,1,69.881796,206.349801
...,...,...,...
9995,0,66.172652,136.777454
9996,0,67.067155,170.867906
9997,0,63.867992,128.475319
9998,0,69.034243,163.852461


In [10]:
#menghapus colum Weight karena weight adalah output hasil prediksinya.
X = data.drop("Weight", axis=1)
y = data["Weight"]

In [11]:
X

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796
...,...,...
9995,0,66.172652
9996,0,67.067155
9997,0,63.867992
9998,0,69.034243


In [12]:
y

0       241.893563
1       162.310473
2       212.740856
3       220.042470
4       206.349801
           ...    
9995    136.777454
9996    170.867906
9997    128.475319
9998    163.852461
9999    113.649103
Name: Weight, Length: 10000, dtype: float64

In [13]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  10000 non-null  int64  
 1   Height  10000 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 156.4 KB


###Modeling

####Linear Regression

In [14]:
#Training Model
log_regr = LinearRegression()
X_train, X_test, y_train, y_test = train_test_split(X, y)

model = log_regr.fit(X_train, y_train)
y_pred = log_regr.predict(X_test)

accuracy = model.score(X_test, y_test)
print(f"Akurasi Model: {accuracy * 100}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))



Akurasi Model: 90.43815819719279%

MAE  : 7.8966
MAPE : 0.0510
MSE  : 97.6045
RMSE : 9.8795


####KNN

In [15]:
#Training Model
model = KNeighborsRegressor(n_neighbors=10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

model = model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = model.score(X_test, y_test)
print(f"Akurasi Model: {accuracy * 100}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model: 89.05320220413647%

MAE  : 8.3705
MAPE : 0.0534
MSE  : 109.7624
RMSE : 10.4768


####Random Forest

In [16]:
#Training Model
model = RandomForestRegressor()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

model = model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = model.score(X_test, y_test)
print(f"Akurasi Model: {accuracy * 100}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model: 85.94500050924574%

MAE  : 9.5517
MAPE : 0.0616
MSE  : 144.7343
RMSE : 12.0306


###SVM

In [17]:
#Training Model
model = SVR(kernel='linear')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

model = model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = model.score(X_test, y_test)
print(f"Akurasi Model: {accuracy * 100}%\n")
print("MAE  : %.4f"%(mean_absolute_error(y_pred, y_test)))
print("MAPE : %.4f"%(mean_absolute_percentage_error(y_pred, y_test)))
print("MSE  : %.4f"%(mean_squared_error(y_pred, y_test)))
print("RMSE : %.4f"%(mean_squared_error(y_pred, y_test, squared=False)))

Akurasi Model: 90.53211950463526%

MAE  : 8.0883
MAPE : 0.0519
MSE  : 100.8992
RMSE : 10.0449


###Memilih model dengan akurasi terbaik untuk di deploy