# Posttest KB 6: Deep Neural Network (DNN)

* Import Library yang digunakan

In [70]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense
from sklearn.impute import SimpleImputer

In [71]:
data = pd.read_csv('Forbes Top2000 2017.csv')
data

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner
...,...,...,...,...,...,...,...,...
4335,Hyundai i20 Magna 1.4 CRDi (Diesel),2014,409999,80000,Diesel,Individual,Manual,Second Owner
4336,Hyundai i20 Magna 1.4 CRDi,2014,409999,80000,Diesel,Individual,Manual,Second Owner
4337,Maruti 800 AC BSIII,2009,110000,83000,Petrol,Individual,Manual,Second Owner
4338,Hyundai Creta 1.6 CRDi SX Option,2016,865000,90000,Diesel,Individual,Manual,First Owner


In [72]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4340 entries, 0 to 4339
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           4340 non-null   object
 1   year           4340 non-null   int64 
 2   selling_price  4340 non-null   int64 
 3   km_driven      4340 non-null   int64 
 4   fuel           4340 non-null   object
 5   seller_type    4340 non-null   object
 6   transmission   4340 non-null   object
 7   owner          4340 non-null   object
dtypes: int64(3), object(5)
memory usage: 271.4+ KB


# Preprocessing dataset

* Melihat apakah ada data null atau duplikat

In [73]:
data.isna().sum()

name             0
year             0
selling_price    0
km_driven        0
fuel             0
seller_type      0
transmission     0
owner            0
dtype: int64

In [74]:
data = pd.read_csv('Forbes Top2000 2017.csv')

print(data.columns)

print(data['Sector'].dtype)
print(data['Industry'].dtype)

simpleImputerObjek = SimpleImputer(strategy='most_frequent')
data.loc[:, 'Sector'] = simpleImputerObjek.fit_transform(data[['Sector']])
data.loc[:, 'Industry'] = simpleImputerObjek.fit_transform(data[['Industry']])

print(data.isnull().sum())


* Mengubah Nilai Target Dengan Encoding

In [75]:
X = data.drop('selling_price', axis=1) 
y = data['selling_price']

In [76]:
label = LabelEncoder()
y = label.fit_transform(y)

In [77]:
# Memilih atribut yang dianggap memengaruhi hasil prediksi
selected_features = ['year', 'km_driven']
X = data[selected_features]
y = data['selling_price']

* Menerapkan One-Hot Encoding menggunakan get_dummies dari Pandas

In [78]:
try:
    X = pd.get_dummies(X, columns=['name', 'fuel', 'seller_type', 'transmission', 'owner'], drop_first=True)
except KeyError as e:
    print("KeyError:", e)
    print("Error Details:", str(e))

KeyError: "None of [Index(['name', 'fuel', 'seller_type', 'transmission', 'owner'], dtype='object')] are in the [columns]"
Error Details: "None of [Index(['name', 'fuel', 'seller_type', 'transmission', 'owner'], dtype='object')] are in the [columns]"


* Standarisasi

In [79]:
print('Nilai standarisasi sebelum dilakukan scaling: ')
std = data.std(numeric_only=True)
np.std(data)

Nilai standarisasi sebelum dilakukan scaling: 


  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


year                  4.214858
selling_price    578482.079219
km_driven         46638.728140
dtype: float64

In [80]:
standard_scaler = StandardScaler()
x_standard = standard_scaler.fit_transform(
    data[['year', 'km_driven', 'selling_price']]
)

In [81]:
print('Nilai sesudah dilakukan scalling: ')
np.std(x_standard)

Nilai sesudah dilakukan scalling: 


1.0

# Split dataset menjadi train dan test

In [82]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state=42)

# Model sequential

In [83]:
model = Sequential([
    Dense(units=64, activation='relu',input_shape=(None,1)), #Output Layer Wajib units
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid')
])

model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_21 (Dense)            (None, 128)               384       
                                                                 
 dense_22 (Dense)            (None, 32)                4128      
                                                                 
 dense_23 (Dense)            (None, 1)                 33        
                                                                 
Total params: 4545 (17.75 KB)
Trainable params: 4545 (17.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Model Training

__Model Compile__

In [92]:
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics=['accuracy']
)

__Call Back__

* Callback untuk menghentikan pelatihan jika tidak ada perbaikan pada validation loss

In [93]:
stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

* Callback untuk menyimpan model terbaik berdasarkan validation loss

In [94]:
check = ModelCheckpoint('best_model.h5', save_best_only=True)

__Model Fit__

In [95]:
model.fit(
    x = x_train,
    y = y_train,
    epochs=100, #100x Pembelajaran
    validation_data = (X, y),
    batch_size=64,
    callbacks=[stop, check]
)

Epoch 1/100
Epoch 2/100

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


<keras.src.callbacks.History at 0x1bed0861de0>