# **Stunting Classification Using Neural Network**

---



In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# List the files in the current working directory.
!ls

sample_data


# **Import Dataset**

In [None]:
# Open the Colab file-upload widget; whatever files.upload() returns is kept in `uploaded`.
from google.colab import files
uploaded = files.upload()

Saving Data Stunting.xlsx to Data Stunting.xlsx


In [None]:
# Import the dataset from the Excel file
df = pd.read_excel('Data Stunting.xlsx')

In [None]:
# Display the loaded dataset
df

Unnamed: 0,Nama,JK,Usia,Berat,Tinggi,Status
0,FAISAL FAKIH RUKMANA,L,2.466667,4.9,58,Tidak Stunting
1,MUHAMMAD HAFIDZ ATHAFARIZ S,L,58.466667,17.0,109,Tidak Stunting
2,VIONA FEBBY YASMIN,P,5.166667,7.1,68.5,Tidak Stunting
3,GHEGHE KALISTA PUTRI,P,45.300000,13.8,95,Tidak Stunting
4,LINGGA OKTAVIAN WIRATAMA,L,53.633333,18.6,106,Tidak Stunting
...,...,...,...,...,...,...
6672,KAUTSARRAKY,L,0.000000,3.0,50,Tidak Stunting
6673,ALEENA H,P,0.000000,3.0,50,Tidak Stunting
6674,M EMRAN,L,0.000000,3.0,50,Stunting
6675,REZVAN A P,L,0.000000,3.0,50,Tidak Stunting


# **Data Cleaning**

In [None]:
# Check for missing values: keep only the rows that contain at least one NaN
row_has_nan = df.isna().any(axis=1)
nan_data = df.loc[row_has_nan]
nan_data

Unnamed: 0,Nama,JK,Usia,Berat,Tinggi,Status


In [None]:
# Count the missing entries in each column
missing_values = df.isna().sum(axis=0)
print(missing_values)

Nama      0
JK        0
Usia      0
Berat     0
Tinggi    0
Status    0
dtype: int64


In [None]:
# Check the column dtypes to see whether any column holds mixed values
df.dtypes

Nama       object
JK         object
Usia      float64
Berat     float64
Tinggi     object
Status     object
dtype: object

In [None]:
# Find the rows whose value in the Tinggi column is not a float instance
is_float_height = df['Tinggi'].apply(lambda value: isinstance(value, float))
non_float_rows = df.loc[~is_float_height]
print(non_float_rows)

                              Nama  JK       Usia  Berat Tinggi  \
0            FAISAL FAKIH RUKMANA   L    2.466667    4.9     58   
1     MUHAMMAD HAFIDZ ATHAFARIZ S   L   58.466667   17.0    109   
3            GHEGHE KALISTA PUTRI   P   45.300000   13.8     95   
4        LINGGA OKTAVIAN WIRATAMA   L   53.633333   18.6    106   
6              ASTI RAHMA SAPITRI   P   59.533333   18.5    107   
...                            ...  ..        ...    ...    ...   
6672                  KAUTSARRAKY   L    0.000000    3.0     50   
6673                     ALEENA H   P    0.000000    3.0     50   
6674                      M EMRAN   L    0.000000    3.0     50   
6675                   REZVAN A P   L    0.000000    3.0     50   
6676     APRIANZA QEISYAM GUNAWAN   L    0.000000    2.8     48   

              Status  
0     Tidak Stunting  
1     Tidak Stunting  
3     Tidak Stunting  
4     Tidak Stunting  
6     Tidak Stunting  
...              ...  
6672  Tidak Stunting  
6673  Tidak

In [None]:
# Replace any missing Tinggi values with an empty string.
# NOTE(review): the missing-value check earlier in this notebook reported 0
# nulls for every column, so this looks like a no-op on this data — confirm
# whether the cell is still needed.
df['Tinggi'] = df['Tinggi'].fillna('')

In [None]:
# Converting Tinggi to float raised an error; it turns out some rows contain a dash (-)
contains_dash = df['Tinggi'].astype(str).str.contains('-')
strip_rows = df.loc[contains_dash]
print(strip_rows)

                  Nama  JK       Usia  Berat Tinggi          Status
2087  ARKENZY ZAVIYAR   L   37.633333   13.4      -  Tidak Stunting


In [None]:
# Drop every row whose Tinggi value cannot be parsed as a number (such as the
# '-' placeholder found in the previous cell) instead of hard-coding row
# label 2087. This keeps working if the data changes and can be re-run
# without raising a KeyError. Empty/missing heights are dropped as well.
unparseable_height = pd.to_numeric(df['Tinggi'], errors='coerce').isna()
df = df.drop(index=df.index[unparseable_height.to_numpy()])

In [None]:
# Cast the Tinggi column to float and print the resulting dtype
df['Tinggi'] = df['Tinggi'].astype('float64')
print(df.dtypes['Tinggi'])

float64


In [None]:
# Display the dataset after the Tinggi column was converted to float
df

Unnamed: 0,Nama,JK,Usia,Berat,Tinggi,Status
0,FAISAL FAKIH RUKMANA,L,2.466667,4.9,58.0,Tidak Stunting
1,MUHAMMAD HAFIDZ ATHAFARIZ S,L,58.466667,17.0,109.0,Tidak Stunting
2,VIONA FEBBY YASMIN,P,5.166667,7.1,68.5,Tidak Stunting
3,GHEGHE KALISTA PUTRI,P,45.300000,13.8,95.0,Tidak Stunting
4,LINGGA OKTAVIAN WIRATAMA,L,53.633333,18.6,106.0,Tidak Stunting
...,...,...,...,...,...,...
6672,KAUTSARRAKY,L,0.000000,3.0,50.0,Tidak Stunting
6673,ALEENA H,P,0.000000,3.0,50.0,Tidak Stunting
6674,M EMRAN,L,0.000000,3.0,50.0,Stunting
6675,REZVAN A P,L,0.000000,3.0,50.0,Tidak Stunting


In [None]:
# Remove the column that is not needed
df = df.drop(columns=['Nama'])
df

Unnamed: 0,JK,Usia,Berat,Tinggi,Status
0,L,2.466667,4.9,58.0,Tidak Stunting
1,L,58.466667,17.0,109.0,Tidak Stunting
2,P,5.166667,7.1,68.5,Tidak Stunting
3,P,45.300000,13.8,95.0,Tidak Stunting
4,L,53.633333,18.6,106.0,Tidak Stunting
...,...,...,...,...,...
6672,L,0.000000,3.0,50.0,Tidak Stunting
6673,P,0.000000,3.0,50.0,Tidak Stunting
6674,L,0.000000,3.0,50.0,Stunting
6675,L,0.000000,3.0,50.0,Tidak Stunting


# **Data Processing**

**1. Mengecek Kemungkinan Imbalance Data**

In [None]:
# Number of rows labelled 'Stunting'
is_stunting = df['Status'] == 'Stunting'
min_label_len = len(df.loc[is_stunting])
print(min_label_len)

64


In [None]:
# Row labels of every 'Tidak Stunting' sample
not_stunting_rows = df.loc[df['Status'].eq('Tidak Stunting')]
max_label_index = not_stunting_rows.index
print(max_label_index)

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            6664, 6665, 6667, 6669, 6670, 6671, 6672, 6673, 6675, 6676],
           dtype='int64', length=6612)


In [None]:
# The data is imbalanced: 64 rows are labelled Stunting while 6612 rows are labelled Tidak Stunting

**2. Undersampling**

In [None]:
# Undersample the largest label: draw as many 'Tidak Stunting' row labels as
# there are 'Stunting' rows, without replacement.
# A seeded generator is used so the same rows are drawn on every run; the
# unseeded np.random.choice call made the sample (and everything trained on
# it) change between runs.
undersample_rng = np.random.default_rng(42)
random_max_index = undersample_rng.choice(max_label_index,
                                          size=min_label_len,
                                          replace=False)
print(len(random_max_index))

64


In [None]:
# Row labels of every 'Stunting' sample
stunting_rows = df.loc[df['Status'].eq('Stunting')]
min_label_index = stunting_rows.index
print(min_label_index)

Int64Index([  63,  101,  198,  200,  227,  242,  244,  283,  407,  423,  443,
             524,  544,  575,  591,  594,  796,  911, 1033, 1042, 1080, 1129,
            1154, 1245, 1644, 1650, 1836, 1883, 1930, 2089, 2184, 2187, 2434,
            2485, 2748, 2804, 2883, 2911, 2936, 2976, 3154, 3531, 3576, 4203,
            4665, 4767, 5019, 5114, 5134, 5346, 5358, 6557, 6559, 6560, 6561,
            6623, 6626, 6634, 6649, 6653, 6661, 6666, 6668, 6674],
           dtype='int64')


In [None]:
# Combine the 'Stunting' labels with the sampled 'Tidak Stunting' labels
under_sample_index = np.concatenate((min_label_index, random_max_index))
print(under_sample_index)
under_sample = df.loc[under_sample_index]

# Split into features (everything except Status) and label (Status only)
is_feature_column = df.columns != 'Status'
x = under_sample.loc[:, is_feature_column]
y = under_sample.loc[:, ~is_feature_column]

[  63  101  198  200  227  242  244  283  407  423  443  524  544  575
  591  594  796  911 1033 1042 1080 1129 1154 1245 1644 1650 1836 1883
 1930 2089 2184 2187 2434 2485 2748 2804 2883 2911 2936 2976 3154 3531
 3576 4203 4665 4767 5019 5114 5134 5346 5358 6557 6559 6560 6561 6623
 6626 6634 6649 6653 6661 6666 6668 6674 2133 5208  121 2165 4823 6228
 1440 1646 1043 1967 4763 5997 2527 4222 3269 1488  942 1383 4848 2753
 3093 1592 5063 2023 1952 6576 6118 6528  570 6552 1857 3407 1213  664
 4939 4666 4345 1269 1131 2230  345 4453  991 1939 3309 6062 1557 1038
 5666 5617 4831 5337 4349 5403 5520 4191 5348 5981  540 1185 5793  383
 1567 4547]


In [None]:
# Show the feature columns of the undersampled data
print(x)

      JK       Usia  Berat  Tinggi
63    P   19.300000    5.7    72.0
101   P   35.300000   10.7    82.2
198   L   14.500000   10.0    73.0
200   L   50.866667   12.0    95.0
227   L    4.966667    6.6    60.3
...   ..        ...    ...     ...
1185  P   40.900000   12.0    93.0
5793  P   13.333333   12.1    80.0
383   L   28.066667   11.5    86.2
1567  L   16.300000    9.9    75.3
4547  P   52.900000   15.0   102.0

[128 rows x 4 columns]


In [None]:
# Show the label column of the undersampled data
print(y)

              Status
63          Stunting
101         Stunting
198         Stunting
200         Stunting
227         Stunting
...              ...
1185  Tidak Stunting
5793  Tidak Stunting
383   Tidak Stunting
1567  Tidak Stunting
4547  Tidak Stunting

[128 rows x 1 columns]


In [None]:
# Transform & encode the feature columns
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 0 and pass the remaining columns through unchanged
one_hot_step = ('encoder', OneHotEncoder(), [0])
column_transformer = ColumnTransformer(
    transformers=[one_hot_step],
    remainder='passthrough',
)
encoded_features = column_transformer.fit_transform(x)
x = np.array(encoded_features)

In [None]:
# Show the feature array produced by the column transformer
print(x)

[[  0.           1.          19.3          5.7         72.        ]
 [  0.           1.          35.3         10.7         82.2       ]
 [  1.           0.          14.5         10.          73.        ]
 [  1.           0.          50.86666667  12.          95.        ]
 [  1.           0.           4.96666667   6.6         60.3       ]
 [  0.           1.           7.2          5.5         62.3       ]
 [  1.           0.          20.93333333   9.5         79.        ]
 [  0.           1.          20.2         10.5         74.        ]
 [  0.           1.          58.86666667  12.5         98.        ]
 [  0.           1.          28.33333333   9.          80.        ]
 [  1.           0.          41.63333333  10.          90.        ]
 [  0.           1.           0.           1.8         47.        ]
 [  0.           1.          34.          12.          86.        ]
 [  0.           1.          21.           8.          75.        ]
 [  1.           0.          23.          10.8  

In [None]:
# Encode the label column as integers
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
# `y` was selected as a one-column frame; flatten it to 1-D first, because
# passing the 2-D column made this cell emit the `column_or_1d(y, warn=True)`
# conversion warning.
y = label_encoder.fit_transform(np.ravel(y))

  y = column_or_1d(y, warn=True)


In [None]:
# Show the integer-encoded labels
print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


**2b. Oversample (SMOTE — attempted, not used)**

In [None]:
#SMOTE
#from imblearn.over_sampling import SMOTE

#Before applying SMOTE
#print(x.shape)
#print(y.shape)

In [None]:
#After applying SMOTE
#sm = SMOTE(random_state= 42)
#x_sampling, y_sampling = sm.fit_resample(x,y)

In [None]:
#print(x_sampling.shape)
#print(y_sampling.shape)

#Oversampling was tried at one point, but it failed

**3. Split Data**

In [None]:
# Split into train set and test set
from sklearn.model_selection import train_test_split
# Stratify on the labels so both classes keep the same proportion in the
# train and test sets; with only 128 undersampled rows an unstratified split
# can leave one class under-represented in the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

**4. Scaling Data**

In [None]:
from sklearn.preprocessing import StandardScaler
standard_scal = StandardScaler()

# Only the columns from index 2 onward are scaled; columns 0-1 are left as is.
# Fit the scaler on the training data only, then reuse those statistics for
# the test data. Calling fit_transform on the test set re-fitted the scaler on
# test statistics, so train and test features ended up on different scales.
x_train[:, 2:] = standard_scal.fit_transform(x_train[:, 2:])
x_test[:, 2:] = standard_scal.transform(x_test[:, 2:])

In [None]:
# Show the training features after scaling
print(x_train)

[[ 1.          0.          0.87968204  1.27746784  0.95149696]
 [ 1.          0.          1.60463223  1.0808072   1.15394312]
 [ 1.          0.          0.50850754  1.02461845  0.34415847]
 [ 0.          1.         -0.64754636 -1.53196984 -0.80303644]
 [ 0.          1.          0.20499507  0.2379759   0.14171231]
 [ 0.          1.         -0.14298103 -1.16674294 -0.26318001]
 [ 1.          0.         -0.8215344  -0.35200601 -0.58034566]
 [ 1.          0.          0.51817355  0.51891966  0.41164053]
 [ 0.          1.          0.92027925  0.18178714  0.7490508 ]
 [ 1.          0.          0.83328523  0.79986343  0.95149696]
 [ 1.          0.          0.02327422  0.65939155  0.59384207]
 [ 0.          1.         -0.22997505 -0.60485541 -0.26318001]
 [ 1.          0.         -1.04191926 -0.35200601 -0.57359746]
 [ 1.          0.          0.01360822  0.2379759   0.05398564]
 [ 1.          0.          1.29145375  0.94033532  0.81653285]
 [ 1.          0.          0.89321445  1.13699595  1.21

# **Model**

In [None]:
# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Fix TensorFlow's global seed so the randomly initialised weights are the
# same on every run; without it the reported metrics change between runs.
tf.random.set_seed(42)

# One hidden layer of 32 ReLU units followed by a single sigmoid output unit;
# the input width is taken from the number of feature columns in x_train.
model = Sequential()
model.add(Dense(32, input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# model.summary() prints the table itself and returns None, so wrapping it in
# print() added a stray "None" line after the table.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 32)                192       
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 225
Trainable params: 225
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Convert the train/test features and labels to float32 tensors
x_train, y_train, x_test, y_test = (
    tf.convert_to_tensor(array, dtype=tf.float32)
    for array in (x_train, y_train, x_test, y_test)
)

In [None]:
# Train for 150 epochs in batches of 32, passing the test split as validation data
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=150,
    batch_size=32,
    verbose=1,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [None]:
# Evaluate on the test split and print both returned metrics
evaluation = model.evaluate(x_test, y_test, verbose=0)
loss, accuracy = evaluation
for label, value in (('Loss:', loss), ('Accuracy:', accuracy)):
    print(label, value)

Loss: 0.3562883734703064
Accuracy: 0.8461538553237915


In [None]:
# Save the trained model to the file 'model.h5'
model.save('model.h5')