### **Import Library**

In [2]:
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models

In [3]:
!pip install tensorflow




## **Import Data**

In [5]:
# Load the dataset from data_balita.csv and preview its first five rows
data = pd.read_csv(filepath_or_buffer='data_balita.csv')
data.head(n=5)

Unnamed: 0,Umur (bulan),Jenis Kelamin,Tinggi Badan (cm),Status Gizi
0,0,laki-laki,44.591973,stunted
1,0,laki-laki,56.705203,tinggi
2,0,laki-laki,46.863358,normal
3,0,laki-laki,47.508026,normal
4,0,laki-laki,42.743494,severely stunted


## **1. Data Understanding**

In [11]:
# Replace the original headers with short English names, in file order:
# Umur (bulan) -> Age, Jenis Kelamin -> Gender, Tinggi Badan (cm) -> Height, Status Gizi -> Status
english_headers = ['Age', 'Gender', 'Height', 'Status']
data.columns = english_headers

In [12]:
# Preview the first five rows to confirm the new column names
data.head(n=5)

Unnamed: 0,Age,Gender,Height,Status
0,0,laki-laki,44.591973,stunted
1,0,laki-laki,56.705203,tinggi
2,0,laki-laki,46.863358,normal
3,0,laki-laki,47.508026,normal
4,0,laki-laki,42.743494,severely stunted


In [7]:
# Dimensions of the DataFrame as (rows, columns)
data.shape

(120999, 4)

In [8]:
# Column dtypes, non-null counts and memory usage.
# NOTE(review): the saved output below still lists the original Indonesian column
# names, i.e. this cell (execution count 8) was run before the rename cell (11);
# re-run the notebook top to bottom to refresh it.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120999 entries, 0 to 120998
Data columns (total 4 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Umur (bulan)       120999 non-null  int64  
 1   Jenis Kelamin      120999 non-null  object 
 2   Tinggi Badan (cm)  120999 non-null  float64
 3   Status Gizi        120999 non-null  object 
dtypes: float64(1), int64(1), object(2)
memory usage: 3.7+ MB


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
# NOTE(review): the saved output below uses the pre-rename column names, so it was
# produced before the rename cell was executed.
data.describe()

Unnamed: 0,Umur (bulan),Tinggi Badan (cm)
count,120999.0,120999.0
mean,30.173803,88.655434
std,17.575119,17.300997
min,0.0,40.010437
25%,15.0,77.0
50%,30.0,89.8
75%,45.0,101.2
max,60.0,128.0


## **2. Cleaning Data**

In [13]:
# Handle missing values, if any: discard every row that contains at least one NaN
data = data.dropna(axis=0, how='any')

In [14]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted)
data.duplicated(keep='first').sum()

81574

In [16]:
# Remove fully duplicated rows, keeping the first occurrence of each.
# NOTE(review): the count cell above reports 81574 duplicates out of 120999 rows,
# so this step discards most of the dataset — confirm that is intended.
data = data.drop_duplicates(keep='first')

In [17]:
# Re-count repeated rows to verify that none remain after de-duplication
data.duplicated(keep='first').sum()

0

## **3. Label Encoding**

In [18]:
# Encode the gender column as integers: laki-laki = 1, perempuan = 0.
# Any value not present in the mapping becomes NaN, so this cell must not be
# run twice on the same frame.
gender_codes = {'laki-laki': 1, 'perempuan': 0}
data['Gender'] = data['Gender'].map(gender_codes)

In [20]:
# Encode the status labels as integers. LabelEncoder numbers the classes in sorted
# order, which for this data gives:
#   normal = 0, severely stunted = 1, stunted = 2, tinggi = 3
# (see the preview in the next cell: stunted -> 2, severely stunted -> 1).
label_encoder = LabelEncoder()
label_encoder.fit(data['Status'])
data['Status'] = label_encoder.transform(data['Status'])


In [21]:
# Preview the first five rows to check the integer-encoded Gender and Status columns
data.head(n=5)

Unnamed: 0,Age,Gender,Height,Status
0,0,1,44.591973,2
1,0,1,56.705203,3
2,0,1,46.863358,0
3,0,1,47.508026,0
4,0,1,42.743494,1


## **4. Data Preprocessing**

In [24]:
# Separate the model inputs (features) from the target (label) as NumPy arrays
feature_columns = ['Age', 'Gender', 'Height']
X = data[feature_columns].values  # feature matrix, one row per record
y = data['Status'].values  # integer-encoded status label

In [25]:
# Split into training and test sets: 20% of the rows are held out for testing,
# and the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
# Standardize the features (StandardScaler: zero mean, unit variance per column —
# not a bounded 0..1 or -1..1 range). The scaler is fitted on the training split
# only and then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## **5. Modeling & Evaluation**

In [27]:
# Build a feedforward neural network: two ReLU hidden layers and a softmax output.
# `layers.Input(shape=...)` is used instead of `layers.InputLayer(input_shape=...)`
# because the `input_shape` argument of InputLayer is deprecated in current Keras.
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),  # input: one value per feature column
    layers.Dense(64, activation='relu'),  # hidden layer 1
    layers.Dense(32, activation='relu'),  # hidden layer 2
    layers.Dense(4, activation='softmax')  # output layer: probabilities for the 4 status classes
])



In [28]:
# Compile the model. The sparse categorical cross-entropy loss is used because
# the labels are integer class indices rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [29]:
# Train the model for 10 epochs in mini-batches of 32 samples.
# NOTE(review): the test split is passed as validation data and is evaluated again
# in the next cell, so the reported test accuracy is not measured on data that was
# unseen during model monitoring — consider a separate validation split.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.6695 - loss: 0.8092 - val_accuracy: 0.9155 - val_loss: 0.2598
Epoch 2/10
[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9260 - loss: 0.2280 - val_accuracy: 0.9467 - val_loss: 0.1663
Epoch 3/10
[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9471 - loss: 0.1593 - val_accuracy: 0.9510 - val_loss: 0.1384
Epoch 4/10
[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9547 - loss: 0.1310 - val_accuracy: 0.9552 - val_loss: 0.1190
Epoch 5/10
[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9588 - loss: 0.1159 - val_accuracy: 0.9672 - val_loss: 0.1021
Epoch 6/10
[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9602 - loss: 0.1048 - val_accuracy: 0.9568 - val_loss: 0.1057
Epoch 7/10
[1m986/986[0m 

In [30]:
# Evaluate the trained model on the test split; evaluate() returns the loss
# followed by the compiled metric (accuracy), which is printed as a percentage
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m247/247[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9750 - loss: 0.0679
Test Accuracy: 97.65%


## **6. Save Model**

In [31]:
# Write the trained model to an HDF5 file in the working directory.
# NOTE(review): recent Keras releases treat .h5 as a legacy format and suggest
# .keras — confirm before renaming, since loaders may depend on this filename.
model.save(filepath='model_status_gizi.h5')



In [32]:
# Persist the fitted preprocessing objects so they can be reloaded for inference.
# joblib is imported together with the other libraries in the import cell at the
# top of the notebook instead of in the middle of the workflow.

# Save the fitted scaler
joblib.dump(scaler, 'scaler_status_gizi.pkl')

# Save the fitted label encoder (last expression: its written file list is displayed)
joblib.dump(label_encoder, 'label_encoder_status_gizi.pkl')

['label_encoder_status_gizi.pkl']

In [33]:
# NOTE(review): identical to the scaler dump in the previous cell; it only rewrites
# the same file and displays the written path — candidate for removal.
joblib.dump(scaler, 'scaler_status_gizi.pkl')

['scaler_status_gizi.pkl']

### **Contoh Data Prediksi**

In [34]:
# Example of a new record to classify: age = 24 months, gender = 1 (laki-laki),
# height = 85 cm. The column order matches the training features: Age, Gender, Height.
data_baru = np.array([[24, 1, 85]])

# Scale the record with the same scaler that was fitted on the training split
data_baru_scaled = scaler.transform(data_baru)

# Predict the class probabilities (one row per input record)
predictions = model.predict(data_baru_scaled)

# Index of the most probable class for the single record
predicted_class = np.argmax(predictions[0])

# Convert the class index back to its original text label
predicted_label = label_encoder.inverse_transform([predicted_class])[0]

print(f"Prediksi Status Gizi: {predicted_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
Prediksi Status Gizi: normal
