In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf

In [None]:
# Location of the toddler growth CSV (under /content, i.e. an absolute path).
file_path = "/content/data_balita.csv"
# Read the whole file into a DataFrame; every later cell works on `data`.
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Exploratory Data Analysis (EDA): preview the first rows, then count the
# missing values in each column.
print("Dataset Overview:", data.head(), sep="\n")
print("\nMissing Values:", data.isnull().sum(), sep="\n")

Dataset Overview:
   Umur (bulan) Jenis Kelamin  Tinggi Badan (cm)       Status Gizi
0             0     laki-laki          44.591973           stunted
1             0     laki-laki          56.705203            tinggi
2             0     laki-laki          46.863358            normal
3             0     laki-laki          47.508026            normal
4             0     laki-laki          42.743494  severely stunted

Missing Values:
Umur (bulan)         0
Jenis Kelamin        0
Tinggi Badan (cm)    0
Status Gizi          0
dtype: int64


In [None]:
# Swap the original display headers for short snake_case identifiers so the
# columns are easier to reference in code. The rename is applied in place.
data.rename(
    mapper={
        "Umur (bulan)": "umur_bulan",
        "Jenis Kelamin": "jenis_kelamin",
        "Tinggi Badan (cm)": "tinggi",
        "Status Gizi": "status_gizi",
    },
    axis="columns",
    inplace=True,
)

# Check that the columns were actually renamed
print("Renamed Columns:", data.columns, sep="\n")

Renamed Columns:
Index(['umur_bulan', 'jenis_kelamin', 'tinggi', 'status_gizi'], dtype='object')


In [None]:
# Handle missing values: discard every row that has at least one null field.
# (The EDA output above reported zero nulls, so this is a safeguard here.)
data = data.dropna(axis=0, how="any")

In [None]:
# Encode categorical features: every object-dtype column is replaced by integer
# codes. Each fitted encoder is kept in `label_encoders`, keyed by column name,
# so the same mapping can be re-applied to new inputs later.
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    label_encoders[col] = encoder

In [None]:
# One-hot encode the status_gizi column into one indicator column per class
# (named status_gizi_<code>), then swap the single column for those indicators.
target = pd.get_dummies(data["status_gizi"], prefix="status_gizi")
data = pd.concat(
    [data.drop("status_gizi", axis=1), target],
    axis="columns",
)

In [None]:
# Show the first five rows to confirm the one-hot target columns are present.
data.head(n=5)

Unnamed: 0,umur_bulan,jenis_kelamin,tinggi,status_gizi_0,status_gizi_1,status_gizi_2,status_gizi_3
0,0,0,44.591973,False,False,True,False
1,0,0,56.705203,False,False,False,True
2,0,0,46.863358,True,False,False,False
3,0,0,47.508026,True,False,False,False
4,0,0,42.743494,False,True,False,False


In [None]:
# Split features and target
y = data.loc[:, target.columns]         # the one-hot encoded target columns
X = data.drop(target.columns, axis=1)   # every column except the target

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable from run to run
# (train_test_split is already pinned through random_state below).
tf.keras.utils.set_random_seed(42)

# Split data into training and testing sets (20% of the rows held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training split only and then
# re-used on the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the model: two hidden ReLU layers followed by a softmax output.
# Both the input width and the number of output units are read from the data
# instead of being hard-coded, so the network always matches X and y.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),  # one input per feature column
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(y_train.shape[1], activation='softmax')  # one unit per one-hot class
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # matches the one-hot encoded target
              metrics=['accuracy'])

# Train the model; the last 20% of the training rows are used for validation
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Save the trained model.
# NOTE: only the network is written to disk here; `scaler` and `label_encoders`
# exist only in this kernel's memory.
model.save("model_balita.keras")

Epoch 1/5
[1m2420/2420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8038 - loss: 0.5211 - val_accuracy: 0.9544 - val_loss: 0.1343
Epoch 2/5
[1m2420/2420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9645 - loss: 0.1099 - val_accuracy: 0.9748 - val_loss: 0.0835
Epoch 3/5
[1m2420/2420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9733 - loss: 0.0797 - val_accuracy: 0.9729 - val_loss: 0.0732
Epoch 4/5
[1m2420/2420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9744 - loss: 0.0692 - val_accuracy: 0.9766 - val_loss: 0.0616
Epoch 5/5
[1m2420/2420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9768 - loss: 0.0620 - val_accuracy: 0.9714 - val_loss: 0.0684
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9705 - loss: 0.0696
Test Loss: 0.06739798933267593
Test Accuracy: 0.9716115593910217


In [None]:
# Reload the network from the file written by the training cell and print its
# layer-by-layer summary.
model = tf.keras.models.load_model(filepath="model_balita.keras")
model.summary()

In [None]:
# Prepare input data: a single sample described with the renamed column names.
input_data = dict(
    umur_bulan=16,
    jenis_kelamin="laki-laki",
    tinggi=91,
)

# Build a one-row DataFrame and replace the text category with the integer
# code produced by the encoder that was fitted on the training data.
input_df = pd.DataFrame(data=[input_data]).assign(
    jenis_kelamin=lambda frame: label_encoders['jenis_kelamin'].transform(frame['jenis_kelamin'])
)

# Keep the columns in the same order as the training features
input_df = input_df.loc[:, ["umur_bulan", "jenis_kelamin", "tinggi"]]

# Apply the scaler fitted on the training split
input_array_scaled = scaler.transform(input_df)

In [None]:
# Map each output index to a descriptive label.
# The one-hot columns were built from LabelEncoder codes, and LabelEncoder
# numbers its classes in sorted order (0=normal, 1=severely stunted,
# 2=stunted, 3=tinggi). The labels are therefore read from the fitted encoder
# itself; a hand-written list in any other order would mislabel predictions.
class_labels = [str(name).title() for name in label_encoders["status_gizi"].classes_]

# Predict: the model returns one row of class probabilities per input row
prediction = model.predict(input_array_scaled)

# Interpret prediction: take the most probable class of the first (only) sample.
# Indexing row 0 explicitly avoids argmax over the flattened batch.
predicted_class_index = int(np.argmax(prediction[0]))
predicted_class_label = class_labels[predicted_class_index]  # descriptive label

# Print results
print(f"Prediction (raw probabilities): {prediction[0]}")
print(f"Predicted class index: {predicted_class_index}")
print(f"Predicted class: {predicted_class_label}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Prediction (raw probabilities): [3.0565829e-05 0.0000000e+00 1.2860028e-27 9.9996948e-01]
Predicted class index: 3
Predicted class: Tinggi
