<a href="https://colab.research.google.com/github/DenisKai7/GroUp/blob/ml-training/GroUp_stunting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Dataset dari github

In [None]:
!git clone https://github.com/DenisKai7/GroUp.git

Cloning into 'GroUp'...
remote: Enumerating objects: 69, done.[K
remote: Counting objects: 100% (69/69), done.[K
remote: Compressing objects: 100% (56/56), done.[K
remote: Total 69 (delta 15), reused 4 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (69/69), 92.22 KiB | 2.36 MiB/s, done.
Resolving deltas: 100% (15/15), done.


In [None]:
%cd GroUp
!git checkout dataset

/content/GroUp
Branch 'dataset' set up to track remote branch 'dataset' from 'origin'.
Switched to a new branch 'dataset'


In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

Import Library yang dibutuhkan

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

**Preprocessing**

load dataset

In [7]:
#Load Dataset
data_path = '/content/GroUp/Stunting_Dataset.csv'
nutrition_data_path = '/content/GroUp/nutrition.csv'
data = pd.read_csv(data_path)
nutrition_data = pd.read_csv(nutrition_data_path)

#Preprocessing Data
class DataPreprocessor:
    def __init__(self, data):
        self.data = data
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()

    def preprocess(self):
        # Handle missing values
        self._handle_missing_values()
        # Feature engineering
        self._feature_engineering()
        return self._prepare_model_data()

    def _handle_missing_values(self):
        numeric_columns = self.data.select_dtypes(include=[np.number]).columns
        self.data[numeric_columns] = self.data[numeric_columns].fillna(
            self.data[numeric_columns].median()
        )

    def _feature_engineering(self):
        self.data['BMI'] = self.data['Body Weight'] / ((self.data['Body Length'] / 100) ** 2)
        self.data['Gender_Encoded'] = self.label_encoder.fit_transform(self.data['Gender'])

    def _prepare_model_data(self):
        features = ['Age', 'Birth Weight', 'Birth Length', 'Body Weight',
                    'Body Length', 'BMI', 'Gender_Encoded']
        X = self.data[features]
        y = self.label_encoder.fit_transform(self.data['Stunting'])

        X_scaled = self.scaler.fit_transform(X)
        return X_scaled, y

preprocessor = DataPreprocessor(data)
X, y = preprocessor.preprocess()

In [8]:
class ModelTrainer:
    def __init__(self):
        self.models = {}

    def train_random_forest(self, X, y):
        def objective(trial):
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 50, 300),
                'max_depth': trial.suggest_int('max_depth', 3, 10),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 10)
            }

            rf = RandomForestClassifier(**params)
            rf.fit(X, y)

            y_pred = rf.predict(X)
            return accuracy_score(y, y_pred)

        study = optuna.create_study(direction='maximize')
        study.optimize(objective, n_trials=50)

        best_rf = RandomForestClassifier(**study.best_params)
        best_rf.fit(X, y)
        return best_rf, study.best_params

trainer = ModelTrainer()
rf_model, rf_params = trainer.train_random_forest(X, y)

[I 2024-11-12 16:10:13,705] A new study created in memory with name: no-name-d6397159-2819-4d8e-bc1d-9e5efe83ebc7
[I 2024-11-12 16:10:14,255] Trial 0 finished with value: 0.8604 and parameters: {'n_estimators': 109, 'max_depth': 5, 'min_samples_split': 5}. Best is trial 0 with value: 0.8604.
[I 2024-11-12 16:10:14,498] Trial 1 finished with value: 0.8511 and parameters: {'n_estimators': 59, 'max_depth': 3, 'min_samples_split': 9}. Best is trial 0 with value: 0.8604.
[I 2024-11-12 16:10:15,307] Trial 2 finished with value: 0.8615 and parameters: {'n_estimators': 164, 'max_depth': 5, 'min_samples_split': 3}. Best is trial 2 with value: 0.8615.
[I 2024-11-12 16:10:16,927] Trial 3 finished with value: 0.8774 and parameters: {'n_estimators': 252, 'max_depth': 8, 'min_samples_split': 7}. Best is trial 3 with value: 0.8774.
[I 2024-11-12 16:10:18,243] Trial 4 finished with value: 0.8717 and parameters: {'n_estimators': 219, 'max_depth': 7, 'min_samples_split': 3}. Best is trial 3 with value: 

In [10]:
def create_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, input_shape=input_shape, return_sequences=True),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        Dropout(0.3),
        Dense(1)
    ])
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

lstm_model = create_lstm_model((X.shape[1], 1))
lstm_model.summary()

# Transforming data for LSTM
X_lstm = np.expand_dims(X, axis=2)
y_lstm = y.astype(float)

X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2)
lstm_history = lstm_model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test))


Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 16ms/step - loss: 0.3056 - val_loss: 0.1558
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - loss: 0.1652 - val_loss: 0.1546
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.1527 - val_loss: 0.1393
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - loss: 0.1424 - val_loss: 0.1358
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 0.1311 - val_loss: 0.1343
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.1258 - val_loss: 0.1328
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.1247 - val_loss: 0.1274
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.1216 - val_loss: 0.1251
Epoch 9/100
[1m250/250[0m

In [11]:
class NutritionRecommender:
    def __init__(self, nutrition_data):
        self.nutrition_data = nutrition_data

    def recommend_nutrition(self, age, stunting_status, weight):
        filtered_data = self.nutrition_data[
            (self.nutrition_data['usia minimal(bulan)'] <= age) &
            (self.nutrition_data['usia maksimal(bulan)'] >= age)
        ]

        if stunting_status == 'Stunting':
            recommended = filtered_data[
                (filtered_data['protein (g)'] > filtered_data['protein (g)'].median()) &
                (filtered_data['kalori'] > filtered_data['kalori'].median())
            ]
        else:
            recommended = filtered_data

        recommended['adjusted_portion'] = recommended['kalori'] * (weight / 10)

        return recommended.sort_values('adjusted_portion', ascending=False).head(5)

recommender = NutritionRecommender(nutrition_data)
recommendations = recommender.recommend_nutrition(age=24, stunting_status='Stunting', weight=10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended['adjusted_portion'] = recommended['kalori'] * (weight / 10)
