In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import json
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score




In [2]:
def set_project_directory():
    current_dir = os.getcwd()
    
    if os.path.basename(current_dir) == 'scripts':
        os.chdir('..')
    
    print(f"Working directory set to: {os.getcwd()}")

set_project_directory()

Working directory set to: c:\Users\Dana\Documents\Kuliah\Bangkit\Capstone-C242-PS384_Project01


In [87]:
def train_and_save_model():

      data_path = "dataset/symptoms-data/symptoms-dataset.csv"
      data = pd.read_csv(data_path)

      print("Dataset shape:", data.shape)
      print("Columns:", data.columns)

      X = data.drop("prognosis", axis=1)
      y = data["prognosis"]

      label_encoder = LabelEncoder()
      y_encoded = label_encoder.fit_transform(y)

      scaler = StandardScaler()
      X_scaled = scaler.fit_transform(X)

      X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

      # Define the model
      model = Sequential([
      Dense(256, activation='relu', input_shape=(X_train.shape[1],),
            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      BatchNormalization(),
      Dropout(0.4),
      Dense(128, activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      BatchNormalization(),
      Dropout(0.3),
      Dense(64, activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      Dense(len(label_encoder.classes_), activation='softmax')
      ])

      model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

      history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=30, batch_size=32, verbose=1)

      loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
      print(f"Test Loss: {loss:.4f}")
      print(f"Test Accuracy: {accuracy:.4f}")

      y_pred = np.argmax(model.predict(X_test), axis=1)
      print("\nClassification Report:\n")
      print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

      model.save('models/symptoms_predict_model.h5')
      print("Model saved at models/symptoms_predict_model.h5")

      label_mapping = {
            'index_to_label': {str(i): label for i, label in enumerate(label_encoder.classes_)},
            'label_to_index': {label: str(i) for i, label in enumerate(label_encoder.classes_)}
      }
      
      with open('models/symptoms_labels.json', 'w') as f:
            json.dump(label_mapping, f, indent=2)
      print("Label mapping saved at models/symptoms_labels.json")

train_and_save_model()

Dataset shape: (4961, 133)
Columns: Index(['itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing',
       'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity',
       'ulcers_on_tongue',
       ...
       'blackheads', 'scurring', 'skin_peeling', 'silver_like_dusting',
       'small_dents_in_nails', 'inflammatory_nails', 'blister',
       'red_sore_around_nose', 'yellow_crust_ooze', 'prognosis'],
      dtype='object', length=133)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test Loss: 0.3247
Test Accuracy: 1.0000

Classification Report:

                               precision    recall  f1-score   support

                         AIDS       1.00      1.00   

  saving_api.save_model(


In [3]:
symptoms_mapping = {
    'gatal': 'itching',
    'ruam kulit': 'skin_rash', 
    'benjolan pada kulit': 'nodal_skin_eruptions',
    'jerawat bernanah': 'pus_filled_pimples',
    'komedo': 'blackheads',
    'kulit mengelupas': 'skin_peeling',
    'kulit seperti berdebu perak': 'silver_like_dusting',
    'luka merah di sekitar hidung': 'red_sore_around_nose',
    'keropeng kuning': 'yellow_crust_ooze',#
    'bersin terus menerus': 'continuous_sneezing',
    'menggigil': 'shivering',
    'meriang': 'chills',
    'nyeri sendi': 'joint_pain',
    'sakit perut': 'stomach_pain',
    'asam lambung': 'acidity',
    'sariawan': 'ulcers_on_tongue',
    'otot mengecil': 'muscle_wasting',
    'muntah': 'vomiting',
    'rasa terbakar saat buang air kecil': 'burning_micturition',
    'bercak saat buang air kecil': 'spotting_urination',
    'kelelahan': 'fatigue',#
    'kenaikan berat badan': 'weight_gain',
    'penurunan berat badan': 'weight_loss',
    'kecemasan': 'anxiety',
    'tangan dan kaki dingin': 'cold_hands_and_feets',
    'perubahan suasana hati': 'mood_swings',
    'gelisah': 'restlessness',
    'lesu': 'lethargy',#
    'bercak di tenggorokan': 'patches_in_throat',
    'batuk': 'cough',
    'sesak napas': 'breathlessness',
    'berkeringat': 'sweating',
    'dehidrasi': 'dehydration',
    'gangguan pencernaan': 'indigestion',
    'sakit kepala': 'headache',#
    'kulit kuning': 'yellowish_skin',
    'urin gelap': 'dark_urine',
    'mual': 'nausea',
    'kehilangan nafsu makan': 'loss_of_appetite',
    'nyeri di belakang mata': 'pain_behind_the_eyes',
    'nyeri punggung': 'back_pain',
    'sembelit': 'constipation',
    'nyeri perut': 'abdominal_pain',
    'diare': 'diarrhoea',#
    'demam ringan': 'mild_fever',
    'demam tinggi': 'high_fever',
    'mata cekung': 'sunken_eyes',
    'urin kuning': 'yellow_urine',
    'mata kuning': 'yellowing_of_eyes',
    'gagal hati akut': 'acute_liver_failure',#
    'kelebihan cairan': 'fluid_overload',
    'perut membengkak': 'swelling_of_stomach',
    'pembengkakan kelenjar getah bening': 'swelled_lymph_nodes',
    'malaise': 'malaise',
    'penglihatan kabur': 'blurred_and_distorted_vision',
    'dahak': 'phlegm',
    'iritasi tenggorokan': 'throat_irritation',
    'mata merah': 'redness_of_eyes',
    'tekanan sinus': 'sinus_pressure',
    'hidung berair': 'runny_nose',
    'hidung tersumbat': 'congestion',
    'nyeri dada': 'chest_pain',
    'kelemahan anggota tubuh': 'weakness_in_limbs',
    'detak jantung cepat': 'fast_heart_rate',#
    'nyeri saat buang air besar': 'pain_during_bowel_movements',
    'nyeri di daerah anus': 'pain_in_anal_region',
    'tinja berdarah': 'bloody_stool',
    'iritasi pada anus': 'irritation_in_anus',
    'nyeri leher': 'neck_pain',
    'pusing': 'dizziness',
    'kram': 'cramps',
    'memar': 'bruising',
    'obesitas': 'obesity',
    'kaki bengkak': 'swollen_legs',
    'pembuluh darah bengkak': 'swollen_blood_vessels',
    'wajah dan mata bengkak': 'puffy_face_and_eyes',
    'kelenjar tiroid membesar': 'enlarged_thyroid',
    'kuku rapuh': 'brittle_nails',
    'ekstremitas bengkak': 'swollen_extremeties',
    'rasa lapar berlebihan': 'excessive_hunger',
    'bibir kering dan kesemutan': 'drying_and_tingling_lips',
    'bicara pelo': 'slurred_speech',
    'nyeri lutut': 'knee_pain',
    'nyeri sendi pinggul': 'hip_joint_pain',
    'kelemahan otot': 'muscle_weakness',
    'leher kaku': 'stiff_neck',
    'sendi bengkak': 'swelling_joints',
    'kekakuan gerakan': 'movement_stiffness',
    'gerakan berputar': 'spinning_movements',
    'kehilangan keseimbangan': 'loss_of_balance',
    'goyah': 'unsteadiness',
    'kelemahan satu sisi tubuh': 'weakness_of_one_body_side',
    'kehilangan penciuman': 'loss_of_smell',
    'ketidaknyamanan kandung kemih': 'bladder_discomfort',
    'bau urin tidak sedap': 'foul_smell_of urine',
    'buang air kecil terus menerus': 'continuous_feel_of_urine',
    'buang gas': 'passage_of_gases',
    'gatal internal': 'internal_itching',
    'wajah toksik': 'toxic_look_(typhos)',
    'depresi': 'depression',
    'mudah tersinggung': 'irritability',
    'nyeri otot': 'muscle_pain',
    'perubahan kesadaran': 'altered_sensorium',
    'bintik merah di tubuh': 'red_spots_over_body',
    'nyeri perut': 'belly_pain',
    'menstruasi tidak normal': 'abnormal_menstruation',
    'bercak perubahan warna': 'dischromic_patches',
    'mata berair': 'watering_from_eyes',
    'nafsu makan meningkat': 'increased_appetite',
    'buang air kecil berlebihan': 'polyuria',
    'riwayat keluarga': 'family_history',
    'dahak berlendir': 'mucoid_sputum',
    'dahak berkarat': 'rusty_sputum',
    'kurang konsentrasi': 'lack_of_concentration',
    'gangguan penglihatan': 'visual_disturbances',
    'menerima transfusi darah': 'receiving_blood_transfusion',
    'menerima suntikan tidak steril': 'receiving_unsterile_injections',
    'koma': 'coma',
    'pendarahan lambung': 'stomach_bleeding',
    'perut membuncit': 'distention_of_abdomen',
    'riwayat konsumsi alkohol': 'history_of_alcohol_consumption',
    'dahak berdarah': 'blood_in_sputum',
    'pembuluh darah menonjol di betis': 'prominent_veins_on_calf',
    'jantung berdebar': 'palpitations',
    'nyeri saat berjalan': 'painful_walking',
    'lepuh': 'blister'
}

In [4]:
def predict_multiple_diseases(symptoms_input, threshold=0.1):
    invalid_symptoms = []
    
    for symptom in symptoms_input:
        if symptom.lower() in symptoms_mapping:
            symptoms_input.append(symptoms_mapping[symptom.lower()])
        else:
            invalid_symptoms.append(symptom)
    
    df = pd.read_csv('dataset/symptoms-data/symptoms-dataset.csv')
    symptoms_columns = df.columns[:-1]
    
    input_array = np.zeros(len(symptoms_columns))
    
    valid_symptoms = []
    for symptom in symptoms_input:
        if symptom in symptoms_columns:
            index = symptoms_columns.get_loc(symptom)
            input_array[index] = 1
            valid_symptoms.append(symptom)
    
    model = tf.keras.models.load_model('models/symptoms_predict_model.h5')
    with open('models/symptoms_labels.json', 'r') as f:
        label_mapping = json.load(f)
    
    input_reshaped = input_array.reshape(1, -1)
    prediction_probs = model.predict(input_reshaped)[0]
    
    possible_diseases = []
    for idx, prob in enumerate(prediction_probs):
        if prob >= threshold:
            disease = label_mapping['index_to_label'][str(idx)]
            possible_diseases.append({
                'disease': disease,
                'probability': prob
            })
    
    possible_diseases = sorted(possible_diseases, 
                             key=lambda x: x['probability'], 
                             reverse=True)
    
    return {
        'predictions': possible_diseases,
        'valid_symptoms': [list(symptoms_mapping.keys())[list(symptoms_mapping.values()).index(s)] 
                         for s in valid_symptoms],
        'invalid_symptoms': invalid_symptoms
    }

def get_available_symptoms():
    return list(symptoms_mapping.keys())

def search_symptoms(keyword):
    available = get_available_symptoms()
    matches = [s for s in available if keyword.lower() in s.lower()]
    return matches



In [5]:
if __name__ == "__main__":
    # symptoms input must using b.indo and based from the symptoms_mapping dictionary above
    symptoms = ['pusing', 'sakit kepala', 'meriang', 'muntah']
    
    print("inputted symptoms:", symptoms)
    print("\nanalyze symptoms...\n")
    
    results = predict_multiple_diseases(symptoms, threshold=0.1)
    
    print("valid symptoms:", ", ".join(results['valid_symptoms']))
    if results['invalid_symptoms']:
        print("invlid symptoms:", ", ".join(results['invalid_symptoms']))
    
    print("\nPossible Diseases:")
    if results['predictions']:
        for pred in results['predictions']:
            prob_percentage = pred['probability'] * 100
            print(f"- {pred['disease']}: {prob_percentage:.1f}%")
    else:
        print("no possible disease based on inputted symptoms.")

inputted symptoms: ['pusing', 'sakit kepala', 'meriang', 'muntah']

analyze symptoms...


valid symptoms: pusing, sakit kepala, meriang, muntah
invlid symptoms: dizziness, headache, chills, vomiting

Possible Diseases:
- Hypertension : 67.2%
