# **Capstone Project : GejalaKu**
# **Data Preparation**


## Import Libraries & Load Dataset

In [90]:
#!pip install deep_translator

Collecting deep_translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep_translator
Successfully installed deep_translator-1.11.4


In [91]:
import numpy as np
import pandas as pd
from deep_translator import GoogleTranslator

In [2]:
from google.colab import files

# Upload file kaggle.json tanpa menampilkan isinya
uploaded = files.upload()

# Pastikan file sudah di-upload sebelum melanjutkan
import os
import json

# Buat folder ~/.kaggle jika belum ada
os.makedirs("/root/.kaggle", exist_ok=True)

# Pindahkan file ke folder konfigurasi Kaggle
with open("kaggle.json", "r") as f:
    kaggle_token = json.load(f)

# Simpan ke lokasi yang dibutuhkan Kaggle CLI
with open("/root/.kaggle/kaggle.json", "w") as f:
    json.dump(kaggle_token, f)

# Atur permission agar hanya dapat diakses oleh user
os.chmod("/root/.kaggle/kaggle.json", 0o600)

Saving kaggle.json to kaggle.json


In [3]:
# Download kaggle dataset and unzip the file
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download -d itachi9604/disease-symptom-description-dataset
!kaggle datasets download -d manncodes/drug-prescription-to-disease-dataset

!unzip disease-symptom-description-dataset.zip
!unzip drug-prescription-to-disease-dataset.zip

Dataset URL: https://www.kaggle.com/datasets/itachi9604/disease-symptom-description-dataset
License(s): CC-BY-SA-4.0
Downloading disease-symptom-description-dataset.zip to /content
  0% 0.00/30.1k [00:00<?, ?B/s]
100% 30.1k/30.1k [00:00<00:00, 146MB/s]
Dataset URL: https://www.kaggle.com/datasets/manncodes/drug-prescription-to-disease-dataset
License(s): CC-BY-NC-SA-4.0
Downloading drug-prescription-to-disease-dataset.zip to /content
  0% 0.00/129k [00:00<?, ?B/s]
100% 129k/129k [00:00<00:00, 438MB/s]
Archive:  disease-symptom-description-dataset.zip
  inflating: Symptom-severity.csv    
  inflating: dataset.csv             
  inflating: symptom_Description.csv  
  inflating: symptom_precaution.csv  
Archive:  drug-prescription-to-disease-dataset.zip
  inflating: final.csv               


In [95]:
# dataset penyakit dan gejala
df_disease = pd.read_csv('dataset.csv')
df_disease.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


In [96]:
# dataset penyakit dan obat
df_drug = pd.read_csv('final.csv')
df_drug.head()

Unnamed: 0.1,Unnamed: 0,disease,drug
0,0,Alkylating Agent Cystitis,sodium bicarbonate
1,1,Alkylating Agent Cystitis,citric acid / sodium citrate
2,2,Abdominal Distension,bethanechol
3,3,Abdominal Distension,pamabrom
4,4,Abdominal Distension,bethanechol


In [97]:
# menghapus kolom tidak penting pada df_drug
df_drug.drop(columns=['Unnamed: 0'], inplace=True)

In [98]:
# dataset deskripsi penyakit
df_disease_description = pd.read_csv('symptom_Description.csv')
df_disease_description.head()

Unnamed: 0,Disease,Description
0,Drug Reaction,An adverse drug reaction (ADR) is an injury ca...
1,Malaria,An infectious disease caused by protozoan para...
2,Allergy,An allergy is an immune system response to a f...
3,Hypothyroidism,"Hypothyroidism, also called underactive thyroi..."
4,Psoriasis,Psoriasis is a common skin disorder that forms...


In [99]:
# dataset pencegahan penyakit
df_disease_precaution = pd.read_csv('symptom_precaution.csv')
df_disease_precaution.head()

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths


## Cek Dataset

In [100]:
# cek missing value di df_drug
print(f"banyak data dengan missing value di df_drug :")
print(df_drug.isna().sum())
print()

# cek data duplikat di df_disease dan df_drug
print(f"banyak data duplikat di df_disease : {df_disease.duplicated().sum()}")
print(f"banyak data duplikat di df_drug : {df_drug.duplicated().sum()}")
print(f"banyak data duplikat di df_disease_description : {df_disease_description.duplicated().sum()}")
print(f"banyak data duplikat di df_disease_precaution : {df_disease_precaution.duplicated().sum()}")

banyak data dengan missing value di df_drug :
disease    0
drug       0
dtype: int64

banyak data duplikat di df_disease : 4616
banyak data duplikat di df_drug : 8829
banyak data duplikat di df_disease_description : 0
banyak data duplikat di df_disease_precaution : 0


In [101]:
# hapus duplikat
df_disease = df_disease.drop_duplicates()
df_drug = df_drug.drop_duplicates()

In [102]:
# mengecek penyakit dan gejala apa saja yang ada pada dataset

list_gejala = pd.unique(df_disease.iloc[:, 1:].values.ravel())

print(f'banyak penyakit : {df_disease["Disease"].nunique()}')
print('penyakit :')
print(df_disease['Disease'].unique())
print()
print(f'banyak gejala : {list_gejala.shape[0]}')
print('gejala :')
print(list_gejala)

banyak penyakit : 41
penyakit :
['Fungal infection' 'Allergy' 'GERD' 'Chronic cholestasis' 'Drug Reaction'
 'Peptic ulcer diseae' 'AIDS' 'Diabetes ' 'Gastroenteritis'
 'Bronchial Asthma' 'Hypertension ' 'Migraine' 'Cervical spondylosis'
 'Paralysis (brain hemorrhage)' 'Jaundice' 'Malaria' 'Chicken pox'
 'Dengue' 'Typhoid' 'hepatitis A' 'Hepatitis B' 'Hepatitis C'
 'Hepatitis D' 'Hepatitis E' 'Alcoholic hepatitis' 'Tuberculosis'
 'Common Cold' 'Pneumonia' 'Dimorphic hemmorhoids(piles)' 'Heart attack'
 'Varicose veins' 'Hypothyroidism' 'Hyperthyroidism' 'Hypoglycemia'
 'Osteoarthristis' 'Arthritis' '(vertigo) Paroymsal  Positional Vertigo'
 'Acne' 'Urinary tract infection' 'Psoriasis' 'Impetigo']

banyak gejala : 132
gejala :
['itching' ' skin_rash' ' nodal_skin_eruptions' ' dischromic _patches' nan
 ' continuous_sneezing' ' shivering' ' chills' ' watering_from_eyes'
 ' stomach_pain' ' acidity' ' ulcers_on_tongue' ' vomiting' ' cough'
 ' chest_pain' ' yellowish_skin' ' nausea' ' loss_of_

In [103]:
print(f'banyak penyakit : {df_drug["disease"].nunique()}')
print('penyakit :')
print(df_drug['disease'].unique())

banyak penyakit : 1508
penyakit :
['Alkylating Agent Cystitis' 'Abdominal Distension'
 'Abdominal Distension Prior to Abdominal X-ray' ...
 'Yellow Fever Prophylaxis' 'Zinc Deficiency' 'Zollinger-Ellison Syndrome']


In [104]:
print(f'banyak penyakit di df_disease_description : {df_disease_description["Disease"].nunique()}')
print(f'banyak penyakit di df_disease_precaution : {df_disease_precaution["Disease"].nunique()}')

banyak penyakit di df_disease_description : 41
banyak penyakit di df_disease_precaution : 41


## Menyesuaikan obat di df_drug berdasarkan df_disease

In [105]:
# cek apakah semua disease pada dataset symptom ada pada dataset drug

diseases_in_disease_df = set(df_disease['Disease'].unique())
diseases_in_drug_df = set(df_drug['disease'].unique())

# cek disease di df_disease yang tidak ada di df
missing_diseases = diseases_in_disease_df - diseases_in_drug_df

# menampilkan penyakit yang tidak ada
print("Penyakit yang TIDAK ada di dataset drug prescription:")
print(f'banyak penyakit : {len(missing_diseases)}')
print(missing_diseases)

Penyakit yang TIDAK ada di dataset drug prescription:
banyak penyakit : 29
{'Paralysis (brain hemorrhage)', 'Jaundice', 'Dimorphic hemmorhoids(piles)', 'Hepatitis D', 'Arthritis', 'Osteoarthristis', 'Dengue', 'Chronic cholestasis', 'Allergy', 'AIDS', 'Hypothyroidism', 'Fungal infection', 'Diabetes ', 'Typhoid', 'Hypertension ', 'Varicose veins', 'Alcoholic hepatitis', 'Tuberculosis', 'hepatitis A', 'Chicken pox', 'Cervical spondylosis', '(vertigo) Paroymsal  Positional Vertigo', 'Bronchial Asthma', 'Urinary tract infection', 'Drug Reaction', 'Hepatitis E', 'Peptic ulcer diseae', 'Common Cold', 'Heart attack'}


note untuk kesesuaian disease pada symptom dan drug
- Osteoarthristis ada di drug, tp typo aja yg di symptom itu harusnya Osteoarthritis
- Hepatitis D **tidak ada** di drug
- Heart Attack ada di drug tp namanya Heart Attack (Myocardial Infarction)
- (vertigo) Paroymsal  Positional Vertigo sama dengan Vertigo di drug
- Dengue sama dengan Dengue Fever di drug
- Hypertension sama dengan High Blood Pressure (Hypertension) di drug
- Hypothyroidism sama dengan Underactive Thyroid (Hypothyroidism) di drug
- Jaundice bisa disamakan dengan Hyperbilirubinemia di drug
- Fungal infection sama dengan Fungal Infections di drug
- Cervical spondylosis bisa disamakan dengan Neck Pain di drug karena relevan secara klinis
- Varicose veins sama dengan Varicose Veins di drug
- Drug Reaction bisa disamakan dengan 'anemia, drug induced', 'constipation, drug induced', 'drug induced vitamin/mineral deficiency' di drug (bedanya lebih spesifik yang di drug)
- Paralysis (brain hemorrhage) **tidak ada** di drug
- Common Cold sama dengan Cold Symptoms di drug
- Peptic ulcer diseae sama dengan Peptic Ulcer di drug
- Urinary tract infection sama dengan Urinary Tract Infection di drug
- Alcoholic hepatitis bisa diganti dengan Alcoholic Liver Damage di drug (bedanya lebih umum di drug)
- Diabetes bisa disamakan dengan 'Diabetes, Type 2', 'Diabetes Mellitus', 'Diabetes, Type 1', 'Gestational Diabetes', 'Diabetes, Type 3c' di drug (bedanya do drug spesifik)
- AIDS bisa menggunakan AIDS Related Wasting di drug (obat yang fokus pada suatu komplikasi penyakit tersebut)
- Tuberculosis bisa disamakan dengan 'Tuberculosis, Active' dan 'Pulmonary Tuberculosis' di drug
- Chicken pox bisa menggunakan Varicella-Zoster di drug
- Arthritis bisa menggunakan 'Rheumatoid Arthritis' dan 'Spondyloarthritis' di drug
- Hepatitis E bisa menggunakan Hepatitis A di drug (karena gejalanya sangat mirip)
- Typhoid sama dengan Typhoid Fever di drug
- Dimorphic hemmorhoids(piles) sama dengan Hemorrhoids di drug
- hepatitis A sama dengan Hepatitis A di drug
- Allergy sama dengan Allergic Reactions di drug
- Chronic cholestasis bisa menggunakan Primary Biliary Cholangitis di drug (jenis penyakit terkait)
- Bronchial Asthma sama dengan Asthma di drug

In [106]:
"""
Mengubah nama penyakit pada df_drug agar sesuai dengan df_disease
"""

# mapping eksplisit dari df_drug → nama penyakit di df_disease
drug_to_disease_mapping = {
    "Osteoarthritis": "Osteoarthristis",
    "Heart Attack (Myocardial Infarction)": "Heart attack",
    "Vertigo": "(vertigo) Paroymsal  Positional Vertigo",
    "Dengue Fever": "Dengue",
    "High Blood Pressure (Hypertension)": "Hypertension ",
    "Underactive Thyroid (Hypothyroidism)": "Hypothyroidism",
    "Hyperbilirubinemia": "Jaundice",
    "Fungal Infections": "Fungal infection",
    "Neck Pain": "Cervical spondylosis",
    "Varicose Veins": "Varicose veins",
    "anemia, drug induced": "Drug Reaction",
    "constipation, drug induced": "Drug Reaction",
    "drug induced vitamin/mineral deficiency": "Drug Reaction",
    "Cold Symptoms": "Common Cold",
    "Peptic Ulcer": "Peptic ulcer diseae",
    "Urinary Tract Infection": "Urinary tract infection",
    "Alcoholic Liver Damage": "Alcoholic hepatitis",
    "Diabetes, Type 1": "Diabetes ",
    "Diabetes, Type 2": "Diabetes ",
    "Diabetes Mellitus": "Diabetes ",
    "Diabetes, Type 3c": "Diabetes ",
    "Gestational Diabetes": "Diabetes ",
    "AIDS Related Wasting": "AIDS",
    "Tuberculosis, Active": "Tuberculosis",
    "Pulmonary Tuberculosis": "Tuberculosis",
    "Varicella-Zoster": "Chicken pox",
    "Rheumatoid Arthritis": "Arthritis",
    "Spondyloarthritis": "Arthritis",
    "Typhoid Fever": "Typhoid",
    "Hemorrhoids": "Dimorphic hemmorhoids(piles)",
    "Allergic Reactions": "Allergy",
    "Primary Biliary Cholangitis": "Chronic cholestasis",
    "Asthma": "Bronchial Asthma",
    "Hepatitis A": "hepatitis A",
    'Anemia, Drug Induced': 'Drug Reaction',
    'Constipation, Drug Induced': 'Drug Reaction',
    'Drug Induced Vitamin/Mineral Deficiency': 'Drug Reaction'
}

# menambahkan keterangan untuk penyakit yang tidak ada obatnya di df_drug
new_data = pd.DataFrame({
    'disease': ['Hepatitis D', 'Paralysis (brain hemorrhage)'],
    'drug': [
        'Obat spesifik untuk Hepatitis D tidak tersedia di dataset. Disarankan untuk konsultasi dengan dokter spesialis hati.',
        'Paralysis akibat brain hemorrhage memerlukan penanganan medis kompleks. Segera konsultasikan ke dokter saraf.'
    ]})
# tambahkan ke df_drug
df_drug = pd.concat([df_drug, new_data], ignore_index=True)

# Terapkan mapping ke df_drug
df_drug['disease'] = df_drug['disease'].replace(drug_to_disease_mapping)

# Baris yang penyakitnya Hepatitis A
hepa_a_rows = df_drug[df_drug['disease'] == "hepatitis A"].copy()
# Ubah nama penyakit jadi Hepatitis E di copy-nya
hepa_a_rows['disease'] = "Hepatitis E"
# Gabungkan kembali ke df_drug
df_drug = pd.concat([df_drug, hepa_a_rows], ignore_index=True)

In [107]:
# cek apakah semua disease pada dataset symptom sudah ada pada dataset drug

diseases_in_disease_df_after = set(df_disease['Disease'].unique())
diseases_in_drug_df_after = set(df_drug['disease'].unique())

# cek disease di df_disease yang tidak ada di df
missing_diseases_after = diseases_in_disease_df_after - diseases_in_drug_df_after

# menampilkan penyakit yang tidak ada
print("Penyakit yang TIDAK ada di dataset drug prescription setelah dimapping:")
print(len(missing_diseases_after))

Penyakit yang TIDAK ada di dataset drug prescription setelah dimapping:
0


## Translate menjadi Bahasa Indonesia

### Nama Penyakit / Disease

In [108]:
# Dictionary mapping
disease_translation = {
    'Fungal infection': 'Infeksi jamur',
    'Allergy': 'Alergi',
    'GERD': 'GERD (refluks asam lambung)',
    'Chronic cholestasis': 'Kolestasis kronis',
    'Drug Reaction': 'Reaksi obat',
    'Peptic ulcer diseae': 'Tukak lambung',
    'AIDS': 'AIDS',
    'Diabetes ': 'Diabetes',
    'Gastroenteritis': 'Gastroenteritis',
    'Bronchial Asthma': 'Asma bronkial',
    'Hypertension ': 'Hipertensi',
    'Migraine': 'Migrain',
    'Cervical spondylosis': 'Spondilosis servikal',
    'Paralysis (brain hemorrhage)': 'Kelumpuhan (perdarahan otak)',
    'Jaundice': 'Penyakit kuning',
    'Malaria': 'Malaria',
    'Chicken pox': 'Cacar air',
    'Dengue': 'Demam berdarah (Dengue)',
    'Typhoid': 'Tifus',
    'hepatitis A': 'Hepatitis A',
    'Hepatitis B': 'Hepatitis B',
    'Hepatitis C': 'Hepatitis C',
    'Hepatitis D': 'Hepatitis D',
    'Hepatitis E': 'Hepatitis E',
    'Alcoholic hepatitis': 'Hepatitis alkoholik',
    'Tuberculosis': 'Tuberkulosis',
    'Common Cold': 'Flu biasa',
    'Pneumonia': 'Pneumonia',
    'Dimorphic hemmorhoids(piles)': 'Wasir tipe dimorfik',
    'Heart attack': 'Serangan jantung',
    'Varicose veins': 'Varises',
    'Hypothyroidism': 'Hipotiroidisme',
    'Hyperthyroidism': 'Hipertiroidisme',
    'Hypoglycemia': 'Hipoglikemia',
    'Osteoarthristis': 'Osteoartritis',
    'Arthritis': 'Arthritis',
    '(vertigo) Paroymsal  Positional Vertigo': 'Vertigo posisi paroksismal',
    'Acne': 'Jerawat',
    'Urinary tract infection': 'Infeksi saluran kemih',
    'Psoriasis': 'Psoriasis',
    'Impetigo': 'Impetigo'
}

# Terapkan mapping ke kolom Disease
df_disease['Disease'] = df_disease['Disease'].map(disease_translation)
df_drug['disease'] = df_drug['disease'].map(disease_translation)
df_disease_description['Disease'] = df_disease_description['Disease'].map(disease_translation)
df_disease_precaution['Disease'] = df_disease_precaution['Disease'].map(disease_translation)

### Nama Gejala / Symptom

In [109]:
# Dictionary mapping gejala
symptom_translation = {
    'itching': 'gatal',
    'skin_rash': 'ruam kulit',
    'nodal_skin_eruptions': 'erupsi kulit nodular',
    'dischromic _patches': 'bercak kulit tidak merata warna',
    'continuous_sneezing': 'bersin terus-menerus',
    'shivering': 'menggigil',
    'chills': 'kedinginan',
    'watering_from_eyes': 'mata berair',
    'stomach_pain': 'sakit perut',
    'acidity': 'asam lambung',
    'ulcers_on_tongue': 'luka di lidah',
    'vomiting': 'muntah',
    'cough': 'batuk',
    'chest_pain': 'nyeri dada',
    'yellowish_skin': 'kulit menguning',
    'nausea': 'mual',
    'loss_of_appetite': 'kehilangan nafsu makan',
    'abdominal_pain': 'nyeri perut',
    'yellowing_of_eyes': 'mata menguning',
    'burning_micturition': 'nyeri saat buang air kecil',
    'spotting_ urination': 'bercak saat buang air kecil',
    'passage_of_gases': 'buang angin',
    'internal_itching': 'gatal dari dalam tubuh',
    'indigestion': 'gangguan pencernaan',
    'muscle_wasting': 'penyusutan otot',
    'patches_in_throat': 'bercak di tenggorokan',
    'high_fever': 'demam tinggi',
    'extra_marital_contacts': 'kontak di luar pernikahan',
    'fatigue': 'kelelahan',
    'weight_loss': 'penurunan berat badan',
    'restlessness': 'gelisah',
    'lethargy': 'lesu',
    'irregular_sugar_level': 'gula darah tidak teratur',
    'blurred_and_distorted_vision': 'penglihatan kabur dan terganggu',
    'obesity': 'obesitas',
    'excessive_hunger': 'lapar berlebihan',
    'increased_appetite': 'nafsu makan meningkat',
    'polyuria': 'sering buang air kecil',
    'sunken_eyes': 'mata cekung',
    'dehydration': 'dehidrasi',
    'diarrhoea': 'diare',
    'breathlessness': 'sesak napas',
    'family_history': 'riwayat keluarga',
    'mucoid_sputum': 'dahak berlendir',
    'headache': 'sakit kepala',
    'dizziness': 'pusing',
    'loss_of_balance': 'kehilangan keseimbangan',
    'lack_of_concentration': 'sulit berkonsentrasi',
    'stiff_neck': 'leher kaku',
    'depression': 'depresi',
    'irritability': 'mudah marah',
    'visual_disturbances': 'gangguan penglihatan',
    'back_pain': 'sakit punggung',
    'weakness_in_limbs': 'lemah di anggota tubuh',
    'neck_pain': 'sakit leher',
    'weakness_of_one_body_side': 'kelemahan di satu sisi tubuh',
    'altered_sensorium': 'gangguan kesadaran',
    'dark_urine': 'urin berwarna gelap',
    'sweating': 'berkeringat',
    'muscle_pain': 'nyeri otot',
    'mild_fever': 'demam ringan',
    'swelled_lymph_nodes': 'pembengkakan kelenjar getah bening',
    'malaise': 'rasa tidak enak badan',
    'red_spots_over_body': 'bintik merah di tubuh',
    'joint_pain': 'nyeri sendi',
    'pain_behind_the_eyes': 'nyeri di belakang mata',
    'constipation': 'sembelit',
    'toxic_look_(typhos)': 'penampakan toksik (tifus)',
    'belly_pain': 'nyeri perut bagian bawah',
    'yellow_urine': 'urin kuning',
    'receiving_blood_transfusion': 'pernah transfusi darah',
    'receiving_unsterile_injections': 'pernah injeksi tidak steril',
    'coma': 'koma',
    'stomach_bleeding': 'perdarahan lambung',
    'acute_liver_failure': 'gagal hati akut',
    'swelling_of_stomach': 'pembengkakan perut',
    'distention_of_abdomen': 'perut kembung',
    'history_of_alcohol_consumption': 'riwayat konsumsi alkohol',
    'fluid_overload': 'kelebihan cairan tubuh',
    'phlegm': 'dahak',
    'blood_in_sputum': 'darah di dahak',
    'throat_irritation': 'iritasi tenggorokan',
    'redness_of_eyes': 'mata memerah',
    'sinus_pressure': 'tekanan sinus',
    'runny_nose': 'hidung meler',
    'congestion': 'hidung tersumbat',
    'loss_of_smell': 'kehilangan penciuman',
    'fast_heart_rate': 'detak jantung cepat',
    'rusty_sputum': 'dahak berwarna karat',
    'pain_during_bowel_movements': 'nyeri saat buang air besar',
    'pain_in_anal_region': 'nyeri di anus',
    'bloody_stool': 'tinja berdarah',
    'irritation_in_anus': 'iritasi di anus',
    'cramps': 'kram',
    'bruising': 'memar',
    'swollen_legs': 'kaki bengkak',
    'swollen_blood_vessels': 'pembuluh darah bengkak',
    'prominent_veins_on_calf': 'pembuluh darah menonjol di betis',
    'weight_gain': 'penambahan berat badan',
    'cold_hands_and_feets': 'tangan dan kaki dingin',
    'mood_swings': 'perubahan suasana hati',
    'puffy_face_and_eyes': 'wajah dan mata bengkak',
    'enlarged_thyroid': 'kelenjar tiroid membesar',
    'brittle_nails': 'kuku rapuh',
    'swollen_extremeties': 'pembengkakan anggota tubuh',
    'abnormal_menstruation': 'menstruasi tidak normal',
    'muscle_weakness': 'kelemahan otot',
    'anxiety': 'kecemasan',
    'slurred_speech': 'bicara pelo',
    'palpitations': 'jantung berdebar',
    'drying_and_tingling_lips': 'bibir kering dan kesemutan',
    'knee_pain': 'nyeri lutut',
    'hip_joint_pain': 'nyeri sendi panggul',
    'swelling_joints': 'pembengkakan sendi',
    'painful_walking': 'nyeri saat berjalan',
    'movement_stiffness': 'kekakuan gerak',
    'spinning_movements': 'gerakan berputar',
    'unsteadiness': 'ketidakstabilan',
    'pus_filled_pimples': 'jerawat berisi nanah',
    'blackheads': 'komedo',
    'scurring': 'kerak di kulit kepala',
    'bladder_discomfort': 'ketidaknyamanan kandung kemih',
    'foul_smell_of urine': 'bau tidak sedap pada urin',
    'continuous_feel_of_urine': 'rasa terus ingin buang air kecil',
    'skin_peeling': 'kulit mengelupas',
    'silver_like_dusting': 'serbuk seperti perak di kulit',
    'small_dents_in_nails': 'cekungan kecil di kuku',
    'inflammatory_nails': 'kuku meradang',
    'blister': 'lepuhan',
    'red_sore_around_nose': 'luka merah di sekitar hidung',
    'yellow_crust_ooze': 'lendir kerak kuning'
}

# terapkan mapping ke seluruh kolom gejala
symptom_cols = [f'Symptom_{i}' for i in range(1, 18)]

for col in symptom_cols:
    df_disease[col] = df_disease[col].astype(str).str.strip()  # buang spasi di awal/akhir
    df_disease[col] = df_disease[col].map(symptom_translation)

In [110]:
df_disease.sample(2)

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
175,Demam berdarah (Dengue),ruam kulit,nyeri sendi,muntah,kelelahan,demam tinggi,sakit kepala,mual,kehilangan nafsu makan,nyeri di belakang mata,sakit punggung,rasa tidak enak badan,nyeri otot,bintik merah di tubuh,,,,
211,Hepatitis C,kelelahan,kulit menguning,mual,kehilangan nafsu makan,mata menguning,riwayat keluarga,,,,,,,,,,,


### Deskripsi Penyakit

In [111]:
# Fungsi untuk menerjemahkan teks
def translate_text(text):
    if pd.isna(text):
        return text  # skip terjemahan kalau isinya NaN
    try:
        return GoogleTranslator(source='auto', target='id').translate(text)
    except Exception as e:
        print(f"Error translating: {str(text)[:30]}... -> {e}")
        return text  # fallback ke teks aslinya kalau error

# Terapkan ke kolom Description
df_disease_description['Description'] = df_disease_description['Description'].apply(translate_text)

In [115]:
df_disease_description.sample(5)

Unnamed: 0,Disease,Description
33,Hepatitis E,Suatu bentuk peradangan hati yang jarang diseb...
34,Demam berdarah (Dengue),Penyakit menular akut yang disebabkan oleh fla...
15,Tukak lambung,Peptik ulkus penyakit (PUD) adalah istirahat d...
29,Migrain,Migrain dapat menyebabkan nyeri berdenyut para...
39,Gastroenteritis,Gastroenteritis adalah peradangan saluran penc...


### Pertolongan Pertama / Precaution

In [113]:
# menerapkan translate_text ke Precaution
for col in ['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']:
  df_disease_precaution[col] = df_disease_precaution[col].apply(translate_text)

In [116]:
df_disease_precaution.sample(5)

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
27,Infeksi jamur,mandi dua kali,Gunakan detol atau neem dalam air mandi,Jaga agar area yang terinfeksi tetap kering,Gunakan kain bersih
17,Flu biasa,Minum minuman kaya vitamin C,Ambil uap,Hindari makanan dingin,tetap demam di cek
40,Tuberkulosis,Tutup mulut,Konsultasikan dengan dokter,pengobatan,istirahat
34,Demam berdarah (Dengue),Minumlah jus daun pepaya,Hindari makanan pedas berlemak,Jauhkan nyamuk,tetap terhidrasi
13,Impetigo,Rendam area yang terkena di air hangat,Gunakan antibiotik,Hapus keropeng dengan kain tekan basah,Konsultasikan dengan dokter


## Menyimpan hasil dataset

In [117]:
# Simpan DataFrame ke CSV
df_disease.to_csv("df_disease.csv", index=False)
df_drug.to_csv("df_drug.csv", index=False)
df_disease_description.to_csv("df_disease_description.csv", index=False)
df_disease_precaution.to_csv("df_disease_precaution.csv", index=False)

In [118]:
# download file
files.download("df_disease.csv")
files.download("df_drug.csv")
files.download("df_disease_description.csv")
files.download("df_disease_precaution.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>