# Import Libraries

In [1]:
import pandas as pd
import os

# Create the CSV File

In [2]:
# Sample patient records, one tuple per patient: (id, name, age, symptoms).
# The symptoms field is a single comma-separated string.
patient_rows = [
    (1, 'Andi', 45, "demam, batuk, sesak napas"),
    (2, 'Budi', 29, "mual, sakit perut"),
    (3, 'Citra', 62, "pusing, kehilangan keseimbangan"),
    (4, 'Dita', 30, "susah tidur"),
    (5, 'Eka', 18, "gusi berdarah"),
    (6, 'Fitra', 49, "pusing, sakit perut"),
    (7, 'Gio', 49, "menggigil, batuk, sakit kepala"),
    (8, 'Harianto', 33, "memar di tangan"),
    (9, 'Idul', 88, "susah tidur"),
    (10, 'Jaka', 54, "sesak nafas"),
]
column_names = ('id', 'name', 'age', 'symptoms')

# Transpose the rows into a column-oriented dict (column name -> list of values)
data = {
    column: list(values)
    for column, values in zip(column_names, zip(*patient_rows))
}

In [3]:
# Build the patient table from the column-oriented `data` dict
df_create = pd.DataFrame(data=data)

In [7]:
# Name of the CSV file that stores the patient table
csv_file_name = 'patients.csv'

# Write the file only when it is missing, so an existing copy is never overwritten
if os.path.exists(csv_file_name):
    print(f"File '{csv_file_name}' sudah ada. Melewati pembuatan file.")
else:
    # index=False keeps the DataFrame's row index out of the CSV
    df_create.to_csv(csv_file_name, index=False)
    print(f"File '{csv_file_name}' berhasil dibuat.")

File 'patients.csv' berhasil dibuat.


# Task

## Loads the CSV

In [8]:
# Load the CSV file into `df`
try:
    df = pd.read_csv(csv_file_name)
except FileNotFoundError:
    print(f"Error: File '{csv_file_name}' tidak ditemukan. Pastikan file ada di direktori yang sama.")
    # Re-raise instead of calling exit(): the notebook should stop at this cell
    # with a real traceback rather than continue without `df`.
    raise
else:
    print(f"File '{csv_file_name}' berhasil dimuat.")

print("\nData mentah:")
print(df)

File 'patients.csv' berhasil dimuat.

Data mentah:
   id      name  age                         symptoms
0   1      Andi   45        demam, batuk, sesak napas
1   2      Budi   29                mual, sakit perut
2   3     Citra   62  pusing, kehilangan keseimbangan
3   4      Dita   30                      susah tidur
4   5       Eka   18                    gusi berdarah
5   6     Fitra   49              pusing, sakit perut
6   7       Gio   49   menggigil, batuk, sakit kepala
7   8  Harianto   33                  memar di tangan
8   9      Idul   88                      susah tidur
9  10      Jaka   54                      sesak nafas


## Tokenizes the symptoms into lists

In [9]:
def _split_symptoms(raw):
    """Split a comma-separated symptom string into a list of trimmed tokens.

    Args:
        raw: The value of the `symptoms` column for one row.

    Returns:
        A list of non-empty, whitespace-stripped symptom strings. Values that
        are not strings (e.g. NaN from an empty CSV field) give an empty list.
    """
    if not isinstance(raw, str):
        return []
    stripped = (token.strip() for token in raw.split(','))
    # Skip empty tokens (e.g. from a trailing comma) so they are not counted as symptoms
    return [token for token in stripped if token]


# Tokenize every row's symptoms into a list stored in a new column
df['symptoms_list'] = df['symptoms'].apply(_split_symptoms)
print("\nGejala setelah tokenisasi:")
print(df[['id', 'name', 'symptoms_list']])


Gejala setelah tokenisasi:
   id      name                      symptoms_list
0   1      Andi        [demam, batuk, sesak napas]
1   2      Budi                [mual, sakit perut]
2   3     Citra  [pusing, kehilangan keseimbangan]
3   4      Dita                      [susah tidur]
4   5       Eka                    [gusi berdarah]
5   6     Fitra              [pusing, sakit perut]
6   7       Gio   [menggigil, batuk, sakit kepala]
7   8  Harianto                  [memar di tangan]
8   9      Idul                      [susah tidur]
9  10      Jaka                      [sesak nafas]


## Filters patients older than 40 with more than 2 symptoms

In [18]:
# Step 1: keep only the patients whose age is strictly greater than 40
is_older_than_40 = df['age'] > 40
filtered_by_age = df.loc[is_older_than_40]

In [20]:
# Step 2: among the age-filtered patients, keep those with more than 2 symptoms.
# Rows whose symptoms_list is not a list are excluded.
has_many_symptoms = filtered_by_age['symptoms_list'].apply(
    lambda symptoms: isinstance(symptoms, list) and len(symptoms) > 2
)
filtered_patients = filtered_by_age[has_many_symptoms]

print("\nPasien yang disaring (usia > 40 dan gejala > 2):")
print(filtered_patients[['id', 'name', 'age', 'symptoms_list']])


Pasien yang disaring (usia > 40 dan gejala > 2):
   id  name  age                     symptoms_list
0   1  Andi   45       [demam, batuk, sesak napas]
6   7   Gio   49  [menggigil, batuk, sakit kepala]
