In [1]:
import csv

# Patient records as provided in the requirements. The header row is kept
# apart from the data rows and then recombined into `patient_data`.
PATIENT_COLUMNS = ["id", "name", "age", "symptoms"]
patient_rows = [
    [1, "Andi", 45, "demam, batuk, sesak napas"],
    [2, "Budi", 29, "mual, sakit perut"],
    [3, "Citra", 62, "pusing, kehilangan keseimbangan"],
    [4, "Dita", 30, "susah tidur"],
    [5, "Eka", 18, "gusi berdarah"],
    [6, "Fitra", 49, "pusing, sakit perut"],
    [7, "Gio", 49, "menggigil, batuk, sakit kepala"],
    [8, "Harianto", 33, "memar di tangan"],
    [9, "Idul", 88, "susah tidur"],
    [10, "Jaka", 54, "sesak nafas"],
]
patient_data = [PATIENT_COLUMNS, *patient_rows]

# Persist header + rows to patients.csv. newline='' leaves line endings to
# the csv module; symptom strings that contain commas are quoted by the writer.
with open('patients.csv', mode='w', encoding='utf-8', newline='') as csv_file:
    csv.writer(csv_file).writerows(patient_data)

In [2]:
import pandas as pd

# Load the patient records from patients.csv into a DataFrame
df = pd.read_csv('patients.csv')

# Display a summary of the loaded data.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()
print(df.head())
print(df.dtypes)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        10 non-null     int64 
 1   name      10 non-null     object
 2   age       10 non-null     int64 
 3   symptoms  10 non-null     object
dtypes: int64(2), object(2)
memory usage: 448.0+ bytes
None
   id   name  age                         symptoms
0   1   Andi   45        demam, batuk, sesak napas
1   2   Budi   29                mual, sakit perut
2   3  Citra   62  pusing, kehilangan keseimbangan
3   4   Dita   30                      susah tidur
4   5    Eka   18                    gusi berdarah
id           int64
name        object
age          int64
symptoms    object
dtype: object


In [3]:
def tokenize_symptoms(symptoms_string):
    """
    Convert a comma-separated symptoms string into a list of symptoms.

    Each piece is stripped of surrounding whitespace. Pieces that are empty
    after stripping (from "", "a,,b" or a trailing comma) are dropped so they
    are not counted as symptoms.

    Args:
        symptoms_string: Comma-separated symptoms, e.g. "demam, batuk".
            Any non-string value (None, or the float NaN that pandas uses
            for an empty CSV field) is treated as "no symptoms".

    Returns:
        list: The individual symptom strings in their original order; an
        empty list when there are none.
    """
    # A missing value has no .split(); report zero symptoms instead of
    # raising AttributeError.
    if not isinstance(symptoms_string, str):
        return []

    # Split by comma, strip whitespace, and discard blank pieces
    stripped_pieces = (piece.strip() for piece in symptoms_string.split(','))
    symptoms_list = [piece for piece in stripped_pieces if piece]
    return symptoms_list

# Tokenize each patient's raw symptoms text, then store both the token lists
# and how many tokens each list holds as new columns on df
tokenized_symptoms = df['symptoms'].apply(tokenize_symptoms)
df['symptoms_list'] = tokenized_symptoms
df['symptom_count'] = tokenized_symptoms.apply(len)

print("Symptoms tokenized successfully!")
print("\nDataFrame with tokenized symptoms:")
# One short report per patient, each followed by a blank line
for patient in df.itertuples(index=False):
    print(f"ID: {patient.id}, Name: {patient.name}, Age: {patient.age}")
    print(f"  Symptoms: {patient.symptoms_list}")
    print(f"  Symptom count: {patient.symptom_count}")
    print()

Symptoms tokenized successfully!

DataFrame with tokenized symptoms:
ID: 1, Name: Andi, Age: 45
  Symptoms: ['demam', 'batuk', 'sesak napas']
  Symptom count: 3

ID: 2, Name: Budi, Age: 29
  Symptoms: ['mual', 'sakit perut']
  Symptom count: 2

ID: 3, Name: Citra, Age: 62
  Symptoms: ['pusing', 'kehilangan keseimbangan']
  Symptom count: 2

ID: 4, Name: Dita, Age: 30
  Symptoms: ['susah tidur']
  Symptom count: 1

ID: 5, Name: Eka, Age: 18
  Symptoms: ['gusi berdarah']
  Symptom count: 1

ID: 6, Name: Fitra, Age: 49
  Symptoms: ['pusing', 'sakit perut']
  Symptom count: 2

ID: 7, Name: Gio, Age: 49
  Symptoms: ['menggigil', 'batuk', 'sakit kepala']
  Symptom count: 3

ID: 8, Name: Harianto, Age: 33
  Symptoms: ['memar di tangan']
  Symptom count: 1

ID: 9, Name: Idul, Age: 88
  Symptoms: ['susah tidur']
  Symptom count: 1

ID: 10, Name: Jaka, Age: 54
  Symptoms: ['sesak nafas']
  Symptom count: 1



In [4]:
def filter_patients(df, age_threshold=40, symptom_threshold=2):
    """
    Select patients older than an age cutoff who also have more than a given
    number of symptoms.

    With the default arguments this keeps rows where:
    - Age > 40
    - Number of symptoms > 2

    Args:
        df: DataFrame with an 'age' column and a 'symptom_count' column.
        age_threshold: Rows are kept only when 'age' is strictly greater
            than this value. Defaults to 40.
        symptom_threshold: Rows are kept only when 'symptom_count' is
            strictly greater than this value. Defaults to 2.

    Returns:
        A new DataFrame holding the rows that satisfy both conditions, with
        their original index labels. The input frame is not modified.

    Raises:
        KeyError: If 'age' or 'symptom_count' is not a column of df.
    """
    is_older = df['age'] > age_threshold
    has_more_symptoms = df['symptom_count'] > symptom_threshold
    # Return an explicit copy so callers can add or change columns on the
    # result without a SettingWithCopyWarning.
    filtered_df = df[is_older & has_more_symptoms].copy()
    return filtered_df

# Narrow the full table down to the patients that meet both criteria
filtered_patients = filter_patients(df)

total_patients = len(df)
matching_patients = len(filtered_patients)
print(f"\nOriginal number of patients: {total_patients}")
print(f"Patients after filtering (age > 40 AND symptoms > 2): {matching_patients}")

print("\nFiltered patients:")
print("=" * 50)
# One block of details per matching patient, each closed by a dashed rule
for patient in filtered_patients.itertuples(index=False):
    print(f"ID: {patient.id}")
    print(f"Name: {patient.name}")
    print(f"Age: {patient.age}")
    print(f"Symptoms: {patient.symptoms_list}")
    print(f"Symptom count: {patient.symptom_count}")
    print("-" * 30)


Original number of patients: 10
Patients after filtering (age > 40 AND symptoms > 2): 2

Filtered patients:
ID: 1
Name: Andi
Age: 45
Symptoms: ['demam', 'batuk', 'sesak napas']
Symptom count: 3
------------------------------
ID: 7
Name: Gio
Age: 49
Symptoms: ['menggigil', 'batuk', 'sakit kepala']
Symptom count: 3
------------------------------
