In [508]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [509]:
# Load the dataset
file_path = 'dataKasus.xlsx' 
df = pd.read_excel(file_path, sheet_name='2022')

In [510]:
df_info = df.info()
df_info

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 13 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   NO                                561 non-null    int64 
 1   NAMA                              561 non-null    object
 2   USIA                              558 non-null    object
 3   PARITAS                           561 non-null    int64 
 4   JARAK KELAHIRAN                   554 non-null    object
 5   RIW HIPERTENSI                    561 non-null    object
 6   RIW PE                            561 non-null    object
 7   OBESITAS                          561 non-null    object
 8   RIW DM                            561 non-null    object
 9   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    object
 10  SOSEK RENDAH                      561 non-null    object
 11  PE/Non PE                         561 non-null    object
 12  Unnamed: 12           

In [511]:
df2 = df.drop(columns=['Unnamed: 12'])

In [512]:
def counts(nama_kolom):
    return df2[nama_kolom].value_counts()

In [513]:
print(counts('USIA'))

USIA
28 TH     28
25 TH     24
31 TH     23
29 TH     22
24 TH     21
          ..
30 th      1
16         1
39 TH      1
13 TH      1
14 TH      1
Name: count, Length: 72, dtype: int64


In [514]:
df2['USIA'] = df2['USIA'].replace({'TH': '', 'th': ''}, regex=True) 
df2['USIA'] = pd.to_numeric(df['USIA'], errors='coerce') 
mean_age = df2['USIA'].mean()
df2['USIA'] = df2['USIA'].fillna(mean_age)
print(counts('USIA'))

USIA
26.090909    550
26.000000      4
29.000000      2
16.000000      1
28.000000      1
23.000000      1
37.000000      1
21.000000      1
Name: count, dtype: int64


In [515]:
print(counts('RIW HIPERTENSI'))

RIW HIPERTENSI
Tidak    508
Ya        53
Name: count, dtype: int64


In [516]:
df2['RIW HIPERTENSI'] = df2['RIW HIPERTENSI'].map({'Tidak': 0, 'Ya': 1})

In [517]:
print(counts('RIW HIPERTENSI'))

RIW HIPERTENSI
0    508
1     53
Name: count, dtype: int64


In [518]:
print(counts('RIW PE'))

RIW PE
Tidak                     526
PEB                        19
PE                          6
HELLP SYNDROM               2
Impending PE                2
Impending Eklamsia          1
Kejang Konvulsi             1
impending eklamsia          1
PE, HELLP Syndrome          1
PEB impending eklampsi      1
Impending Ekalmsia          1
Name: count, dtype: int64


In [519]:
df2['RIW PE'] = df2['RIW PE'].apply(lambda x: 'Ya' if x != 'Tidak' else 'Tidak')

In [520]:
print(counts('RIW PE'))

RIW PE
Tidak    526
Ya        35
Name: count, dtype: int64


In [521]:
df2['RIW PE'] = df2['RIW PE'].map({'Tidak': 0, 'Ya': 1})

In [522]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   NO                                561 non-null    int64  
 1   NAMA                              561 non-null    object 
 2   USIA                              561 non-null    float64
 3   PARITAS                           561 non-null    int64  
 4   JARAK KELAHIRAN                   554 non-null    object 
 5   RIW HIPERTENSI                    561 non-null    int64  
 6   RIW PE                            561 non-null    int64  
 7   OBESITAS                          561 non-null    object 
 8   RIW DM                            561 non-null    object 
 9   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    object 
 10  SOSEK RENDAH                      561 non-null    object 
 11  PE/Non PE                         561 non-null    object 
dtypes: float

In [523]:
print(counts('RIW DM'))

RIW DM
Tidak    556
Ya         5
Name: count, dtype: int64


In [524]:
df2['RIW DM'] = df2['RIW DM'].map({'Tidak': 0, 'Ya': 1})

In [525]:
print(counts('RIW DM'))

RIW DM
0    556
1      5
Name: count, dtype: int64


In [526]:
print(counts('SOSEK RENDAH'))

SOSEK RENDAH
>UMR    557
<UMR      4
Name: count, dtype: int64


In [527]:
df2['SOSEK RENDAH'] = df2['SOSEK RENDAH'].map({'>UMR': 0, '<UMR': 1})

In [528]:
print(counts('SOSEK RENDAH'))

SOSEK RENDAH
0    557
1      4
Name: count, dtype: int64


In [529]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   NO                                561 non-null    int64  
 1   NAMA                              561 non-null    object 
 2   USIA                              561 non-null    float64
 3   PARITAS                           561 non-null    int64  
 4   JARAK KELAHIRAN                   554 non-null    object 
 5   RIW HIPERTENSI                    561 non-null    int64  
 6   RIW PE                            561 non-null    int64  
 7   OBESITAS                          561 non-null    object 
 8   RIW DM                            561 non-null    int64  
 9   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    object 
 10  SOSEK RENDAH                      561 non-null    int64  
 11  PE/Non PE                         561 non-null    object 
dtypes: float

In [530]:
print(counts('JARAK KELAHIRAN'))

JARAK KELAHIRAN
anak pertama    260
> 2 tahun       211
< 2 tahun        81
> 2 tahun         2
Name: count, dtype: int64


In [531]:
df2['JARAK KELAHIRAN'] = df2['JARAK KELAHIRAN'].str.replace('> 2 tahun ','> 2 tahun')

In [532]:
print(counts('JARAK KELAHIRAN'))

JARAK KELAHIRAN
anak pertama    260
> 2 tahun       213
< 2 tahun        81
Name: count, dtype: int64


In [533]:
label_encoder = LabelEncoder()
df2['JARAK KELAHIRAN'] = label_encoder.fit_transform(df2['JARAK KELAHIRAN'])

In [534]:
print(counts('JARAK KELAHIRAN'))

JARAK KELAHIRAN
2    260
1    213
0     81
3      7
Name: count, dtype: int64


In [535]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   NO                                561 non-null    int64  
 1   NAMA                              561 non-null    object 
 2   USIA                              561 non-null    float64
 3   PARITAS                           561 non-null    int64  
 4   JARAK KELAHIRAN                   561 non-null    int32  
 5   RIW HIPERTENSI                    561 non-null    int64  
 6   RIW PE                            561 non-null    int64  
 7   OBESITAS                          561 non-null    object 
 8   RIW DM                            561 non-null    int64  
 9   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    object 
 10  SOSEK RENDAH                      561 non-null    int64  
 11  PE/Non PE                         561 non-null    object 
dtypes: float

In [536]:
df2['OBESITAS'] = df2['OBESITAS'].map({'Tidak': 0, 'Ya': 1})

In [537]:
counts('OBESITAS')

OBESITAS
0    556
1      5
Name: count, dtype: int64

In [538]:
df2['RIW HIPERTENSI/PE DALAM KELUARGA'] = df2['RIW HIPERTENSI/PE DALAM KELUARGA'].map({'Tidak' : 0, 'Ada': 1})

In [539]:
counts('RIW HIPERTENSI/PE DALAM KELUARGA')

RIW HIPERTENSI/PE DALAM KELUARGA
0    550
1     11
Name: count, dtype: int64

In [540]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   NO                                561 non-null    int64  
 1   NAMA                              561 non-null    object 
 2   USIA                              561 non-null    float64
 3   PARITAS                           561 non-null    int64  
 4   JARAK KELAHIRAN                   561 non-null    int32  
 5   RIW HIPERTENSI                    561 non-null    int64  
 6   RIW PE                            561 non-null    int64  
 7   OBESITAS                          561 non-null    int64  
 8   RIW DM                            561 non-null    int64  
 9   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    int64  
 10  SOSEK RENDAH                      561 non-null    int64  
 11  PE/Non PE                         561 non-null    object 
dtypes: float

In [541]:
counts('PE/Non PE')

PE/Non PE
Non PE                    520
PEB                        20
PE                         17
Eklamsia                    1
PE gemelli                  1
PEB impending eklampsi      1
PE                          1
Name: count, dtype: int64

In [542]:
df2['PE/Non PE'] = df2['PE/Non PE'].apply(lambda x: 'PE' if x != 'Non PE' else 'Non PE')

In [543]:
counts('PE/Non PE')

PE/Non PE
Non PE    520
PE         41
Name: count, dtype: int64

In [544]:
df2['PE/Non PE'] = df2['PE/Non PE'].map({'PE': 1, 'Non PE': 0})

In [545]:
counts('PE/Non PE')

PE/Non PE
0    520
1     41
Name: count, dtype: int64

In [546]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   NO                                561 non-null    int64  
 1   NAMA                              561 non-null    object 
 2   USIA                              561 non-null    float64
 3   PARITAS                           561 non-null    int64  
 4   JARAK KELAHIRAN                   561 non-null    int32  
 5   RIW HIPERTENSI                    561 non-null    int64  
 6   RIW PE                            561 non-null    int64  
 7   OBESITAS                          561 non-null    int64  
 8   RIW DM                            561 non-null    int64  
 9   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    int64  
 10  SOSEK RENDAH                      561 non-null    int64  
 11  PE/Non PE                         561 non-null    int64  
dtypes: float

In [547]:
df2 = df2.drop(columns=['NAMA'])

In [548]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   NO                                561 non-null    int64  
 1   USIA                              561 non-null    float64
 2   PARITAS                           561 non-null    int64  
 3   JARAK KELAHIRAN                   561 non-null    int32  
 4   RIW HIPERTENSI                    561 non-null    int64  
 5   RIW PE                            561 non-null    int64  
 6   OBESITAS                          561 non-null    int64  
 7   RIW DM                            561 non-null    int64  
 8   RIW HIPERTENSI/PE DALAM KELUARGA  561 non-null    int64  
 9   SOSEK RENDAH                      561 non-null    int64  
 10  PE/Non PE                         561 non-null    int64  
dtypes: float64(1), int32(1), int64(9)
memory usage: 46.1 KB


In [551]:
df2.to_excel('dataset_baru.xlsx')