In [3]:
import pandas as pd

# Memuat dataset
file_path = 'hypertension_data.csv'  # Ganti dengan path dataset Anda
data = pd.read_csv(file_path)

# Menampilkan beberapa baris pertama untuk memeriksa data
print(data.head())


    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0  57.0  1.0   3       145   233    1        0      150      0      2.3   
1  64.0  0.0   2       130   250    0        1      187      0      3.5   
2  52.0  1.0   1       130   204    0        0      172      0      1.4   
3  56.0  0.0   1       120   236    0        1      178      0      0.8   
4  66.0  0.0   0       120   354    0        1      163      1      0.6   

   slope  ca  thal  target  
0      0   0     1       1  
1      0   0     2       1  
2      2   0     2       1  
3      2   0     2       1  
4      2   0     2       1  


In [4]:
# Memeriksa missing values
print(data.isnull().sum())

# Mengisi missing values di kolom 'sex' dengan modus (nilai yang paling sering muncul)
data['sex'].fillna(data['sex'].mode()[0], inplace=True)

# Verifikasi kembali apakah masih ada missing values
print(data.isnull().sum())


age          0
sex         25
cp           0
trestbps     0
chol         0
fbs          0
restecg      0
thalach      0
exang        0
oldpeak      0
slope        0
ca           0
thal         0
target       0
dtype: int64
age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['sex'].fillna(data['sex'].mode()[0], inplace=True)


In [5]:
from sklearn.preprocessing import LabelEncoder

# Daftar kolom kategorikal
categorical_columns = ['sex', 'cp', 'restecg', 'exang', 'slope', 'ca', 'thal']

# Membuat objek LabelEncoder untuk setiap kolom
label_encoders = {}

# Melakukan encoding
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Memeriksa hasil encoding
print(data.head())


    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0  57.0    1   3       145   233    1        0      150      0      2.3   
1  64.0    0   2       130   250    0        1      187      0      3.5   
2  52.0    1   1       130   204    0        0      172      0      1.4   
3  56.0    0   1       120   236    0        1      178      0      0.8   
4  66.0    0   0       120   354    0        1      163      1      0.6   

   slope  ca  thal  target  
0      0   0     1       1  
1      0   0     2       1  
2      2   0     2       1  
3      2   0     2       1  
4      2   0     2       1  


In [6]:
from sklearn.preprocessing import StandardScaler

# Memisahkan fitur dan target
X = data.drop(columns=['target'])
y = data['target']

# Melakukan standarisasi pada fitur
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Memeriksa beberapa baris pertama dari data yang sudah dinormalisasi
print(pd.DataFrame(X_scaled, columns=X.columns).head())


        age       sex        cp  trestbps      chol       fbs   restecg  \
0  0.088128  1.000959  1.993733  0.762261 -0.256495  2.382787 -1.001675   
1  0.548973 -0.999042  1.017086 -0.090570  0.072691 -0.419677  0.900800   
2 -0.241048  1.000959  0.040439 -0.090570 -0.818048 -0.419677 -1.001675   
3  0.022292 -0.999042  0.040439 -0.659124 -0.198404 -0.419677  0.900800   
4  0.680643 -0.999042 -0.936208 -0.659124  2.086535 -0.419677  0.900800   

    thalach     exang   oldpeak     slope       ca      thal  
0  0.015092 -0.696378  1.081856 -2.271363 -0.71358 -2.181027  
1  1.633805 -0.696378  2.111797 -2.271363 -0.71358 -0.527170  
2  0.977570 -0.696378  0.309401  0.972748 -0.71358 -0.527170  
3  1.240064 -0.696378 -0.205570  0.972748 -0.71358 -0.527170  
4  0.583829  1.436003 -0.377226  0.972748 -0.71358 -0.527170  


In [7]:
from sklearn.model_selection import train_test_split

# Membagi data menjadi data latih dan data uji (80:20)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Memeriksa ukuran dari dataset yang telah dibagi
print(f'Ukuran X_train: {X_train.shape}, y_train: {y_train.shape}')
print(f'Ukuran X_test: {X_test.shape}, y_test: {y_test.shape}')


Ukuran X_train: (20866, 13), y_train: (20866,)
Ukuran X_test: (5217, 13), y_test: (5217,)


In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Membuat model KNN dengan nilai K tertentu
knn = KNeighborsClassifier(n_neighbors=5)

# Melatih model dengan data latih
knn.fit(X_train, y_train)

# Melakukan prediksi pada data uji
y_pred = knn.predict(X_test)

# Menghitung metrik evaluasi
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Akurasi: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')


Akurasi: 1.00
Precision: 1.00
Recall: 1.00
F1-Score: 1.00
