In [94]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

In [95]:
# Memuat data
data = pd.read_csv('heart.csv')

In [96]:
# Memilih hanya fitur yang diinginkan
selected_features = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'target']
data = data[selected_features]

In [97]:
# Menangani Outliers (jika diperlukan)
# Misalnya, menggunakan IQR untuk menghapus outliers
Q1 = data.quantile(0.20)
Q3 = data.quantile(0.80)
IQR = Q3 - Q1
data = data[~((data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))).any(axis=1)]

In [98]:
# Memisahkan fitur dan target
X = data.drop(columns=['target'])
y = data['target']

In [99]:
# Menangani Data yang Tidak Seimbang dengan SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [100]:
# Normalisasi fitur dengan MinMaxScaler
scaler = MinMaxScaler()
X_resampled = scaler.fit_transform(X_resampled)

In [101]:
# Membagi data menjadi data latih dan data uji (80/20)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [102]:
# Mendefinisikan model SVM
svm = SVC()

In [103]:
# Mendefinisikan parameter untuk GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}

In [104]:
# Menggunakan GridSearchCV untuk mencari parameter terbaik
grid = GridSearchCV(svm, param_grid, refit=True, verbose=2, cv=10)
grid.fit(X_train, y_train)  # Melatih model

Fitting 10 folds for each of 32 candidates, totalling 320 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1,

In [105]:
# Memprediksi dan evaluasi model pada data uji
predictions = grid.predict(X_test)
print(classification_report(y_test, predictions))
print("Accuracy:", accuracy_score(y_test, predictions))

              precision    recall  f1-score   support

           0       0.93      0.90      0.92        31
           1       0.89      0.92      0.91        26

    accuracy                           0.91        57
   macro avg       0.91      0.91      0.91        57
weighted avg       0.91      0.91      0.91        57

Accuracy: 0.9122807017543859


In [92]:
# Mencetak parameter terbaik
print(f"Best parameters: {grid.best_params_}")

Best parameters: {'C': 1, 'gamma': 1, 'kernel': 'rbf'}


In [93]:
# Memprediksi dan evaluasi model pada data latih
train_predictions = grid.predict(X_train)
print("Train Accuracy:", accuracy_score(y_train, train_predictions))

Train Accuracy: 0.8311111111111111


In [106]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,target
1,37,1,2,130,250,0,1,187,0,3.5,1
2,41,0,1,130,204,0,0,172,0,1.4,1
3,56,1,1,120,236,0,1,178,0,0.8,1
4,57,0,0,120,354,0,1,163,1,0.6,1
5,57,1,0,140,192,0,1,148,0,0.4,1


In [112]:
input_data = (67, 1, 0, 160, 286, 0, 0, 108, 1, 1.5)

input_data_as_numpy_array = np.array(input_data)

input_data_reshape = input_data_as_numpy_array.reshape(1, -1)

std_data = scaler.transform(input_data_reshape)

predictions = grid.predict(std_data)

print(predictions)
if (predictions[0] == 0):
    print('Pasien tidak terkena penyakit jantung')
else :
    print('Pasien terkena penyakit jantung')

[0]
Pasien tidak terkena penyakit jantung




In [113]:
import pickle
from sklearn.model_selection import GridSearchCV
filename = 'jantung_model.sav'
pickle.dump(grid, open(filename,'wb'))
with open('scaler.sav', 'wb') as file:
    pickle.dump(scaler, file)