In [None]:
!pip install -U imbalanced-learn



In [None]:
from google.colab import drive

# Mount Google Drive (force_remount=True requests a fresh mount each run).
mount_point = '/content/gdrive'
drive.mount(mount_point, force_remount=True)

# Dataset location - change this to wherever you uploaded the data folder.
# The path is built from the absolute mount point so it resolves regardless of
# the current working directory (the earlier relative 'gdrive/...' form only
# worked while the working directory was /content).
root_path = mount_point + '/My Drive/Colab Notebooks/Deep Learning Labs/Framingham/'

# Optional - show where the dataset is expected to live.
print("Path root:", root_path)

Mounted at /content/gdrive
Path root: gdrive/My Drive/Colab Notebooks/Deep Learning Labs/Framingham/


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Load the Framingham CSV from the configured dataset folder.
csv_path = root_path + "framingham.csv"
df = pd.read_csv(csv_path)

# Count the missing values in every column and show the tally.
missing_per_column = df.isnull().sum()
print("Jumlah missing value per kolom:\n", missing_per_column)


Jumlah missing value per kolom:
 male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64


In [None]:
# Keep only fully observed rows: a row is dropped if any of its columns is missing.
df = df.dropna(axis=0, how="any")


In [None]:
# Separate the predictors from the target column.
X = df.drop(columns='TenYearCHD')
y = df['TenYearCHD']

# Hold out 30% of the rows for testing.
# - stratify=y keeps the (imbalanced) class ratio identical in both splits, so
#   the minority class is not over- or under-represented in the test set by chance.
# - random_state fixes the shuffle so the split, and every metric computed from
#   it further down, is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, stratify=y, random_state=42
)

In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Training-set shape and per-class sample counts before any resampling.
train_class_counts = np.bincount(y_train)
print("Jumlah sebelum SMOTE-IPF:", X_train.shape, train_class_counts)

Jumlah sebelum SMOTE-IPF: (2560, 15) [2170  390]


In [None]:
# Baseline: an SVC with default hyperparameters trained on the scaled,
# un-resampled training split.
svm = SVC().fit(X_train_scaled, y_train)
y_pred = svm.predict(X_test_scaled)

# Score the baseline on the held-out test split.
baseline_report = classification_report(y_test, y_pred)
baseline_f1 = f1_score(y_test, y_pred)
print("Classification Report Tanpa SMOTE-IPF:\n", baseline_report)
print("F1-Score:", baseline_f1)


Classification Report Tanpa SMOTE-IPF:
               precision    recall  f1-score   support

           0       0.85      1.00      0.92       931
           1       0.67      0.01      0.02       167

    accuracy                           0.85      1098
   macro avg       0.76      0.51      0.47      1098
weighted avg       0.82      0.85      0.78      1098

F1-Score: 0.023529411764705882


In [None]:
# 5-fold stratified cross-validation of the baseline. The scaler is a pipeline
# step, so it is fitted as part of the pipeline rather than on the full data.
cv = StratifiedKFold(n_splits=5)
baseline_steps = [("scaler", StandardScaler()), ("svm", SVC())]
pipeline_svm = Pipeline(steps=baseline_steps)
scores = cross_val_score(estimator=pipeline_svm, X=X, y=y, scoring="f1", cv=cv)
print("Rata-rata F1-Score CV (tanpa SMOTE-IPF):", scores.mean())


Rata-rata F1-Score CV (tanpa SMOTE-IPF): 0.01692363902082572


In [None]:

import numpy as np
# Oversample the scaled training split with SMOTE.
smote = SMOTE(random_state=42)  # fixed seed for reproducibility
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train)

resampled_class_counts = np.bincount(y_resampled)
print("Jumlah setelah SMOTE:", X_resampled.shape, resampled_class_counts)

# Train an SVM on the SMOTE-resampled data and predict the untouched test split.
svm_smote = SVC().fit(X_resampled, y_resampled)
y_pred_smote = svm_smote.predict(X_test_scaled)

smote_report = classification_report(y_test, y_pred_smote)
smote_f1 = f1_score(y_test, y_pred_smote)
print("\nClassification Report dengan SMOTE Tanpa IPF:\n", smote_report)
print("F1-Score (dengan SMOTE Tanpa IPF):", smote_f1)

# Cross-validate a scaler -> SMOTE -> SVM pipeline with the same folds as the baseline.
smote_steps = [
    ("scaler", StandardScaler()),
    ("smote", SMOTE(random_state=42)),
    ("svm", SVC()),
]
pipeline_smote_svm = Pipeline(steps=smote_steps)
scores_smote = cross_val_score(estimator=pipeline_smote_svm, X=X, y=y, scoring="f1", cv=cv)
print("\nRata-rata F1-Score CV (dengan SMOTE Tanpa IPF):", scores_smote.mean())


Jumlah setelah SMOTE: (4340, 15) [2170 2170]

Classification Report dengan SMOTE:
               precision    recall  f1-score   support

           0       0.89      0.73      0.81       931
           1       0.25      0.50      0.33       167

    accuracy                           0.70      1098
   macro avg       0.57      0.62      0.57      1098
weighted avg       0.79      0.70      0.73      1098

F1-Score (dengan SMOTE): 0.33400402414486924

Rata-rata F1-Score CV (dengan SMOTE): 0.3326507117117735


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTE
import numpy as np

class SMOTE_IPF:
    """SMOTE oversampling followed by an iterative k-NN noise filter.

    ``fit_resample`` first balances the classes with SMOTE and then repeatedly
    removes every sample whose label disagrees with the prediction of a
    k-nearest-neighbours classifier fitted on the current (resampled) data.

    NOTE(review): this is a simplified filter. It uses a single k-NN model
    evaluated on its own training data, not an ensemble trained on data
    partitions; confirm that is the intended variant of IPF.

    Parameters
    ----------
    smote_k : int, default=5
        ``k_neighbors`` passed to SMOTE.
    ipf_k : int, default=3
        ``n_neighbors`` of the k-NN classifier used by the filter.
    max_iter : int, default=5
        Maximum number of filtering passes.
    random_state : int or None, default=None
        Seed forwarded to SMOTE so the synthetic samples are reproducible.
        ``None`` keeps the previous (unseeded) behaviour.
    """

    def __init__(self, smote_k=5, ipf_k=3, max_iter=5, random_state=None):
        self.smote_k = smote_k
        self.ipf_k = ipf_k
        self.max_iter = max_iter
        self.random_state = random_state
        self.smote = SMOTE(k_neighbors=self.smote_k, random_state=self.random_state)

    def fit_resample(self, X, y):
        """Oversample ``(X, y)`` with SMOTE, then filter out suspected noise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        X_filtered : numpy.ndarray
            Feature rows that survived the filter.
        y_filtered : numpy.ndarray
            Matching labels, in the dtype of the input labels (labels are no
            longer round-tripped through float and forced to ``int``, so
            non-integer labels are supported).
        """
        # Step 1: SMOTE
        X_res, y_res = self.smote.fit_resample(X, y)

        # Features and labels are kept in separate arrays so that stacking them
        # into one matrix cannot coerce either of them to a common dtype.
        X_res = np.asarray(X_res)
        y_res = np.asarray(y_res)

        # Step 2: iterative filtering
        for _ in range(self.max_iter):
            clf = KNeighborsClassifier(n_neighbors=self.ipf_k)
            clf.fit(X_res, y_res)

            # Samples whose label matches the k-NN prediction are kept.
            agrees = clf.predict(X_res) == y_res

            # Nothing left to remove: the filter has converged.
            if np.all(agrees):
                break

            # Never filter a class out of existence; a downstream classifier
            # could not be trained on the result.
            if np.unique(y_res[agrees]).size < np.unique(y_res).size:
                break

            X_res, y_res = X_res[agrees], y_res[agrees]

        return X_res, y_res

In [None]:
# Resample the scaled training split with SMOTE followed by the iterative filter.
ipf_settings = {"smote_k": 5, "ipf_k": 3, "max_iter": 5}
smote_ipf = SMOTE_IPF(**ipf_settings)
X_resampled, y_resampled = smote_ipf.fit_resample(X_train_scaled, y_train)


In [None]:
# Shape and per-class sample counts after SMOTE plus filtering.
filtered_class_counts = np.bincount(y_resampled)
print("Jumlah sesudah SMOTE-IPF:", X_resampled.shape, filtered_class_counts)

Jumlah sesudah SMOTE-IPF: (3905, 15) [1737 2168]


In [None]:
# Train an SVC on the SMOTE-IPF output and predict the untouched test split.
svm_resampled = SVC().fit(X_resampled, y_resampled)
y_pred_resampled = svm_resampled.predict(X_test_scaled)

# Score on the held-out test split.
resampled_report = classification_report(y_test, y_pred_resampled)
resampled_f1 = f1_score(y_test, y_pred_resampled)
print("Classification Report dengan SMOTE-IPF:\n", resampled_report)
print("F1-Score:", resampled_f1)


Classification Report dengan SMOTE-IPF:
               precision    recall  f1-score   support

           0       0.91      0.66      0.76       931
           1       0.25      0.62      0.35       167

    accuracy                           0.65      1098
   macro avg       0.58      0.64      0.56      1098
weighted avg       0.81      0.65      0.70      1098

F1-Score: 0.3515358361774744


In [None]:
from imblearn.pipeline import make_pipeline as make_pipeline_imb

# Cross-validate scaler -> SMOTE_IPF -> SVC with the same folds used for the other models.
pipeline_resampled = make_pipeline_imb(StandardScaler(), SMOTE_IPF(), SVC())
scores_resampled = cross_val_score(
    estimator=pipeline_resampled, X=X, y=y, scoring="f1", cv=cv
)
mean_f1_resampled = scores_resampled.mean()
print("Rata-rata F1-Score CV (dengan SMOTE-IPF):", mean_f1_resampled)


Rata-rata F1-Score CV (dengan SMOTE-IPF): 0.3319826280490007
