In [2]:
import numpy as np
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from torch.optim import SGD
import torch.nn.functional as F
from sklearn.model_selection import train_test_split, StratifiedKFold
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset



In [3]:

torch.cuda.empty_cache()

In [4]:

# def create_svm_model(input_dim, num_classes, n_support_vectors=10, kernel='rbf', C=1.0):
#     """
#     Create a PyTorch-based SVM model with all necessary components
#     """
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
#     # Create model parameters
#     model_params = {
#         'support_vectors': torch.randn(n_support_vectors, input_dim, device=device, requires_grad=True),
#         'dual_coef': torch.randn(n_support_vectors, num_classes, device=device, requires_grad=True),
#         'intercept': torch.zeros(num_classes, device=device, requires_grad=True)
#     }
    
#     def kernel_function(x1, x2, kernel_type=kernel):
#         if kernel_type == 'linear':
#             return torch.mm(x1, x2.t())
#         elif kernel_type == 'rbf':
#             distances = torch.cdist(x1, x2, p=2).pow(2)
#             gamma = 1.0 / x1.shape[1]
#             return torch.exp(-gamma * distances)
#         elif kernel_type == 'poly':
#             return (torch.mm(x1, x2.t()) + 1).pow(3)
#         else:
#             raise ValueError("Unsupported kernel type")
    
#     def forward_pass(x):
#         kernel_matrix = kernel_function(x, model_params['support_vectors'])
#         return torch.mm(kernel_matrix, model_params['dual_coef']) + model_params['intercept']
    
#     return forward_pass, model_params


In [43]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def calculate_metrics(y_true, y_pred, y_pred_proba=None):
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'f1': f1_score(y_true, y_pred, average='weighted', zero_division=0)
    }
    
    if y_pred_proba is not None:
        try:
            # For multiclass ROC AUC
            lb = LabelBinarizer()
            y_true_bin = lb.fit_transform(y_true)
            if y_pred_proba.shape[1] == 2:  # Binary classification
                metrics['roc_auc'] = roc_auc_score(y_true_bin, y_pred_proba[:, 1])
            else:  # Multi-class
                metrics['roc_auc'] = roc_auc_score(
                    y_true_bin, 
                    y_pred_proba,
                    multi_class='ovr',
                    average='weighted'
                )
        except Exception as e:
            print(f"Warning: Could not calculate ROC AUC: {str(e)}")
            metrics['roc_auc'] = None
    
    return metrics


In [44]:
def run_cross_validation(model, X_train, y_train, skf, learning_rate=0.001, epochs=100):
    fold_results = []

    for train_idx, val_idx in skf.split(X_train, y_train):
        X_fold_train, X_fold_val = X_train[train_idx], X_train[val_idx]
        y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]

        # Buat model baru untuk setiap fold
        model = bpnn(num_hidden_layers=2, num_neurons=64, activation_functions={
            'hidden': 'sigmoid',
            'output': 'sigmoid'
        }).to(device)

        # Definisikan loss function dan optimizer
        criterion = nn.BCELoss()  # Sesuaikan jika output bukan binary
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Pelatihan
        for epoch in range(epochs):
            model.train()  # Set model ke mode training
            optimizer.zero_grad()  # Reset gradien
            outputs = model(torch.tensor(X_fold_train, dtype=torch.float32).to(device))  # Forward pass
            loss = criterion(outputs, torch.tensor(y_fold_train, dtype=torch.float32).to(device).view(-1, 1))  # Hitung loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update parameter

        # Evaluasi pada validation set
        model.eval()  # Set model ke mode evaluasi
        with torch.no_grad():
            val_outputs = model(torch.tensor(X_fold_val, dtype=torch.float32).to(device))  # Forward pass
            val_loss = criterion(val_outputs, torch.tensor(y_fold_val, dtype=torch.float32).to(device).view(-1, 1))

        fold_results.append(val_loss.item())  # Simpan hasil loss fold ini

    # Kembalikan rata-rata dari hasil cross-validation
    return np.mean(fold_results)

In [45]:
def bpnn(num_hidden_layers, num_neurons, activation_functions):
    # Tentukan perangkat (GPU jika tersedia)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Tentukan arsitektur jaringan
    layers = []

    # Tambahkan input layer
    layers.append(nn.Linear(num_neurons, num_neurons))  # 'input_dim' harus ditentukan di cross_validation
    if activation_functions['hidden'] == 'relu':
        layers.append(nn.ReLU())
    elif activation_functions['hidden'] == 'sigmoid':
        layers.append(nn.Sigmoid())

    # Tambahkan hidden layers
    for _ in range(1, num_hidden_layers):
        layers.append(nn.Linear(num_neurons, num_neurons))
        if activation_functions['hidden'] == 'relu':
            layers.append(nn.ReLU())
        elif activation_functions['hidden'] == 'sigmoid':
            layers.append(nn.Sigmoid())

    # Tambahkan output layer
    layers.append(nn.Linear(num_neurons, 1))
    if activation_functions['output'] == 'sigmoid':
        layers.append(nn.Sigmoid())
    elif activation_functions['output'] == 'softmax':
        layers.append(nn.Softmax(dim=1))

    # Buat model dan pindahkan ke perangkat
    model = nn.Sequential(*layers).to(device)

    return model

In [46]:

# Create sample data
df=pd.read_csv("diabetes_012_health_indicators_BRFSS2015.csv")


In [47]:
df=df.astype(np.int8)

In [48]:
df['Diabetes_012'].value_counts()

Diabetes_012
0    213703
2     35346
1      4631
Name: count, dtype: int64

In [49]:
df['Diabetes_012']

0         0
1         0
2         0
3         0
4         0
         ..
253675    0
253676    2
253677    0
253678    0
253679    2
Name: Diabetes_012, Length: 253680, dtype: int8

In [50]:
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold

In [51]:
X = np.array(df.drop(columns='Diabetes_012'))
y = np.array(df['Diabetes_012'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
undersampler = RandomUnderSampler(sampling_strategy='auto', random_state=42)

# Lakukan undersampling
X_resampled, y_resampled = undersampler.fit_resample(X, y)
# Create cross-validation splitter

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Get dimensions from data
input_dim = X.shape[1]
num_classes = len(y)


In [52]:

# Run cross-validation
activation_functions = {
    'hidden': 'sigmoid',  # atau 'relu'
    'output': 'sigmoid'   # atau 'softmax'
}
model = bpnn(2,64,activation_functions)
mean_metrics=run_cross_validation(model, X_train, y_train,skf)
metrics_df = pd.DataFrame(mean_metrics.items(), columns=['Metric', 'Value'])

print(metrics_df)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (162355x21 and 64x64)