<a href="https://colab.research.google.com/github/Fakhryrama/Data_Mining/blob/main/SVM_%26_ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from imblearn.over_sampling import SMOTE
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

Prepare Dataset

In [None]:
# Mount Google Drive inside the Colab runtime; the dataset is read from this
# mount point further down.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the red-wine quality dataset from the mounted Drive folder.
# The file is semicolon-separated, hence the explicit delimiter.
df = pd.read_csv('/content/drive/MyDrive/Datasets/winequality-red.csv', delimiter=';')

# info() prints the number of rows, the columns with their dtypes and the
# non-null counts. A bare `df.shape` placed before it would be evaluated and
# thrown away (a cell only displays its last expression), so info() alone is
# used to show the dimensions.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [None]:
# Separate the target column from the feature columns.
TARGET_COLUMN = 'quality'
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

In [None]:
# Standardize the numeric features.
# NOTE(review): the scaler is fitted on every row of X, so X_scaled carries
# statistics from all rows, including any that are later held out for testing.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Train/test split, scaling and SMOTE resampling.
#
# The split is made on the unscaled features X so that no statistic of the
# test rows takes part in preprocessing. stratify=y keeps the class
# proportions of the target the same in both partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply the same training
# statistics to the test rows. Fitting on the full dataset before splitting
# would leak test-set means/variances into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Oversample the minority classes with SMOTE on the training partition only;
# the test partition keeps its original class distribution.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
print(y_train_resampled.value_counts())

quality
6    545
5    545
7    545
3    545
4    545
8    545
Name: count, dtype: int64


In [None]:
# Report how many NaN and infinite entries the resampled training matrix has,
# then replace any such entries with finite numbers via np.nan_to_num.
nan_count = np.isnan(X_train_resampled).sum()
inf_count = np.isinf(X_train_resampled).sum()
print(nan_count, inf_count)

X_train_resampled = np.nan_to_num(X_train_resampled)

0 0


In [None]:
# SVM pipeline: a scaling step followed by the classifier.
# probability=True is required so predict_proba is available on the fitted model.
svm_steps = [
    ('scaler', StandardScaler()),
    ('classifier', SVC(kernel='rbf', probability=True)),
]
pipeline_svm = Pipeline(svm_steps)

# Search space as a list of sub-grids: the linear kernel is tuned over C only,
# while the rbf/poly kernels are tuned over C and gamma.
linear_grid = {
    'classifier__kernel': ['linear'],
    'classifier__C': [0.1, 1, 10],
}
nonlinear_grid = {
    'classifier__kernel': ['rbf', 'poly'],
    'classifier__C': [0.1, 1, 10],
    'classifier__gamma': ['scale', 'auto'],
}
param_grid = [linear_grid, nonlinear_grid]

In [None]:
# Hyperparameter tuning with a 5-fold cross-validated grid search.
#
# The search space is the `param_grid` list defined together with the
# pipeline: C for the linear kernel, and C x gamma for the rbf/poly kernels.
# A flat grid that also crosses gamma with the linear kernel is deliberately
# not re-declared here: SVC ignores gamma for kernel='linear', so every linear
# candidate would be fitted twice with identical results.
#
# NOTE(review): the training data was oversampled before this search, so the
# validation folds contain SMOTE-generated samples and the CV scores are
# likely optimistic compared with the held-out test score.
grid_svm = GridSearchCV(pipeline_svm, param_grid, cv=5, scoring='f1_micro', n_jobs=-1)
grid_svm.fit(X_train_resampled, y_train_resampled)

In [None]:
# Evaluate the best SVM found by the grid search on the test split.
best_svm = grid_svm.best_estimator_
y_pred_svm = best_svm.predict(X_test)
y_pred_proba_svm = best_svm.predict_proba(X_test)

# Accuracy and weighted F1 use the hard predictions; the one-vs-rest ROC AUC
# uses the per-class probabilities.
svm_acc = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
svm_f1 = f1_score(y_true=y_test, y_pred=y_pred_svm, average='weighted')
svm_auc = roc_auc_score(y_true=y_test, y_score=y_pred_proba_svm, multi_class='ovr')

print(f"SVM - Akurasi: {svm_acc}, F1: {svm_f1}, AUC: {svm_auc}")

SVM - Akurasi: 0.571875, F1: 0.5878151731138376, AUC: 0.7540633193772783


In [None]:
# ANN model in PyTorch
class ANNModel(nn.Module):
    """Fully connected classifier with two hidden layers (64 and 32 units).

    Args:
        activation_fn: Callable (typically an ``nn.Module`` such as
            ``nn.ReLU()``) applied after each hidden layer.
        input_dim: Number of input features. When ``None`` (the default) the
            width is taken from the notebook-level ``X_train`` matrix, which
            keeps the original ``ANNModel(activation_fn)`` call working.
        num_classes: Number of output logits. Defaults to 6.
    """

    def __init__(self, activation_fn, input_dim=None, num_classes=6):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback: read the feature count from the
            # training matrix defined earlier in the notebook.
            input_dim = X_train.shape[1]
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)
        self.activation = activation_fn

    def forward(self, x):
        """Return raw (unnormalized) class logits for a batch of inputs."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        # No activation on the output layer: the logits are returned as-is.
        x = self.fc3(x)
        return x

In [None]:
# Convert the data to tensors.
# Features are float32. Labels are stored as int64 (torch.long) because they
# are class values, and nn.CrossEntropyLoss takes integer class indices;
# storing them as float32 would force a cast on every batch.
X_train_tensor = torch.tensor(X_train_resampled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_resampled.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Wrap the tensors in datasets and loaders. Only the training loader shuffles;
# the test loader keeps the original row order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Candidate activation functions, keyed by the label used when printing results.
activation_functions = dict([
    ('ReLU', nn.ReLU()),
    ('Sigmoid', nn.Sigmoid()),
    ('Tanh', nn.Tanh()),
    ('LeakyReLU', nn.LeakyReLU()),
])

In [None]:
# Training function
def train_model(activation_fn, train_loader, test_loader, epochs=10, lr=0.001, label_offset=3):
    """Train a fresh ANNModel and return its weighted F1-score on the test loader.

    Args:
        activation_fn: Activation passed to ``ANNModel``.
        train_loader: Iterable of ``(inputs, labels)`` batches used for training.
        test_loader: Iterable of ``(inputs, labels)`` batches used for evaluation.
        epochs: Number of passes over ``train_loader`` (default 10).
        lr: Learning rate for the Adam optimizer (default 0.001).
        label_offset: Value subtracted from the labels to obtain 0-based class
            indices, and added back to the predictions. The default of 3
            assumes the original labels are 3-8.

    Returns:
        The weighted F1-score between the test labels and the predictions,
        both on the original label scale.
    """
    model = ANNModel(activation_fn)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            # CrossEntropyLoss needs 0-based integer class indices, so cast the
            # labels and shift them by the offset.
            targets = labels.long().view(-1) - label_offset
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    # Model evaluation: switch to eval mode and disable gradient tracking.
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # Collect the labels as integers so both lists share one type.
            y_true.extend(labels.long().view(-1).tolist())
            # Add the offset back so predictions are on the original scale.
            y_pred.extend((torch.argmax(outputs, dim=1) + label_offset).tolist())

    return f1_score(y_true, y_pred, average='weighted')

In [None]:
# Compare activation functions.
# torch's global RNG is re-seeded before every run so that each activation
# starts from the same initial weights and sees the same shuffled batch order.
# Without this, differences between runs mix the effect of the activation with
# random initialization, and the printed scores change on every execution.
SEED = 42
for name, activation in activation_functions.items():
    torch.manual_seed(SEED)
    f1 = train_model(activation, train_loader, test_loader)
    print(f"F1-score dengan {name}: {f1}")

F1-score dengan ReLU: 0.48856627549057335
F1-score dengan Sigmoid: 0.37920684558588125
F1-score dengan Tanh: 0.4389203901560326
F1-score dengan LeakyReLU: 0.466817600226164
