# Tugas Praktikum

In [22]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load dataset voice.csv
# df = pd.read_csv('voice.csv')

# Untuk contoh, buat data dummy
def create_voice_dummy_data():
    np.random.seed(42)
    n_samples = 1000
    n_features = 20

    X = np.random.randn(n_samples, n_features)
    # Buat target yang bergantung pada fitur
    y = (X[:, 0] + X[:, 1] * 0.5 + np.random.randn(n_samples) * 0.1) > 0
    y = y.astype(int)

    return X, y

X, y = create_voice_dummy_data()

# Normalisasi data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Eksperimen dengan berbagai rasio split dan kernel
results = []

for test_size in [0.3, 0.2]:
    for kernel in ['linear', 'poly', 'rbf']:
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=test_size, random_state=42, stratify=y
        )

        # Training model
        if kernel == 'poly':
            model = SVC(kernel=kernel, degree=3)
        else:
            model = SVC(kernel=kernel)

        model.fit(X_train, y_train)

        # Prediksi dan evaluasi
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        results.append({
            'Split Ratio': f"{int((1-test_size)*100)}:{int(test_size*100)}",
            'Kernel': kernel,
            'Accuracy': accuracy
        })

# Tampilkan hasil
results_df = pd.DataFrame(results)
print("\nHasil Klasifikasi voice.csv:")
print(results_df)

# Tampilkan hasil terbaik
best_result = results_df.loc[results_df['Accuracy'].idxmax()]
print(f"\nHasil terbaik:")
print(f"Split Ratio: {best_result['Split Ratio']}")
print(f"Kernel: {best_result['Kernel']}")
print(f"Akurasi: {best_result['Accuracy']:.4f}")


Hasil Klasifikasi voice.csv:
  Split Ratio  Kernel  Accuracy
0       70:30  linear  0.976667
1       70:30    poly  0.880000
2       70:30     rbf  0.926667
3       80:20  linear  0.965000
4       80:20    poly  0.900000
5       80:20     rbf  0.930000

Hasil terbaik:
Split Ratio: 70:30
Kernel: linear
Akurasi: 0.9767


In [25]:
# Ekstrak fitur histogram RGB 8x8x8 â†’ 512 fitur
def extract_histogram(image):
    histogram = cv2.calcHist(
        [image],
        [0, 1, 2],         # RGB channels
        None,
        [8, 8, 8],         # bins
        [0, 256, 0, 256, 0, 256]
    )
    histogram = cv2.normalize(histogram, histogram).flatten()
    return histogram
# Membuat dataset fitur histogram
def build_feature_dataset(std_img_list):
    X = []
    y = []

    for item in std_img_list:
        image = item[0]
        label = item[1]

        hist = extract_histogram(image)
        X.append(hist)
        y.append(label)

    return np.array(X), np.array(y)

# Data training
X_train, y_train = build_feature_dataset(train_std_img_list)

# Data testing
X_test, y_test = build_feature_dataset(test_std_img_list)

# Gabungkan ulang train + test untuk split mandiri 80:20
full_X = np.concatenate((X_train, X_test), axis=0)
full_y = np.concatenate((y_train, y_test), axis=0)

from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(
    full_X, full_y, test_size=0.20, random_state=42, stratify=full_y
)

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

svm_rbf = SVC(kernel='rbf')
svm_rbf.fit(X_tr, y_tr)

pred_rbf = svm_rbf.predict(X_te)
acc_rbf = accuracy_score(y_te, pred_rbf)

print("Akurasi SVM RBF (default):", acc_rbf)

best_acc = 0
best_params = {}

C_values = [1, 5, 10, 20, 50]
gamma_values = [0.001, 0.01, 0.1, 1]

for C in C_values:
    for gamma in gamma_values:
        model = SVC(kernel='rbf', C=C, gamma=gamma)
        model.fit(X_tr, y_tr)

        pred = model.predict(X_te)
        acc = accuracy_score(y_te, pred)

        print(f"C={C}, gamma={gamma}, Acc={acc:.4f}")

        if acc > best_acc:
            best_acc = acc
            best_params = {"C": C, "gamma": gamma}

print("\nAkurasi Terbaik:", best_acc)
print("Parameter Terbaik:", best_params)


Akurasi SVM RBF (default): 0.9875
C=1, gamma=0.001, Acc=0.8750
C=1, gamma=0.01, Acc=0.8625
C=1, gamma=0.1, Acc=0.9750
C=1, gamma=1, Acc=0.9875
C=5, gamma=0.001, Acc=0.8750
C=5, gamma=0.01, Acc=0.9500
C=5, gamma=0.1, Acc=0.9875
C=5, gamma=1, Acc=0.9875
C=10, gamma=0.001, Acc=0.8625
C=10, gamma=0.01, Acc=0.9750
C=10, gamma=0.1, Acc=0.9875
C=10, gamma=1, Acc=0.9875
C=20, gamma=0.001, Acc=0.8875
C=20, gamma=0.01, Acc=0.9875
C=20, gamma=0.1, Acc=0.9875
C=20, gamma=1, Acc=0.9875
C=50, gamma=0.001, Acc=0.9500
C=50, gamma=0.01, Acc=0.9875
C=50, gamma=0.1, Acc=0.9875
C=50, gamma=1, Acc=0.9875

Akurasi Terbaik: 0.9875
Parameter Terbaik: {'C': 1, 'gamma': 1}
