<a href="https://colab.research.google.com/github/Hamdankim/2341720251_ML_2025/blob/main/JS11/TP_JS11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SVM Classification Tasks

## Part 1 — SVM on voice.csv

In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

df = pd.read_csv('/mnt/data/voice.csv')
df.head()


Unnamed: 0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,...,centroid,meanfun,minfun,maxfun,meandom,mindom,maxdom,dfrange,modindx,label
0,0.059781,0.064241,0.032027,0.015071,0.090193,0.075122,12.863462,274.402906,0.893369,0.491918,...,0.059781,0.084279,0.015702,0.275862,0.007812,0.007812,0.007812,0.0,0.0,male
1,0.066009,0.06731,0.040229,0.019414,0.092666,0.073252,22.423285,634.613855,0.892193,0.513724,...,0.066009,0.107937,0.015826,0.25,0.009014,0.007812,0.054688,0.046875,0.052632,male
2,0.077316,0.083829,0.036718,0.008701,0.131908,0.123207,30.757155,1024.927705,0.846389,0.478905,...,0.077316,0.098706,0.015656,0.271186,0.00799,0.007812,0.015625,0.007812,0.046512,male
3,0.151228,0.072111,0.158011,0.096582,0.207955,0.111374,1.232831,4.177296,0.963322,0.727232,...,0.151228,0.088965,0.017798,0.25,0.201497,0.007812,0.5625,0.554688,0.247119,male
4,0.13512,0.079146,0.124656,0.07872,0.206045,0.127325,1.101174,4.333713,0.971955,0.783568,...,0.13512,0.106398,0.016931,0.266667,0.712812,0.007812,5.484375,5.476562,0.208274,male


In [2]:

# Encode label
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

X = df.drop('label', axis=1)
y = df['label']

sc = StandardScaler()
X_scaled = sc.fit_transform(X)


In [3]:

splits = [(0.7,0.3),(0.8,0.2)]
kernels = ['linear','poly','rbf']

results = []

for train_size,test_size in splits:
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=test_size, random_state=42, stratify=y)

    for k in kernels:
        model = SVC(kernel=k)
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        acc = accuracy_score(y_test, pred)
        results.append([train_size, k, acc])

results_df = pd.DataFrame(results, columns=['Train Ratio','Kernel','Accuracy'])
results_df


Unnamed: 0,Train Ratio,Kernel,Accuracy
0,0.7,linear,0.97897
1,0.7,poly,0.958991
2,0.7,rbf,0.983176
3,0.8,linear,0.974763
4,0.8,poly,0.957413
5,0.8,rbf,0.98265


## Part 2 — SVM RBF for Day/Night Classification

In [5]:
import os
from zipfile import ZipFile

zip_path = '/mnt/data/images.zip'  # Ubah sesuai lokasi file zip jika diperlukan
extract_to = '/mnt/data/images'    # Default ekstraksi

if os.path.exists(zip_path):
    print(f'Found {zip_path} — extracting to {extract_to} ...')
    os.makedirs(extract_to, exist_ok=True)
    with ZipFile(zip_path, 'r') as zf:
        zf.extractall(extract_to)
    print('Extraction completed.')
else:
    print(f'No archive found at {zip_path}.')
    print('Make sure your images are placed in a folder with structure:')
    print('  images/training/<day|night>/*.jpg')
    print('  images/test/<day|night>/*.jpg')
    print('or extract your zip to /mnt/data/images and update the paths below if needed.')

Found /mnt/data/images.zip — extracting to /mnt/data/images ...
Extraction completed.


In [7]:

import cv2, numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

def extract_histogram(img_path):
    img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    hist = cv2.calcHist([img],[0],None,[256],[0,256]).flatten()
    return hist

base = Path('/mnt/data/images/images/')
train_dir = base/'training'
test_dir  = base/'test'

X = []
y = []

for folder,label in [('day',0),('night',1)]:
    for img_path in (train_dir/folder).glob('*.jpg'):
        X.append(extract_histogram(img_path))
        y.append(label)

X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

model = SVC(kernel='rbf')
model.fit(X_train, y_train)
pred = model.predict(X_test)

baseline_acc = accuracy_score(y_test, pred)
baseline_acc


0.9166666666666666

In [8]:

# Hyperparameter tuning manual
C_vals = [0.1,1,10,100]
gamma_vals = ['scale',0.01,0.001]

tuning_results = []

for C in C_vals:
    for gamma in gamma_vals:
        m = SVC(kernel='rbf', C=C, gamma=gamma)
        m.fit(X_train, y_train)
        pred = m.predict(X_test)
        acc = accuracy_score(y_test, pred)
        tuning_results.append([C, gamma, acc])

tuning_df = pd.DataFrame(tuning_results, columns=['C','Gamma','Accuracy'])
tuning_df


Unnamed: 0,C,Gamma,Accuracy
0,0.1,scale,0.770833
1,0.1,0.01,0.5
2,0.1,0.001,0.5
3,1.0,scale,0.916667
4,1.0,0.01,0.5
5,1.0,0.001,0.5
6,10.0,scale,0.979167
7,10.0,0.01,0.5
8,10.0,0.001,0.5
9,100.0,scale,0.979167
