# NYCU Machine Learning 2024 : HW4 SVM

In [1]:
import numpy as np
import pandas as pd

from pathlib import Path
from rich import print
from dataclasses import dataclass
from typing import Callable

In [2]:
# Species names, ordered so that numeric label n maps to LABEL[n - 1].
LABEL = ["Setosa", "Versicolor", "Virginica"]

# Colour palettes, one colour per species.
# Each list has exactly len(LABEL) entries: zip() stops at the shortest input,
# so any extra colour would be silently ignored.
COLOR_1 = dict(zip(LABEL, ["red", "green", "blue"]))
COLOR_2 = dict(zip(LABEL, ["pink", "yellow", "orange"]))
COLOR_3 = dict(zip(LABEL, ["brown", "lightgreen", "navy"]))
COLOR_4 = dict(zip(LABEL, ["teal", "gold", "violet"]))

# Palette lookup: first key is "before" / "after", second key is "train" / "test".
COLOR_SELECT = {
    "before": {
        "train": COLOR_1,
        "test": COLOR_2,
    },
    "after": {
        "train": COLOR_3,
        "test": COLOR_4,
    },
}

# Column names applied to the data file, in file order.
COLUMN_NAME = ["Sepal length", "Sepal width", "Petal length", "Petal width", "Label"]
TRAIN_DATA_SIZE = 25
# Directory used for generated assets.
ASSETS = "./assets"

In [3]:
# Make sure the assets directory exists; an already existing directory is accepted.
assets_folder = Path(ASSETS)
assets_folder.mkdir(exist_ok=True, parents=True)

In [4]:
def load_iris_file(with_name:bool=False, path:str="./iris.txt")->pd.DataFrame:
    """Load the iris measurements from a fixed-width text file.

    The file is read as having no header line: every row, including the
    first one, is treated as data and the columns are named after
    ``COLUMN_NAME``.

    Args:
        with_name: When True, replace each numeric ``Label`` value ``n`` with
            ``LABEL[n - 1]``. When False, the numeric labels are returned.
        path: Location of the data file. Defaults to ``./iris.txt``.

    Returns:
        A DataFrame whose columns are ``COLUMN_NAME`` with a 0..n-1 index.
        All columns are float, except ``Label`` which holds strings when
        ``with_name`` is True.
    """
    # header=None keeps the first line as a data row instead of letting pandas
    # turn it into column names. Recovering the row from the column names is
    # fragile: pandas renames repeated values (e.g. "1.4" -> "1.4.1"), which
    # can no longer be parsed as a float.
    df_numeric = pd.read_fwf(path, header=None, names=COLUMN_NAME).astype(float)

    if not with_name:
        return df_numeric

    df_with_name = df_numeric.copy()
    # Labels are stored as 1-based floats, hence the int() cast and the -1 offset.
    df_with_name["Label"] = df_with_name["Label"].apply(lambda x: LABEL[int(x) - 1])

    return df_with_name

In [5]:
# Load the data set with the numeric labels replaced by the names in LABEL.
df = load_iris_file(with_name=True)
df

Unnamed: 0,Sepal length,Sepal width,Petal length,Petal width,Label
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [6]:
# The two species kept by the row filter in the cells below.
POSITIVE_CLASS = "Versicolor"
NEGATIVE_CLASS = "Virginica"

In [7]:
# Keep only the two petal measurements and the label column.
df_need = df.loc[:, ["Petal length", "Petal width", "Label"]]
df_need

Unnamed: 0,Petal length,Petal width,Label
0,1.4,0.2,Setosa
1,1.4,0.2,Setosa
2,1.3,0.2,Setosa
3,1.5,0.2,Setosa
4,1.4,0.2,Setosa
...,...,...,...
145,5.2,2.3,Virginica
146,5.0,1.9,Virginica
147,5.2,2.0,Virginica
148,5.4,2.3,Virginica


In [8]:
# Keep the rows of the two selected species and renumber them from 0.
df_need_class = df_need[
    df_need["Label"].isin([POSITIVE_CLASS, NEGATIVE_CLASS])
].reset_index(drop=True)
df_need_class

Unnamed: 0,Petal length,Petal width,Label
0,4.7,1.4,Versicolor
1,4.5,1.5,Versicolor
2,4.9,1.5,Versicolor
3,4.0,1.3,Versicolor
4,4.6,1.5,Versicolor
...,...,...,...
95,5.2,2.3,Virginica
96,5.0,1.9,Virginica
97,5.2,2.0,Virginica
98,5.4,2.3,Virginica


## Model

In [16]:
def rbf(sigma:float) -> Callable[[np.ndarray, np.ndarray], np.ndarray]:
    """Build a Gaussian (RBF) kernel with bandwidth ``sigma``.

    The returned function evaluates ``exp(-||x_1 - x_2||^2 / (2 * sigma^2))``,
    where the norm is ``np.linalg.norm`` taken over the whole difference array.
    """
    def run(x_1:np.ndarray, x_2:np.ndarray)->np.ndarray:
        inv_two_var = 1 / (2 * sigma ** 2)
        # Unary minus binds looser than **, so this is -(norm ** 2).
        neg_sq_dist = -np.linalg.norm(x_1 - x_2) ** 2
        return np.exp(neg_sq_dist * inv_two_var)

    return run

def poly(p:int)-> Callable[[np.ndarray, np.ndarray], np.ndarray]:
    """Build a polynomial kernel of degree ``p``.

    The returned function evaluates ``(1 + x_1.T @ x_2.T) ** p``. For 1-D
    vectors the transposes are no-ops and the product is the dot product.
    """
    def run(x_1:np.ndarray, x_2:np.ndarray)->np.ndarray:
        inner = x_1.T @ x_2.T
        return (1 + inner) ** p

    return run

def linear() -> Callable[[np.ndarray, np.ndarray], np.ndarray]:
    """Build a linear kernel.

    The returned function evaluates ``x_1.T @ x_2.T``; for 1-D vectors this
    is the plain dot product.
    """
    def run(x_1:np.ndarray, x_2:np.ndarray)->np.ndarray:
        lhs, rhs = x_1.T, x_2.T
        return lhs @ rhs

    return run

class Kernel:
    """Registry that maps a kernel name to the factory function building it."""

    # name -> factory; each factory takes its own keyword arguments and
    # returns a two-argument kernel function.
    kernel_dict = dict(linear=linear, rbf=rbf, poly=poly)

    @staticmethod
    def get_kernel(name:str, config:dict) -> Callable[[np.ndarray, np.ndarray], np.ndarray]:
        """Return the kernel function registered under ``name``.

        Args:
            name: Key into ``Kernel.kernel_dict``.
            config: Keyword arguments forwarded to the kernel factory
                (e.g. ``{"sigma": 1}`` for ``"rbf"``, ``{}`` for ``"linear"``).

        Raises:
            NotImplementedError: If ``name`` is not a registered kernel.
        """
        factory = Kernel.kernel_dict.get(name)
        if factory is None:
            raise NotImplementedError("Not implemented")

        return factory(**config)

In [21]:
# Build an RBF kernel with sigma = 1 through the Kernel registry.
kernel = Kernel.get_kernel("rbf", {"sigma":1})

In [10]:
class SupportVectorMachine:
    """Kernel decision function ``f(x) = sum_i ay_i * K(x_i, x) + b``.

    ``train`` is still a stub, so ``a_x_y`` and ``b`` have to be filled in
    before the instance can be called.
    """

    def __init__(self, c1:int, c2:int, kernel_name:str="linear", kernel_arg:"dict | None"=None):
        """Store the hyper-parameters and build the kernel.

        Args:
            c1, c2: Stored as ``_c1`` / ``_c2``; not used by any method yet.
            kernel_name: Name looked up through ``Kernel.get_kernel``.
            kernel_arg: Keyword arguments for the kernel factory. ``None``
                (the default) means no arguments.
        """
        self._c1, self._c2 = c1, c2

        # One dict per stored vector with keys "ay" (scalar weight, presumably
        # alpha_i * y_i -- TODO confirm once train() exists) and "x" (the vector).
        self.a_x_y = []
        # Bias term; None marks the model as not trained yet.
        self.b = None
        # A None sentinel replaces the shared mutable default ``dict()``.
        self._kernel = Kernel.get_kernel(kernel_name, {} if kernel_arg is None else kernel_arg)

    def train(self):
        """Placeholder: training is not implemented and this returns None."""
        return

    def __call__(self, x : np.ndarray):
        """Evaluate the decision function at ``x``.

        Raises:
            RuntimeError: If the bias ``b`` has not been set yet.
        """
        if self.b is None:
            # Without this guard the sum below fails with an opaque
            # "unsupported operand" TypeError (float + None).
            raise RuntimeError("SupportVectorMachine has no bias term yet; set `b` (train the model) before calling it")

        items = [item["ay"]*self._kernel(item["x"], x) for item in self.a_x_y]
        return np.sum(items) + self.b
    