# TinyML - Support Vector Machine (Classifier)

In [1]:
#!pip install micromlgen

## 1. Importing libraries

In [1]:
from micromlgen import port
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import os
import csv

from sklearn.svm import SVC
from sklearn.calibration import LabelEncoder
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import make_scorer, roc_auc_score
from mlxtend.plotting import plot_decision_regions
from sklearn import metrics

## 2. Load Dataset

In [2]:
#########################################
# Funciones para leer y procesar el CSV
#########################################
def leer_csv_a_arreglo(ruta_archivo):
    """
    Lee un archivo CSV en el que:
      - Las columnas están separadas por ';'
      - La primera columna es el target.
      - Las siguientes 5 columnas contienen 4 valores cada una, separados por ','.
    Cada fila resultante tendrá 21 elementos: 1 target y 20 features.
    """
    datos = []
    with open(ruta_archivo, newline='', encoding='utf-8') as archivo:
        lector = csv.reader(archivo, delimiter=';')
        for fila in lector:
            fila_completa = []
            for i, columna in enumerate(fila):
                if i == 0:
                    # Primer valor: target (se toma tal cual)
                    fila_completa.append(columna.strip())
                else:
                    # Las columnas restantes se dividen por coma
                    valores = [valor.strip() for valor in columna.split(',')]
                    fila_completa.extend(valores)
            if len(fila_completa) == 21:
                datos.append(fila_completa)
            else:
                continue
    return datos

def lista_a_diccionario(lista):
    """
    Transforma una lista de listas en un diccionario con dos llaves:
      - 'target': array con el primer elemento de cada fila.
      - 'data': array de arrays con el resto de los elementos de cada fila.
    """
    diccionario = {
        'target': [fila[0] for fila in lista if fila],
        'data': [fila[1:] for fila in lista if fila]
    }
    return diccionario

In [3]:
#########################################
# Cargar el CSV y preparar el DataFrame
#########################################
# Cambia 'tu_archivo.csv' por la ruta real de tu archivo CSV.
ruta_csv = r'C:\Users\quija\Desktop\SVM_1.csv'
lista_filas = leer_csv_a_arreglo(ruta_csv)
data_dict = lista_a_diccionario(lista_filas)

# Convertir los tipos:
# - target a entero
# - data a flotantes (se esperan 20 features por fila)
data_dict['target'] = [int(x) for x in data_dict['target']]
data_dict['data'] = [[float(val) for val in fila] for fila in data_dict['data']]

In [10]:
# Create a DataFrame
# Generar nombres para las 20 características: f1, f2, ..., f20.
feature_columns = [f"f{i+1}" for i in range(20)]
df = pd.DataFrame(data_dict['data'], columns=feature_columns)
df['target'] = data_dict['target']

df = df[df["target"] != 0].reset_index(drop=True)

print("DataFrame head:")
print(df.head())

DataFrame head:
             f1            f2         f3         f4            f5  \
0  4.103111e+06  1.645029e+08  28.679860  86.163669  2.059749e+07   
1  4.103113e+06  1.645029e+08  28.679864  86.163691  2.059750e+07   
2  4.103114e+06  1.645029e+08  28.679866  86.163703  2.059751e+07   
3  4.103114e+06  1.645030e+08  28.679869  86.163720  2.059751e+07   
4  4.103119e+06  1.645031e+08  28.679877  86.163764  2.059762e+07   

             f6         f7          f8            f9           f10  ...  \
0  4.440014e+08  28.527794  143.508645  6.528351e+06  2.661408e+08  ...   
1  4.440024e+08  28.527807  143.508835  6.528389e+06  2.661427e+08  ...   
2  4.440030e+08  28.527814  143.508962  6.528416e+06  2.661441e+08  ...   
3  4.440040e+08  28.527825  143.509123  6.528423e+06  2.661460e+08  ...   
4  4.440062e+08  28.527855  143.509558  6.528624e+06  2.661505e+08  ...   

          f12           f13           f14        f15         f16  \
0  106.764152  7.604239e+06  3.030725e+08  27.3864

In [11]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[feature_columns] = scaler.fit_transform(df[feature_columns])
print(df.head())

         f1        f2        f3        f4        f5        f6        f7  \
0 -1.811530 -1.823141 -2.339216 -1.916509 -2.192058 -1.886991 -1.805274   
1 -1.762924 -1.760810 -2.196159 -1.839296 -2.114946 -1.813888 -1.749523   
2 -1.717428 -1.729434 -2.124630 -1.797179 -2.053776 -1.767502 -1.719503   
3 -1.714845 -1.673482 -2.017337 -1.737515 -2.043339 -1.698398 -1.672329   
4 -1.517788 -1.547132 -1.731221 -1.583088 -1.425068 -1.534074 -1.543673   

         f8        f9       f10  ...       f12       f13       f14       f15  \
0 -1.874656 -1.808023 -1.771854  ... -1.778087 -2.119015 -2.170043 -2.133085   
1 -1.804224 -1.737606 -1.712409  ... -1.718609 -2.058322 -2.040577 -2.031834   
2 -1.757146 -1.686036 -1.672238  ... -1.676974 -2.015618 -1.977341 -1.977314   
3 -1.697464 -1.673987 -1.614121  ... -1.624930 -2.006581 -1.865671 -1.891640   
4 -1.536212 -1.294971 -1.478251  ... -1.488006 -1.577916 -1.637811 -1.681350   

        f16       f17       f18       f19       f20  target  
0 -2.1

In [12]:
#########################################
# Preparar datos para el modelo
#########################################
X = df[feature_columns].to_numpy()
y = df['target'].to_numpy()

# Si fuera necesario (por ejemplo, si los targets fueran cadenas), se puede usar LabelEncoder.
y = LabelEncoder().fit_transform(y)

print (y)

[0 1 2 3 2 1 2 3 2 2 3 3 2 1 2 3 1 2 3 0 2 1 2 1 0 0 3 2 2 3 0 3 2 1 3 0 1
 0 1 1 0 3 3 1 1 0 2 0 3 0 1 3 2 1 0 0 3 0 1 3 2 1 0 0]


In [17]:
print("Valores únicos en y antes de LabelEncoder:", np.unique(y))


Valores únicos en y antes de LabelEncoder: [0 1 2 3]


## 3. Dataset Visualization 

In [13]:
#########################################
# Visualización 3D del dataset (usando f1, f2 y f3)
#########################################
fig = go.Figure()
fig.add_trace(go.Scatter3d(
    x=df['f1'],
    y=df['f2'],
    z=df['f3'],
    mode='markers',
    marker=dict(color='blue')
))
fig.update_layout(
    scene=dict(
        xaxis_title='Feature 1',
        yaxis_title='Feature 2',
        zaxis_title='Feature 3'
    ),
    scene_camera=dict(eye=dict(x=1.87, y=0.88, z=-0.64)),
    width=1000,
    height=600
)
# Descomenta la siguiente línea si deseas visualizar la gráfica.
fig.show()

In [14]:
print('Input shape: ', X.shape)
print('Target variable shape: ', y.shape)

Input shape:  (64, 20)
Target variable shape:  (64,)


## 4. Split into training and test data

In [15]:
#########################################
# División en entrenamiento y prueba
#########################################
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

In [16]:
y_train

array([2, 2, 0, 2, 3, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 1, 1, 3, 1, 2, 1, 0,
       0, 2, 3, 0, 0, 1, 0, 2, 1, 2, 1, 1, 3, 1, 3, 1, 2, 2, 3, 0, 3, 0,
       1, 3, 3, 2, 3, 3, 2])

In [11]:
y_test

array([1, 0, 0, 3, 3, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 0, 0, 4, 0, 3, 0, 0,
       0, 4, 3, 4, 1, 3, 0, 3, 0])

## 5. Create the classification model

In [79]:
#########################################
# Creación y entrenamiento del modelo SVM
#########################################
model = SVC(C = 241.0, gamma=41.0, kernel='rbf', decision_function_shape="ovr", probability = True)

## 6. Train the model

In [80]:
model.fit(X_train, y_train)

## 6. Evaluating the model with the training data

In [81]:
# Evaluación con datos de entrenamiento
training_predict = model.predict(X_train)

In [82]:
print("Reporte de clasificación (entrenamiento):")
print(metrics.classification_report(y_train, training_predict, digits = 3))

Reporte de clasificación (entrenamiento):
              precision    recall  f1-score   support

           0      1.000     1.000     1.000        14
           1      1.000     1.000     1.000        13
           2      1.000     1.000     1.000        11
           3      1.000     1.000     1.000        13

    accuracy                          1.000        51
   macro avg      1.000     1.000     1.000        51
weighted avg      1.000     1.000     1.000        51



In [83]:
print("Matriz de confusión (entrenamiento):")
print(metrics.confusion_matrix(y_train, training_predict))

Matriz de confusión (entrenamiento):
[[14  0  0  0]
 [ 0 13  0  0]
 [ 0  0 11  0]
 [ 0  0  0 13]]


In [84]:
print(f'Model accuracy: {round(metrics.accuracy_score(y_train, training_predict)*100,2)}%')

Model accuracy: 100.0%


## 8. Evaluating the model with test data

In [85]:
# Evaluación con datos de entrenamiento
test_predict = model.predict(X_test)

In [86]:
print("Reporte de clasificación de prueba:")
print(metrics.classification_report(y_test, test_predict, digits = 3))

Reporte de clasificación de prueba:
              precision    recall  f1-score   support

           0      0.111     0.500     0.182         2
           1      0.000     0.000     0.000         3
           2      0.500     0.200     0.286         5
           3      0.000     0.000     0.000         3

    accuracy                          0.154        13
   macro avg      0.153     0.175     0.117        13
weighted avg      0.209     0.154     0.138        13



In [87]:
print("Matriz de confusión (prueba):")
print(metrics.confusion_matrix(y_test, test_predict))

Matriz de confusión (prueba):
[[1 0 0 1]
 [3 0 0 0]
 [3 1 1 0]
 [2 0 1 0]]


In [88]:
print(f'Model accuracy: {round(metrics.accuracy_score(y_test, test_predict)*100,2)}%')

Model accuracy: 15.38%


## 10. Obtaining the model to be implemented in the microcontroller

In [89]:
print(port(model))

#pragma once
#include <cstdarg>
namespace Eloquent {
    namespace ML {
        namespace Port {
            class SVM {
                public:
                    /**
                    * Predict class for features vector
                    */
                    int predict(float *x) {
                        float kernels[51] = { 0 };
                        float decisions[6] = { 0 };
                        int votes[4] = { 0 };
                        kernels[0] = compute_kernel(x,   0.022185597254  , -0.029982186394  , 0.12852836959  , 0.003290332124  , 0.012263102866  , -0.045011168983  , -0.025530204804  , -0.037492306951  , -0.145909315972  , -0.075787871812  , -0.023022286756  , -0.066225804967  , 0.08680523742  , 0.105308564976  , 0.125590113648  , 0.121038775278  , 0.000590453458  , -0.025688590718  , 0.239773052867  , 0.007801903354 );
                        kernels[1] = compute_kernel(x,   -0.380762995027  , -0.262984984361  , -0.086058125724  , -0.235368426977  , -0

## 11. Saves the template in a .h file

In [29]:
with open('./SVMClassifier/SVMClassifier.h', 'w') as file:
    file.write(port(model))

## (BONUS) Hyperparameter tuning

RandomizedSearchCV is a function provided by the scikit-learn library in Python, commonly used for hyperparameter tuning in machine learning models through cross-validation. This technique proves beneficial when dealing with an extensive search space for hyperparameters and aims to identify the most effective combination of values.

Step-by-Step Explanation
1. Definition of Parameter Space:
Before utilizing RandomizedSearchCV, one needs to specify a search space for the model's hyperparameters. Rather than providing a specific grid of values, distributions are defined for each hyperparameter.

2. Random Sampling:
Instead of evaluating all conceivable combinations of hyperparameters (as in the case of GridSearchCV), RandomizedSearchCV randomly selects a fixed set of combinations for evaluation. This proves advantageous when dealing with a large search space.

3. Model Training:
For each randomly selected set of hyperparameters, RandomizedSearchCV trains the model using cross-validation. The data is divided into folds, with the model being trained on some folds and evaluated on the remaining folds.

4. Performance Evaluation:
Performance is measured using a specified metric (e.g., accuracy, F1-score). The objective is to find hyperparameters that maximize or minimize this metric, depending on the problem at hand (e.g., maximizing accuracy in a classification problem).

5. Selection of the Best Model:
Upon completion of the random search, RandomizedSearchCV returns the set of hyperparameters that led to the best average performance during cross-validation.

By employing RandomizedSearchCV, computational time can be saved compared to an exhaustive grid search (GridSearchCV), especially when dealing with a large search space. This efficiency stems from exploring a random sample of the hyperparameter space rather than evaluating all possible combinations.

### 2. Set Grid search for Combinations of Parameters

In [59]:
params = {
    "C": np.arange(1, 500, 10), #0.01 A 1000 TIPICAMENTE VALORES LOGARITMICOS
    "gamma": np.arange(1, 100, 10),
    "kernel": ['rbf', 'linear', 'poly'],
    "decision_function_shape": ['ovo','ovr']
}

### 3. Define Performance Measure

In [60]:
auc = make_scorer(roc_auc_score, multi_class="ovo")

In [35]:
from sklearn.metrics import make_scorer, f1_score

f1 = make_scorer(f1_score, average="weighted")
best_model = RandomizedSearchCV(
    model, 
    param_distributions=params, 
    random_state=42,
    n_iter=2, 
    cv=5, 
    verbose=2, 
    n_jobs=-1,
    return_train_score=True, 
    scoring=f1
)


In [34]:
from sklearn.metrics import make_scorer, average_precision_score

pr_auc = make_scorer(average_precision_score, needs_proba=True)
best_model = RandomizedSearchCV(
    model, 
    param_distributions=params, 
    random_state=42,
    n_iter=2, 
    cv=5, 
    verbose=2, 
    n_jobs=-1,
    return_train_score=True, 
    scoring=pr_auc
)


### 4. Runs the search for the best model

STANDARD SCALER

In [62]:
best_model = RandomizedSearchCV(model, param_distributions=params, random_state=42, 
                                n_iter=10, cv=10, verbose=2, n_jobs=-1, 
                                return_train_score=True, scoring = auc)

In [63]:
best_model.fit(X_train, y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits



One or more of the test scores are non-finite: [nan nan nan nan nan nan nan nan nan nan]


One or more of the train scores are non-finite: [nan nan nan nan nan nan nan nan nan nan]



### 5. Report the best model

In [64]:
def report_best_scores(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            best_params = results['params'][candidate]
            print("Best parameters found:")
            for param, value in best_params.items():
                print("  {0}: {1}".format(param, value))
            print("")

In [65]:
report_best_scores(best_model.cv_results_, 1)

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: poly
  gamma: 61
  decision_function_shape: ovo
  C: 141

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: linear
  gamma: 11
  decision_function_shape: ovr
  C: 211

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: poly
  gamma: 61
  decision_function_shape: ovr
  C: 181

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: rbf
  gamma: 51
  decision_function_shape: ovo
  C: 181

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: rbf
  gamma: 61
  decision_function_shape: ovo
  C: 271

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: rbf
  gamma: 31
  decision_function_shape: ovo
  C: 361

Model with rank: 1
Mean validation score: nan (std: nan)
Best parameters found:
  kernel: linear
  gamma: 51
  de