In [1]:
from scipy.io import loadmat
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Module-level scaler instance, created unfitted with default settings.
scaler = StandardScaler()

In [3]:
##--------------- TRAINING DATA -------------------------##
## Load the contents of the .mat file
x = loadmat('/DatosPrueba4.mat')

## Pick the variable we are interested in out of the .mat contents
y = x['data_tr']

## Copy the data into a NumPy array
z = np.array(y)

## Wrap the array in a pandas DataFrame
df = pd.DataFrame(z)

## Count the null entries of the DataFrame and report the result. A bare
## expression in the middle of a cell is evaluated and thrown away, so the
## count has to be stored/printed to be of any use.
n_nulos = int(df.isnull().to_numpy().sum())
print("Numero de datos nulos:", n_nulos)

## Name the columns: 18 features numbered '1'..'18' followed by the target 'Label'
df.columns = [str(i) for i in range(1, 19)] + ['Label']
df.sample(8)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,Label
911,0.000409,0.000826,0.000113,0.000152,2.214416,3.091875,0.002702,4e-06,0.000918,101.52,38.160872,138.0,0.0,-0.078866,-2.013617,138.0,62.0,108.388191,1.0
849,0.060528,0.26594,0.000402,0.000351,6.185386,40.481851,2.1107,2.2e-05,0.271441,597.5,584.209014,231.0,13.3434,1.23693,-0.353301,1676.0,222.0,833.602675,0.0
872,0.000158,0.000221,7.2e-05,0.000101,1.433364,0.620202,0.000707,3e-06,0.000271,110.64,36.66378,138.0,0.0,-0.574605,-1.686394,138.0,62.0,116.498927,1.0
1007,0.000203,0.000382,7e-06,5e-06,1.730696,1.152273,0.001114,4e-06,0.000431,125.52,32.500033,138.0,0.0,-1.337521,0.040578,155.0,62.0,129.618517,1.0
615,0.011189,0.022381,0.002011,0.002377,3.062,9.917885,0.121186,0.00012,0.024922,1043.06,568.235117,1476.0,0.0,-0.61741,-1.540981,1476.0,222.0,1186.438536,0.0
63,0.004746,0.01122,0.000564,0.000342,4.095941,17.863699,0.068212,0.000239,0.012131,456.23,64.334645,474.0,0.0,0.282448,3.46391,681.0,318.0,460.698784,0.0
658,0.015497,0.030483,0.005308,0.00724,3.938421,18.446931,0.206535,0.000115,0.034061,750.41,575.279942,477.5,369.9087,0.740268,-1.198228,1676.0,228.0,943.796964,0.0
333,0.546203,1.808611,0.013419,0.01974,5.119974,29.011126,13.207688,9.6e-05,1.880612,816.23,672.594732,266.0,65.2344,0.381595,-1.795197,1676.0,222.0,1055.505211,0.0


In [4]:
# Target vector: the 'Label' column of the training frame.
YEntrenamiento = df['Label']
# Feature matrix: every remaining column, standardized with the module-level
# scaler (fit_transform fits the scaler on these rows and returns the scaled array).
XEntrenamiento = scaler.fit_transform(df.drop(columns=['Label']))

In [5]:
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import itertools as itertools

In [6]:
def experiementarSVC(x, y, kernels, gammas, params_reg, n_splits=4, max_iter=100):
    """Evaluate an SVM classifier over a parameter grid with stratified k-fold CV.

    Every combination of (kernel, gamma, regularization parameter) is trained
    and scored on each fold, and the per-fold values are averaged.

    x: array-like, feature matrix (one row per sample). DataFrames are accepted.
    y: array-like, target labels (one per sample). A pandas Series is accepted
        regardless of its index, because rows are selected by position.
    kernels: List[str], kernel names passed to sklearn's SVC.
    gammas: List[float], kernel coefficient values passed to SVC.
    params_reg: List[float], values passed to SVC as the regularization
        parameter C.
    n_splits: int, number of stratified folds (default 4).
    max_iter: int, iteration limit passed to SVC (default 100).

    returns: pd.DataFrame with one row per parameter combination and columns:
        - 'kernel', 'gamma', 'param_reg': the parameters used
        - 'error de entrenamiento': mean ACCURACY (accuracy_score) over the
          training folds; despite the name it is not an error measure, the
          column name is kept so existing consumers keep working
        - 'error de prueba': mean accuracy over the held-out folds
        - '% de vectores de soporte': mean percentage (0 to 100) of training
          samples that ended up as support vectors
    """
    # Work on plain arrays so that fold indices always select rows by position
    # (indexing a pandas Series with an integer array is label-based).
    x = np.asarray(x)
    y = np.asarray(y)

    kf = StratifiedKFold(n_splits=n_splits)
    columnas = ['kernel', 'gamma', 'param_reg',
                'error de entrenamiento', 'error de prueba',
                '% de vectores de soporte']
    filas = []

    # Every combination of the three parameter lists is evaluated.
    # NOTE(review): combinations are not de-duplicated; if the kernel ignores
    # gamma (presumably the case for 'linear' - confirm in the SVC docs), the
    # same scores repeat once per gamma value.
    for params in itertools.product(kernels, gammas, params_reg):
        kernel, gamma, param_reg = params
        print("parametros usados", params) # progress trace of the grid
        acc_train = []
        acc_test = []
        pct_support_vectors = []
        for train_index, test_index in kf.split(x, y):
            X_train, X_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            # Standardize using statistics of the training fold only, so the
            # held-out fold does not leak into the preprocessing.
            fold_scaler = StandardScaler()
            X_train = fold_scaler.fit_transform(X_train)
            X_test = fold_scaler.transform(X_test)
            svm = SVC(kernel=kernel, gamma=gamma, C=param_reg, max_iter=max_iter)
            # Train the model
            svm.fit(X=X_train, y=y_train)
            # Score on both partitions of the fold
            y_train_pred = svm.predict(X=X_train)
            y_test_pred = svm.predict(X=X_test)
            acc_train.append(accuracy_score(y_true=y_train, y_pred=y_train_pred))
            acc_test.append(accuracy_score(y_true=y_test, y_pred=y_test_pred))
            # Support vectors as a percentage of the training samples of the fold
            n_train = X_train.shape[0]
            pct_support_vectors.append((len(svm.support_vectors_) / n_train) * 100)

        # One record per combination; the frame is built once at the end
        # instead of being enlarged cell by cell.
        filas.append({
            'kernel': kernel,
            'gamma': gamma,
            'param_reg': param_reg,
            'error de entrenamiento': np.mean(acc_train),
            'error de prueba': np.mean(acc_test),
            '% de vectores de soporte': np.mean(pct_support_vectors),
        })
    return pd.DataFrame(filas, columns=columnas)

In [7]:
# Run the SVC grid: two kernels, two gamma values and five values of the
# regularization parameter, then display the resulting table.
resultadosSVC = experiementarSVC(
    x=XEntrenamiento,
    y=YEntrenamiento,
    kernels=['linear', 'rbf'],
    gammas=[0.01, 0.1],
    params_reg=[0.001, 0.01, 0.1, 1.0, 10],
)

resultadosSVC

parametros usados ('linear', 0.01, 0.001)
parametros usados ('linear', 0.01, 0.01)
parametros usados ('linear', 0.01, 0.1)
parametros usados ('linear', 0.01, 1.0)
parametros usados ('linear', 0.01, 10)
parametros usados ('linear', 0.1, 0.001)




parametros usados ('linear', 0.1, 0.01)
parametros usados ('linear', 0.1, 0.1)
parametros usados ('linear', 0.1, 1.0)
parametros usados ('linear', 0.1, 10)
parametros usados ('rbf', 0.01, 0.001)
parametros usados ('rbf', 0.01, 0.01)




parametros usados ('rbf', 0.01, 0.1)
parametros usados ('rbf', 0.01, 1.0)
parametros usados ('rbf', 0.01, 10)
parametros usados ('rbf', 0.1, 0.001)




parametros usados ('rbf', 0.1, 0.01)
parametros usados ('rbf', 0.1, 0.1)
parametros usados ('rbf', 0.1, 1.0)
parametros usados ('rbf', 0.1, 10)




Unnamed: 0,kernel,gamma,param_reg,error de entrenamiento,error de prueba,% de vectores de soporte
0,linear,0.01,0.001,0.969584,0.967091,25.086241
1,linear,0.01,0.01,0.989026,0.991541,11.508068
2,linear,0.01,0.1,1.0,0.99906,3.637348
3,linear,0.01,1.0,1.0,1.0,1.28564
4,linear,0.01,10.0,1.0,1.0,0.815283
5,linear,0.1,0.001,0.969584,0.967091,25.086241
6,linear,0.1,0.01,0.989026,0.991541,11.508068
7,linear,0.1,0.1,1.0,0.99906,3.637348
8,linear,0.1,1.0,1.0,1.0,1.28564
9,linear,0.1,10.0,1.0,1.0,0.815283
