In [1]:
import warnings
warnings.filterwarnings('ignore') 

In [2]:
import pandas as pd
import numpy as np
import qgrid
from numpy import random
import scipy as sc
import numpy.matlib as matlib
import time
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from mlxtend.preprocessing import standardize
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA

db = np.loadtxt('segmentationT.data', delimiter = ',')

Y = db[:, 0]
Y = Y.astype(int)
X = db[:, 1:]



In [3]:
def classification_error(y_est, y_real):
    err = 0
    for y_e, y_r in zip(y_est, y_real):

        if y_e != y_r:
            err += 1

    return err/np.size(y_est)

In [4]:
def extract_features(tipo, n):
    
    ext = PCA(n_components=n)
    
    return ext

# Naive bayes

In [5]:
from sklearn.naive_bayes import GaussianNB

In [6]:
def model_NB(nC, impresion = False):

    tiempo_i = time.time()

    accuracy_list = np.zeros([4])
    precision_list = np.zeros([4,7])
    recall_list = np.zeros([4,7])
    f_list = np.zeros([4,7])
    TN_list = np.zeros([4])
    FP_list = np.zeros([4])
    FN_list = np.zeros([4])
    TP_list = np.zeros([4])   
    errores = np.zeros(4)
    nb = GaussianNB()
    for j in range(4):
               
        Xest = standardize(X)

        pca = PCA(n_components=nC)
        pca.fit(Xest)
        X_pca = pca.transform(Xest)
        
        Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_pca, Y, test_size=0.25) # Modificar metodología de validación
#         scaler = MinMaxScaler()#Escala entre 0 y 1
#         Xtrain = scaler.fit_transform(Xtrain)
#         Xtest = scaler.transform(Xtest)
        
        nb.fit(Xtrain, Ytrain)
        pred = nb.predict(Xtest)
        
        #code for calculating accuracy 
        _accuracy_ = accuracy_score(Ytest, pred, normalize=True)
        accuracy_list[j] = _accuracy_

        #code for calculating recall 
        _recalls_ = recall_score(Ytest, pred, average=None)
        recall_list[j] = _recalls_

        #code for calculating precision 
        _precisions_ = precision_score(Ytest, pred, average=None)
        precision_list[j] = _precisions_
        
        _f_score_ = f1_score(Ytest, pred, average=None)
        f_list[j] = _f_score_


        #code for calculating confusion matrix 
        _confusion_matrix_ = confusion_matrix(Ytest, pred)
        TN_list[j] = _confusion_matrix_[0][0]
        FP_list[j] = _confusion_matrix_[0][1]
        FN_list[j] = _confusion_matrix_[1][0]
        TP_list[j] = _confusion_matrix_[1][1]
                
        errores[j] = classification_error(pred, Ytest)
          
    if impresion == True:
        cm = confusion_matrix(Ytest, pred)
        columnas = ['Clase %s'%(i) for i in list(ascii_uppercase)[0:len(np.unique(pred))]]
        df_cm = pd.DataFrame(cm,index = columnas, columns = columnas)
        
        grafica = sns.heatmap(df_cm, cmap = 'Blues', annot = True)
        
        grafica.set(xlabel = 'Verdaderos', ylabel = 'Predicciones')
#         print(classification_report(Ytest, pred))

    return str(np.mean(accuracy_list)), str(np.std(accuracy_list)), str(np.mean(recall_list)), str(np.std(recall_list)), str(np.mean(precision_list)), str(np.std(precision_list)),  str(np.mean(f_list)), str(np.std(f_list)), str(np.mean(errores)), str(np.std(errores)), str(time.time()-tiempo_i)

In [7]:
pd.options.mode.chained_assignment = None

randn = np.random.randn
df_types = pd.DataFrame({
    '#Componentes' : pd.Series(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19'])})
df_types["Eficiencia"] = ""
df_types["Int_Eficiencia"] = ""
df_types["Sensibilidad"] = ""
df_types["Int_Sensibilidad"] = ""
df_types["Precision"] = ""
df_types["Int_Precision"] = ""
df_types["F-Score"] = ""
df_types["Int_F-Score"] = ""
df_types["Error_Prueba"] = ""
df_types["Int_error"] = ""
df_types["Tiempo de ejecución"] = ""
df_types.set_index(['#Componentes'], inplace=True)

i = 0
for k in df_types.index:
    Acc, IntAcc, Sen, IntSen, Pre, IntPre, f, IntF, error, stdError, tiempo = model_NB(int(k), impresion = False)
    df_types["Eficiencia"][i] = Acc
    df_types["Int_Eficiencia"][i] = IntAcc
    df_types["Sensibilidad"][i] = Sen
    df_types["Int_Sensibilidad"][i] = IntSen
    df_types["Precision"][i] = Pre
    df_types["Int_Precision"][i] = IntPre
    df_types["F-Score"][i] = f
    df_types["Int_F-Score"][i] = IntF
    df_types["Error_Prueba"][i] = error
    df_types["Int_error"][i] = stdError
    df_types["Tiempo de ejecución"][i] = tiempo
    
    i += 1
    
qgrid_widget = qgrid.show_grid(df_types, show_toolbar=False)

In [8]:
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia,Int_Eficiencia,Sensibilidad,Int_Sensibilidad,Precision,Int_Precision,F-Score,Int_F-Score,Error_Prueba,Int_error,Tiempo de ejecución
#Componentes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.5216262975778547,0.0130906106837556,0.5270020004193678,0.3790790119426084,0.4440538286647698,0.2967932355556169,0.4568240851615006,0.3195174604194472,0.4783737024221453,0.0130906106837556,0.740673303604126
2,0.5302768166089965,0.0137049995828329,0.5294797500295033,0.3508014379675505,0.5410732001652568,0.2331723462599369,0.4857373042220541,0.2717792766197981,0.4697231833910035,0.0137049995828329,0.7066423892974854
3,0.717560553633218,0.0203746811649028,0.719850020254613,0.3153595313955398,0.7383079411244301,0.2368734888318865,0.6992335783724392,0.2750563677594042,0.282439446366782,0.0203746811649029,0.6986362934112549
4,0.7218858131487889,0.0224372185974791,0.7203184386054661,0.2834229439504573,0.7448403791812906,0.2216885511055498,0.7064420275670057,0.2592579104146332,0.278114186851211,0.0224372185974791,0.6896276473999023
5,0.75,0.0188136359610134,0.7488826531503173,0.2422847032853754,0.7690371455284595,0.1950302545064255,0.7427068606875318,0.2163679648223909,0.25,0.0188136359610134,0.7596917152404785
6,0.7439446366782007,0.0131191616679092,0.7449542364588144,0.254992604758998,0.7714813373202178,0.2054290760975014,0.7346522932237235,0.2266813399476002,0.2560553633217993,0.0131191616679092,0.6926307678222656
7,0.7802768166089966,0.0165494173613399,0.7727450031135763,0.2444951541286867,0.7884027349373132,0.1988292644530323,0.7645771563445882,0.2162314184477604,0.2197231833910034,0.0165494173613399,0.7096457481384277
8,0.8066608996539792,0.0159214206914411,0.8071447774192055,0.2131867668962445,0.8146221033728455,0.1588185880217643,0.8002564089996614,0.1821154183493625,0.1933391003460207,0.0159214206914411,0.7086446285247803
9,0.8205017301038062,0.0254970288117465,0.8183394186278223,0.1947889339588042,0.8245673798940815,0.162123069255604,0.8146410456836513,0.1734041781180375,0.1794982698961937,0.0254970288117465,0.6876258850097656
10,0.7988754325259515,0.0052440984658227,0.7968696898482255,0.2419439661031019,0.8126410564508115,0.1863906773040281,0.7892772329757693,0.2181298108399959,0.2011245674740484,0.0052440984658227,0.6856238842010498


# KNN

In [83]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


In [84]:
def model_knn(nC, impresion = False):

    tiempo_i = time.time()
    
    accuracy_list = np.zeros([4])
    precision_list = np.zeros([4,7])
    recall_list = np.zeros([4,7])
    f_list = np.zeros([4,7])
    TN_list = np.zeros([4])
    FP_list = np.zeros([4])
    FN_list = np.zeros([4])
    TP_list = np.zeros([4])   
    errores = np.zeros(4)
    knn = KNeighborsClassifier(n_neighbors = 1)
    for j in range(4):
        
        Xest = standardize(X)

        pca = PCA(n_components=nC)
        pca.fit(Xest)
        X_pca = pca.transform(Xest)

        Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_pca, Y, test_size=0.25) # Modificar metodología de validación
        
        knn.fit(Xtrain, Ytrain)
        pred = knn.predict(Xtest)
        
        #code for calculating accuracy 
        _accuracy_ = accuracy_score(Ytest, pred, normalize=True)
        accuracy_list[j] = _accuracy_

        #code for calculating recall 
        _recalls_ = recall_score(Ytest, pred, average=None)
        recall_list[j] = _recalls_

        #code for calculating precision 
        _precisions_ = precision_score(Ytest, pred, average=None)
        precision_list[j] = _precisions_
        
        _f_score_ = f1_score(Ytest, pred, average=None)
        f_list[j] = _f_score_


        #code for calculating confusion matrix 
        _confusion_matrix_ = confusion_matrix(Ytest, pred)
        TN_list[j] = _confusion_matrix_[0][0]
        FP_list[j] = _confusion_matrix_[0][1]
        FN_list[j] = _confusion_matrix_[1][0]
        TP_list[j] = _confusion_matrix_[1][1]
                
        errores[j] = classification_error(pred, Ytest)
           
    if impresion == True:
        print(confusion_matrix(Ytest, pred))
        print(classification_report(Ytest, pred))

    return str(np.mean(accuracy_list)), str(np.std(accuracy_list)), str(np.mean(recall_list)), str(np.std(recall_list)), str(np.mean(precision_list)), str(np.std(precision_list)),  str(np.mean(f_list)), str(np.std(f_list)), str(np.mean(errores)), str(np.std(errores)), str(time.time()-tiempo_i)


In [87]:
pd.options.mode.chained_assignment = None

randn = np.random.randn
df_types = pd.DataFrame({
    '#Componentes' : pd.Series(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19'])})
df_types["Eficiencia"] = ""
df_types["Int_Eficiencia"] = ""
df_types["Sensibilidad"] = ""
df_types["Int_Sensibilidad"] = ""
df_types["Precision"] = ""
df_types["Int_Precision"] = ""
df_types["F-Score"] = ""
df_types["Int_F-Score"] = ""
df_types["Error_Prueba"] = ""
df_types["Int_error"] = ""
df_types["Tiempo de ejecución"] = ""
df_types.set_index(['#Componentes'], inplace=True)

i = 0
for k in df_types.index:
    Acc, IntAcc, Sen, IntSen, Pre, IntPre, f, IntF, error, stdError, tiempo = model_knn(int(k), impresion = False)
    df_types["Eficiencia"][i] = Acc
    df_types["Int_Eficiencia"][i] = IntAcc
    df_types["Sensibilidad"][i] = Sen
    df_types["Int_Sensibilidad"][i] = IntSen
    df_types["Precision"][i] = Pre
    df_types["Int_Precision"][i] = IntPre
    df_types["F-Score"][i] = f
    df_types["Int_F-Score"][i] = IntF
    df_types["Error_Prueba"][i] = error
    df_types["Int_error"][i] = stdError
    df_types["Tiempo de ejecución"][i] = tiempo
    
    i += 1
    
qgrid_widget = qgrid.show_grid(df_types, show_toolbar=False)

In [88]:
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia,Int_Eficiencia,Sensibilidad,Int_Sensibilidad,Precision,Int_Precision,F-Score,Int_F-Score,Error_Prueba,Int_error,Tiempo de ejecución
#Componentes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.5583910034602076,0.0095841348642562,0.5635621715728067,0.2086429444252643,0.559417470921723,0.203731553547144,0.5595370695576732,0.2047520035042812,0.4416089965397924,0.0095841348642561,0.6906278133392334
2,0.6466262975778546,0.0188880670942024,0.6492873722208252,0.170607388092342,0.6493436920109109,0.1691664334554357,0.6478439798890425,0.1677561542369132,0.3533737024221453,0.0188880670942024,0.7386729717254639
3,0.8421280276816608,0.0028362623375008,0.8429186998186938,0.1243771459588856,0.8418825507007289,0.1215276783570424,0.8417453428190367,0.1213788700262632,0.1578719723183391,0.0028362623375008,0.7146492004394531
4,0.8931660899653979,0.0060398962127028,0.8913956106528736,0.093457280568559,0.8919808303943844,0.0787199926176714,0.8907398419017628,0.0821133060017799,0.106833910034602,0.0060398962127028,0.7616932392120361
5,0.916089965397924,0.0157857159090784,0.9171199218702444,0.0737603754739281,0.9169106678752978,0.0689349817385767,0.9159266826128656,0.0648128632890644,0.0839100346020761,0.0157857159090784,0.7136499881744385
6,0.9251730103806228,0.009505735716192,0.9233270060348708,0.0730647108912542,0.9239362033826188,0.0602777385062117,0.9229230516047064,0.0624186971709592,0.0748269896193771,0.009505735716192,0.7106475830078125
7,0.9385813148788928,0.0086936640321114,0.9387441943128486,0.0689027452555464,0.9388122057503132,0.0515577396851551,0.938009278793312,0.0552448941666939,0.0614186851211072,0.0086936640321114,0.7176527976989746
8,0.9385813148788926,0.0043252595155709,0.9387294237154088,0.0645701751173085,0.9389807186211926,0.0573930014949995,0.938421835084269,0.057977141747615,0.0614186851211072,0.0043252595155709,0.7396728992462158
9,0.9515570934256056,0.0102354321506914,0.95307116429476,0.0560830822355065,0.9515516776122448,0.0439223902278307,0.951834287575091,0.0462393262099138,0.0484429065743944,0.0102354321506913,0.7066428661346436
10,0.9476643598615916,0.0083310381852327,0.9492599613693582,0.0527042546432428,0.9497387626090864,0.0414501426564253,0.9489644946771576,0.0420766242236862,0.0523356401384083,0.0083310381852327,0.7136504650115967


# Redes Neuronales Artificiales

In [42]:
from sklearn.neural_network import MLPClassifier

In [43]:
def model_MLP(nC, impresion = False):

    mlp=MLPClassifier(activation='tanh',max_iter = 1000,hidden_layer_sizes=(15,15))
        
    tiempo_i = time.time()

    accuracy_list = np.zeros([4])
    precision_list = np.zeros([4,7])
    recall_list = np.zeros([4,7])
    f_list = np.zeros([4,7])
    TN_list = np.zeros([4])
    FP_list = np.zeros([4])
    FN_list = np.zeros([4])
    TP_list = np.zeros([4])   
    errores = np.zeros(4)
    for j in range(4):
        
        Xest = standardize(X)

        pca = PCA(n_components=nC)
        pca.fit(Xest)
        X_pca = pca.transform(Xest)
        
        Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_pca, Y, test_size=0.25) # Modificar metodología de validación
#         scaler = MinMaxScaler()#Escala entre 0 y 1
#         Xtrain = scaler.fit_transform(Xtrain)
#         Xtest = scaler.transform(Xtest)
        
        mlp.fit(Xtrain, Ytrain)
        pred = mlp.predict(Xtest)
        
        #code for calculating accuracy 
        _accuracy_ = accuracy_score(Ytest, pred, normalize=True)
        accuracy_list[j] = _accuracy_

        #code for calculating recall 
        _recalls_ = recall_score(Ytest, pred, average=None)
        recall_list[j] = _recalls_

        #code for calculating precision 
        _precisions_ = precision_score(Ytest, pred, average=None)
        precision_list[j] = _precisions_
        
        _f_score_ = f1_score(Ytest, pred, average=None)
        f_list[j] = _f_score_


        #code for calculating confusion matrix 
        _confusion_matrix_ = confusion_matrix(Ytest, pred)
        TN_list[j] = _confusion_matrix_[0][0]
        FP_list[j] = _confusion_matrix_[0][1]
        FN_list[j] = _confusion_matrix_[1][0]
        TP_list[j] = _confusion_matrix_[1][1]
                
        
        errores[j] = classification_error(pred, Ytest)
       
    
    if impresion == True:
        print(confusion_matrix(Ytest, pred))
        print(classification_report(Ytest, pred))

    return str(np.mean(accuracy_list)), str(np.std(accuracy_list)), str(np.mean(recall_list)), str(np.std(recall_list)), str(np.mean(precision_list)), str(np.std(precision_list)),  str(np.mean(f_list)), str(np.std(f_list)), str(np.mean(errores)), str(np.std(errores)), str(time.time()-tiempo_i)
    

In [44]:
pd.options.mode.chained_assignment = None

randn = np.random.randn
df_types = pd.DataFrame({
    '#Componentes' : pd.Series(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19'])})
df_types["Eficiencia"] = ""
df_types["Int_Eficiencia"] = ""
df_types["Sensibilidad"] = ""
df_types["Int_Sensibilidad"] = ""
df_types["Precision"] = ""
df_types["Int_Precision"] = ""
df_types["F-Score"] = ""
df_types["Int_F-Score"] = ""
df_types["Error_Prueba"] = ""
df_types["Int_error"] = ""
df_types["Tiempo de ejecución"] = ""
df_types.set_index(['#Componentes'], inplace=True)

i = 0
for k in df_types.index:
    Acc, IntAcc, Sen, IntSen, Pre, IntPre, f, IntF, error, stdError, tiempo = model_MLP(int(k), impresion = False)
    df_types["Eficiencia"][i] = Acc
    df_types["Int_Eficiencia"][i] = IntAcc
    df_types["Sensibilidad"][i] = Sen
    df_types["Int_Sensibilidad"][i] = IntSen
    df_types["Precision"][i] = Pre
    df_types["Int_Precision"][i] = IntPre
    df_types["F-Score"][i] = f
    df_types["Int_F-Score"][i] = IntF
    df_types["Error_Prueba"][i] = error
    df_types["Int_error"][i] = stdError
    df_types["Tiempo de ejecución"][i] = tiempo
    
    i += 1
    
qgrid_widget = qgrid.show_grid(df_types, show_toolbar=False)

In [45]:
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia,Int_Eficiencia,Sensibilidad,Int_Sensibilidad,Precision,Int_Precision,F-Score,Int_F-Score,Error_Prueba,Int_error,Tiempo de ejecución
#Componentes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.5484429065743945,0.0072375434821287,0.5427943423025876,0.2908604674188119,0.5195839921680319,0.2521480485825984,0.5141852997814721,0.2736778254926603,0.4515570934256055,0.0072375434821286,9.66078233718872
2,0.6730103806228374,0.0074414578434624,0.6744915525854668,0.2002557912328354,0.6792126712450237,0.1560015609492065,0.6687704792919227,0.1692551274081084,0.3269896193771626,0.0074414578434624,20.43359470367432
3,0.8287197231833909,0.0127723382873991,0.8291989994530974,0.1518849129363572,0.8309067014574639,0.1339485342830108,0.8271458749947161,0.1369454467265217,0.1712802768166089,0.0127723382873991,19.94715118408203
4,0.8858131487889274,0.0063568072909598,0.8879389940110577,0.0974483108144213,0.8887438288377002,0.0797855095402346,0.8862556483989359,0.0798804807011921,0.1141868512110726,0.0063568072909598,21.114214420318604
5,0.9057093425605536,0.0083422584437655,0.9034966575847748,0.102107587727496,0.9051566368488188,0.086787050436043,0.902886993240809,0.0888559747430656,0.0942906574394463,0.0083422584437655,21.08218288421631
6,0.9178200692041524,0.0077854671280276,0.9160898968093288,0.0941234819603866,0.9170333304514016,0.0708949301894216,0.9150499759142876,0.0759662519103139,0.0821799307958477,0.0077854671280276,21.121220111846924
7,0.944636678200692,0.009554810568501,0.9448236906266352,0.0557281084015938,0.9443029376154616,0.0477024954200168,0.9441086585298702,0.0478078536651185,0.0553633217993079,0.009554810568501,20.705841541290283
8,0.9403114186851212,0.0058029445782866,0.9385940856942624,0.0682683607257196,0.938646736758014,0.0610576148497181,0.938090686202478,0.0614878858557923,0.0596885813148788,0.0058029445782866,21.229318141937256
9,0.9571799307958476,0.0030888531265323,0.956604606634014,0.0474446242538513,0.9570321959957092,0.043765910941464,0.9565431707717884,0.0430392818004275,0.0428200692041522,0.0030888531265323,20.189372301101685
10,0.9584775086505192,0.0047380844074841,0.9578797905846176,0.0444909347484963,0.9577424882746114,0.0420677144656561,0.9574731865487258,0.0398530683867589,0.0415224913494809,0.0047380844074841,22.180182933807373


# Random forest

In [9]:
from sklearn.ensemble import RandomForestClassifier # Se llama a la librería del método Random Forest

In [23]:
def model_RF(nC, impresion = False):
        
    tiempo_i = time.time()
    
    accuracy_list = np.zeros([4])
    precision_list = np.zeros([4,7])
    recall_list = np.zeros([4,7])
    f_list = np.zeros([4,7])
    TN_list = np.zeros([4])
    FP_list = np.zeros([4])
    FN_list = np.zeros([4])
    TP_list = np.zeros([4])   
    errores = np.zeros(4)
    
    RF = RandomForestClassifier(n_estimators = 20, max_features = 10)
    
    for j in range(4):
        
        if nC == 19:
            Xest = X
        else:
            Xest = standardize(X)

        pca = PCA(n_components=nC)
        pca.fit(Xest)
        X_pca = pca.transform(Xest)
        
        Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_pca, Y, test_size=0.25) # Modificar metodología de validación
        if nC == 19:
            scaler = MinMaxScaler()#Escala entre 0 y 1
            Xtrain = scaler.fit_transform(Xtrain)
            Xtest = scaler.transform(Xtest)
        
        RF.fit(Xtrain, Ytrain)
        pred = RF.predict(Xtest)
        
        #code for calculating accuracy 
        _accuracy_ = accuracy_score(Ytest, pred, normalize=True)
        accuracy_list[j] = _accuracy_

        #code for calculating recall 
        _recalls_ = recall_score(Ytest, pred, average=None)
        recall_list[j] = _recalls_

        #code for calculating precision 
        _precisions_ = precision_score(Ytest, pred, average=None)
        precision_list[j] = _precisions_
        
        _f_score_ = f1_score(Ytest, pred, average=None)
        f_list[j] = _f_score_

        #code for calculating confusion matrix 
        _confusion_matrix_ = confusion_matrix(Ytest, pred)
        TN_list[j] = _confusion_matrix_[0][0]
        FP_list[j] = _confusion_matrix_[0][1]
        FN_list[j] = _confusion_matrix_[1][0]
        TP_list[j] = _confusion_matrix_[1][1]
                
        
        errores[j] = classification_error(pred, Ytest)
       
    
    if impresion == True:
        cm = confusion_matrix(Ytest, pred)
        columnas = ['Clase %s'%(i) for i in list(ascii_uppercase)[0:len(np.unique(pred))]]
        df_cm = pd.DataFrame(cm,index = columnas, columns = columnas)
        
        grafica = sns.heatmap(df_cm, cmap = 'Blues', annot = True)
        
        grafica.set(xlabel = 'Verdaderos', ylabel = 'Predicciones')
#         print(classification_report(Ytest, pred))

    return str(np.mean(accuracy_list)), str(np.std(accuracy_list)), str(np.mean(recall_list)), str(np.std(recall_list)), str(np.mean(precision_list)), str(np.std(precision_list)),  str(np.mean(f_list)), str(np.std(f_list)), str(np.mean(errores)), str(np.std(errores)), str(time.time()-tiempo_i)
    

In [30]:
pd.options.mode.chained_assignment = None

randn = np.random.randn
df_types = pd.DataFrame({
    '#Componentes' : pd.Series(['10','11','12','13','14','15','16','17','18','19'])})
df_types["Eficiencia"] = ""
df_types["Int_Eficiencia"] = ""
df_types["Sensibilidad"] = ""
df_types["Int_Sensibilidad"] = ""
df_types["Precision"] = ""
df_types["Int_Precision"] = ""
df_types["F-Score"] = ""
df_types["Int_F-Score"] = ""
df_types["Error_Prueba"] = ""
df_types["Int_error"] = ""
df_types["Tiempo de ejecución"] = ""
df_types.set_index(['#Componentes'], inplace=True)

i = 0
for k in df_types.index:
    Acc, IntAcc, Sen, IntSen, Pre, IntPre, f, IntF, error, stdError, tiempo = model_RF(int(k), impresion = False)
    df_types["Eficiencia"][i] = Acc
    df_types["Int_Eficiencia"][i] = IntAcc
    df_types["Sensibilidad"][i] = Sen
    df_types["Int_Sensibilidad"][i] = IntSen
    df_types["Precision"][i] = Pre
    df_types["Int_Precision"][i] = IntPre
    df_types["F-Score"][i] = f
    df_types["Int_F-Score"][i] = IntF
    df_types["Error_Prueba"][i] = error
    df_types["Int_error"][i] = stdError
    df_types["Tiempo de ejecución"][i] = tiempo
    
    i += 1
    
qgrid_widget = qgrid.show_grid(df_types, show_toolbar=False)

In [31]:
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia,Int_Eficiencia,Sensibilidad,Int_Sensibilidad,Precision,Int_Precision,F-Score,Int_F-Score,Error_Prueba,Int_error,Tiempo de ejecución
#Componentes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,0.9346885813148788,0.0071726314670315,0.9329259163147948,0.0591550034177059,0.93227571988928,0.062425298604145,0.9323589771109264,0.0590328361644007,0.0653114186851211,0.0071726314670315,1.292175531387329
11,0.934256055363322,0.0054710686162082,0.932984458595205,0.0706239506942771,0.9309209862004492,0.0587055532316792,0.931540800031156,0.0624394127607591,0.0657439446366782,0.0054710686162082,1.2741599082946775
12,0.9390138408304498,0.0068524997914164,0.9382065999835418,0.0574724940141985,0.937796945267432,0.0547816142127552,0.9377358146457472,0.0540684086691271,0.0609861591695501,0.0068524997914164,1.2551417350769043
13,0.9368512110726644,0.0101988115246986,0.9381867293910624,0.0650414429783327,0.9387189739108128,0.0651236465727641,0.937923070705991,0.0621054349893814,0.0631487889273356,0.0101988115246986,1.2371253967285156
14,0.9524221453287196,0.0070807549929692,0.9529126276876092,0.0514126976584947,0.9530159260009212,0.044107420213393,0.952552414245253,0.0439780985060405,0.0475778546712802,0.0070807549929692,1.2691552639007568
15,0.948961937716263,0.0130906106837556,0.9499633221060674,0.0613148636253202,0.950479920057709,0.0537812603007835,0.9498215953510571,0.0549282487910859,0.051038062283737,0.0130906106837556,1.2291195392608645
16,0.9480968858131488,0.0076399315452662,0.9468739215809888,0.0568803487651849,0.9473875919815083,0.0572193334889899,0.946884299755836,0.055155585036001,0.0519031141868512,0.0076399315452663,1.2591447830200195
17,0.9485294117647058,0.0148514573645236,0.9479873842322822,0.0643808766263005,0.9477039223975414,0.0624584436408715,0.9473732517008304,0.0603120182233939,0.0514705882352941,0.0148514573645237,1.2651524543762207
18,0.9498269896193772,0.0063568072909598,0.949246740980914,0.0588735053578094,0.9496876563800852,0.050206388973029,0.9490722849534006,0.0518845112732895,0.0501730103806228,0.0063568072909598,1.2731575965881348
19,0.9714532871972318,0.0062976729146025,0.9723457402009142,0.0364959949922143,0.971564462035988,0.0275875299651859,0.9716791884195072,0.0283423365561493,0.0285467128027681,0.0062976729146025,1.1930866241455078


In [22]:
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia,Int_Eficiencia,Sensibilidad,Int_Sensibilidad,Precision,Int_Precision,F-Score,Int_F-Score,Error_Prueba,Int_error,Tiempo de ejecución
#Componentes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,0.939446366782007,0.0044109165342498,0.9393986719308944,0.0584570032775423,0.9388982740527928,0.0516933214511693,0.9389409199411012,0.0534578659467278,0.060553633217993,0.0044109165342498,1.3121936321258545
11,0.9420415224913496,0.0069742714085627,0.9397555413149022,0.0597330798556776,0.9407051251950528,0.0566480889004106,0.9399529200542452,0.0563276736131192,0.0579584775086505,0.0069742714085627,1.3091914653778076
12,0.9359861591695502,0.0047380844074841,0.9369979658865324,0.0644724426206023,0.9367299883072892,0.0519672022291227,0.936187144771403,0.0536591509083473,0.0640138408304498,0.0047380844074841,1.3011841773986816
13,0.9411764705882352,0.0137866586942121,0.9398163898557674,0.064566347197512,0.940391389895924,0.0653672086219689,0.9398460977212852,0.0631537698162972,0.0588235294117647,0.0137866586942121,1.2791645526885986
14,0.9459342560553632,0.0075784668978958,0.946849459903724,0.0540267907207767,0.9472935548850228,0.0580647951653968,0.9467386557797256,0.0536374135614875,0.0540657439446366,0.0075784668978958,1.2611472606658936
15,0.9506920415224912,0.0067562713459399,0.9502073292310698,0.0598283019758125,0.95079280593159,0.052838231714241,0.9499950594521226,0.0528849936989311,0.0493079584775086,0.00675627134594,1.312194585800171
16,0.9437716262975778,0.0073910067001016,0.9450733557123446,0.0613327770988611,0.944784770547569,0.0627606879472328,0.9444066472660796,0.0587235510912799,0.0562283737024221,0.0073910067001016,1.325204610824585
17,0.9537197231833908,0.0049504857881745,0.9549398873333405,0.0527118408896487,0.9544233299715644,0.0476528211740343,0.9540813098880484,0.0452347836599961,0.046280276816609,0.0049504857881745,1.2881720066070557
18,0.9519896193771626,0.0050994057623492,0.9519441342788464,0.0518562609234489,0.9525050432110792,0.0486314166488156,0.9520324793025028,0.0486516631410049,0.0480103806228373,0.0050994057623493,1.1990911960601809
19,0.971453287197232,0.0055390348074678,0.9714423920696494,0.0341140268005667,0.9717590843405476,0.0271421392805637,0.9713586231254324,0.0270572363880842,0.0285467128027681,0.0055390348074678,1.2921757698059082


# Maquinas de Soporte Vectorial con kernel lineal y con kernel RBF.

In [89]:
from sklearn.svm import SVC

In [90]:
def model_SVC(nC, impresion = False):
    
    tiempo_i = time.time()

    accuracy_list = np.zeros([4])
    precision_list = np.zeros([4,7])
    recall_list = np.zeros([4,7])
    f_list = np.zeros([4,7])
    TN_list = np.zeros([4])
    FP_list = np.zeros([4])
    FN_list = np.zeros([4])
    TP_list = np.zeros([4])   
    errores = np.zeros(4)
    PorcentajeVS = np.zeros(4)
    
    svc = SVC(gamma=0.1, C=float(300),  kernel='rbf', decision_function_shape='ovo' , probability = True)
    
    for j in range(4):
        
        Xest = standardize(X)

        pca = PCA(n_components=nC)
        pca.fit(Xest)
        X_pca = pca.transform(Xest)
        
        Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_pca, Y, test_size=0.25) # Modificar metodología de validación
#         scaler = MinMaxScaler()#Escala entre 0 y 1
#         Xtrain = scaler.fit_transform(Xtrain)
#         Xtest = scaler.transform(Xtest)
        
        svc.fit(Xtrain, Ytrain)
        pred = svc.predict(Xtest)
        
        #code for calculating accuracy 
        _accuracy_ = accuracy_score(Ytest, pred, normalize=True)
        accuracy_list[j] = _accuracy_

        #code for calculating recall 
        _recalls_ = recall_score(Ytest, pred, average=None)
        recall_list[j] = _recalls_

        #code for calculating precision 
        _precisions_ = precision_score(Ytest, pred, average=None)
        precision_list[j] = _precisions_
        
        _f_score_ = f1_score(Ytest, pred, average=None)
        f_list[j] = _f_score_

        #code for calculating confusion matrix 
        _confusion_matrix_ = confusion_matrix(Ytest, pred)
        TN_list[j] = _confusion_matrix_[0][0]
        FP_list[j] = _confusion_matrix_[0][1]
        FN_list[j] = _confusion_matrix_[1][0]
        TP_list[j] = _confusion_matrix_[1][1]
                
        errores[j] = classification_error(pred, Ytest)
        PorcentajeVS[j] = svc.support_vectors_.shape[0] / Xtrain.shape[0]
    
    if impresion == True:
        print(confusion_matrix(Ytest, pred))
        print(classification_report(Ytest, pred))

    return str(np.mean(accuracy_list)), str(np.std(accuracy_list)), str(np.mean(recall_list)), str(np.std(recall_list)), str(np.mean(precision_list)), str(np.std(precision_list)),  str(np.mean(f_list)), str(np.std(f_list)), str(np.mean(errores)), str(np.std(errores)), str(np.mean(PorcentajeVS)), str(time.time()-tiempo_i)
    

In [93]:
pd.options.mode.chained_assignment = None

randn = np.random.randn
df_types = pd.DataFrame({
    '#Componentes' : pd.Series(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19'])})
df_types["Eficiencia"] = ""
df_types["Int_Eficiencia"] = ""
df_types["Sensibilidad"] = ""
df_types["Int_Sensibilidad"] = ""
df_types["Precision"] = ""
df_types["Int_Precision"] = ""
df_types["F-Score"] = ""
df_types["Int_F-Score"] = ""
df_types["Error_Prueba"] = ""
df_types["Int_error"] = ""
df_types["%Vectores de soporte"] = ""
df_types["Tiempo de ejecución"] = ""
df_types.set_index(['#Componentes'], inplace=True)

i = 0
for k in df_types.index:
    Acc, IntAcc, Sen, IntSen, Pre, IntPre, f, IntF, error, stdError, vs, tiempo = model_SVC(int(k), impresion = False)
    df_types["Eficiencia"][i] = Acc
    df_types["Int_Eficiencia"][i] = IntAcc
    df_types["Sensibilidad"][i] = Sen
    df_types["Int_Sensibilidad"][i] = IntSen
    df_types["Precision"][i] = Pre
    df_types["Int_Precision"][i] = IntPre
    df_types["F-Score"][i] = f
    df_types["Int_F-Score"][i] = IntF
    df_types["Error_Prueba"][i] = error
    df_types["Int_error"][i] = stdError
    df_types["%Vectores de soporte"][i] = vs
    df_types["Tiempo de ejecución"][i] = tiempo
    
    i += 1
    
qgrid_widget = qgrid.show_grid(df_types, show_toolbar=False)

In [94]:
qgrid_widget.get_changed_df()

Unnamed: 0_level_0,Eficiencia,Int_Eficiencia,Sensibilidad,Int_Sensibilidad,Precision,Int_Precision,F-Score,Int_F-Score,Error_Prueba,Int_error,%Vectores de soporte,Tiempo de ejecución
#Componentes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,0.5294117647058822,0.0115412318893826,0.5471530520018137,0.3312690545756718,0.5647127198108707,0.2036852198606715,0.4993480745263419,0.2616442870166283,0.4705882352941176,0.0115412318893826,0.7536085450346419,2.3391966819763184
2,0.6314878892733564,0.0112788968948142,0.6436000162962415,0.2370713784354784,0.6678200698130862,0.1756325431606552,0.6299390442441671,0.1826668256502432,0.3685121107266436,0.0112788968948142,0.6685912240184757,1.989807367324829
3,0.8451557093425606,0.014603756934372,0.8452326453939553,0.153860263503156,0.8483595728286393,0.1171403111424107,0.8414824239941417,0.1266696522763659,0.1548442906574394,0.014603756934372,0.3800519630484988,1.5794382095336914
4,0.865916955017301,0.0061777062530647,0.8680568429278619,0.1365211323353522,0.8762097910563373,0.1010820520468632,0.8653985930065173,0.0998448150194017,0.1340830449826989,0.0061777062530647,0.3145207852193995,1.5884449481964111
5,0.9065743944636676,0.0087365959674412,0.90405828927084,0.1071053104185802,0.9095161665025088,0.0809083783103116,0.903192959528612,0.0801786216062164,0.0934256055363321,0.0087365959674412,0.302973441108545,1.6755256652832031
6,0.9199826989619376,0.0085087870125026,0.9186960409096622,0.0924944825382953,0.9207258944642636,0.0726602946515965,0.9183682820437308,0.0765329609791938,0.0800173010380622,0.0085087870125025,0.2678983833718245,1.507371187210083
7,0.944636678200692,0.009554810568501,0.9434288012077268,0.0628490648672953,0.9435679450224648,0.0519460897687903,0.9430737548664594,0.0544535871969964,0.0553633217993079,0.009554810568501,0.2560623556581985,1.5102863311767578
8,0.9459342560553632,0.004474083232175,0.9449660999249268,0.0591032799410581,0.9450281748758578,0.0523352079585071,0.9444049884921956,0.0512213982642878,0.0540657439446366,0.004474083232175,0.2385969976905311,1.382620334625244
9,0.9515570934256056,0.007027714882903,0.952652555123182,0.0576484804089948,0.9522911499039018,0.0446025964858401,0.951937181989107,0.0469636884603362,0.0484429065743944,0.007027714882903,0.23094688221709,1.3462259769439695
10,0.9563148788927336,0.0108993106992682,0.9556042720358254,0.0456847168311544,0.955840192605834,0.0455956174876501,0.9553398010329844,0.0420417153971993,0.0436851211072664,0.0108993106992682,0.2423498845265589,1.3622386455535889
