In [34]:
import numpy as np
import random
import math
from operator import mul,neg

from EstrategiaParticionado import ValidacionCruzada
from datos import Datos
from ClasificadorMulticlase import ClasificadorMulticlase
from Clasificador import Clasificador

from scipy.special import expit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score 
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn import datasets
from numpy.linalg import norm




In [2]:
def producto_escalar(w,x):
    """Return the dot product of ``w`` and ``x``.

    The two sequences are walked in lockstep and the pairwise products are
    accumulated starting from 0.  When the lengths differ, the surplus
    trailing components of the longer sequence are ignored.
    """
    acumulado = 0
    for peso, valor in zip(w, x):
        acumulado = acumulado + peso * valor
    return acumulado


In [3]:
def sigmoidal (a):
    """Evaluate the logistic sigmoid at ``a``.

    Delegates to ``scipy.special.expit``, so ``a`` may be a scalar or an
    array; the result is returned exactly as ``expit`` produces it.
    """
    activacion = expit(a)
    return activacion

In [4]:
def crear_vector_w(d):
    """Build a random vector with ``d`` components.

    Each component is an independent ``random.uniform(-1, 1)`` draw, taken in
    index order from Python's global ``random`` generator.  The draws are
    returned as a NumPy array.
    """
    pesos = []
    for _ in range(d):
        pesos.append(random.uniform(-1, 1))
    return np.array(pesos)


In [5]:
# Smoke check: draw a 6-component random vector and display it as the cell output.
crear_vector_w(6)

array([ 0.79653017,  0.86189176,  0.98181759,  0.75480081,  0.60149136,
        0.0801038 ])

In [6]:
def crear_vector_datos(dato):
    """Return a new array equal to ``dato`` with the value 1 placed at index 0.

    The input is not modified.  The leading 1 lets the first weight act as
    an intercept when the result is dotted with a weight vector.
    """
    posicion_sesgo = 0
    termino_sesgo = 1
    return np.insert(dato, posicion_sesgo, termino_sesgo)

In [7]:
# Smoke check: the displayed array should be the input with a 1 inserted in front.
crear_vector_datos(np.array([0,8,3,2]))

array([1, 0, 8, 3, 2])

In [8]:
# Hand-typed 31-component sample: a leading 1 followed by 30 values
# (presumably one wdbc row with the constant term already prepended - TODO confirm).
vector = np.array([1,17.99,10.38,122.8,1001,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189])
# Dot the sample with a freshly drawn random vector of the same length; the printed
# value changes on every run because the draw is not seeded.
print(producto_escalar(crear_vector_w(len(vector)),vector))


-1800.74014311


In [26]:
class ClasificadorRegresionLogistica(Clasificador):
    """Binary logistic-regression classifier trained with per-sample updates.

    Every sample is extended with a leading constant 1 (``crear_vector_datos``)
    so that ``w[0]`` acts as the intercept.  For each training row, with
    features ``x`` (all columns but the last, plus the leading 1) and target
    ``t`` (the last column), training applies

        w <- w - n * (sigmoidal(w . x) - t) * x

    Predictions are 0 or 1, so the targets are expected to use that encoding.

    Attributes:
        w: current weight vector (NumPy array); replaced by ``entrenamiento``.
        n: learning rate applied to every update.
        num_epocas: number of full passes over the training rows.
    """
    w = np.array([])
    n = 0.0 # learning-rate constant
    num_epocas = 0

    def __init__(self,num_epocas, n):
        """Store the number of training epochs and the learning rate."""
        super(ClasificadorRegresionLogistica, self).__init__()
        self.num_epocas = num_epocas
        self.n = n

    def __generar_vector_nuevo(self,dato,sigmoidal,clase):
        """Return the weights after one update step for a single sample.

        Args:
            dato: bias-extended feature vector of the sample.
            sigmoidal: model output for ``dato`` under the current weights
                (note: this parameter shadows the module-level function).
            clase: target value of the sample.
        """
        factor = self.n * (sigmoidal - clase)
        # Vectorised form of "multiply every component by factor".
        return self.w - np.asarray(dato) * factor

    def entrenamiento(self, datosTrain, atributosDiscretos, diccionario):
        """Fit the weights on ``datosTrain`` and return them.

        Args:
            datosTrain: rows whose last column is the target and whose
                remaining columns are numeric features.
            atributosDiscretos: only its length is used, and only as the
                weight dimension when ``datosTrain`` is empty.
            diccionario: unused here.

        Returns:
            The trained weight vector (also stored in ``self.w``).
        """
        # One weight per row column: dropping the target and prepending the
        # constant 1 leaves a vector exactly as wide as the row.  Deriving the
        # size from the data keeps weights and samples aligned even if
        # ``atributosDiscretos`` has a different length.
        if len(datosTrain) > 0:
            dimension = len(datosTrain[0])
        else:
            dimension = len(atributosDiscretos)
        self.w = crear_vector_w(dimension)

        for _ in range(self.num_epocas):
            for dato in datosTrain:
                vector_datos = crear_vector_datos(dato[:-1])
                salida = sigmoidal(producto_escalar(self.w, vector_datos))
                self.w = self.__generar_vector_nuevo(vector_datos,
                                                     salida,
                                                     dato[-1])
        return self.w

    def __vector_test(self, dato_test):
        """Return the bias-extended test row cut down to the length of ``self.w``.

        Training strips the last column of each row, but test rows are passed
        through whole, so the extended vector can be longer than the weights.
        Cutting it here means the result no longer depends on
        ``producto_escalar`` ignoring surplus components.
        """
        return crear_vector_datos(dato_test)[:len(self.w)]

    def __calcular_confianza(self,vector_dato_test):
        """Return ``w . x`` divided by the Euclidean norm of ``w``."""
        return producto_escalar(vector_dato_test,self.w) / norm(self.w)

    def __verosimilitud(self,vector_dato_test):
        """Return the sigmoid of ``w . x`` for one bias-extended vector."""
        return sigmoidal(producto_escalar(self.w,vector_dato_test))

    def __clasifica_dato(self,dato_test):
        """Return 1 when the sigmoid output exceeds 0.5, otherwise 0."""
        return int(self.__verosimilitud(dato_test) > 0.5)

    def clasifica (self, datosTest, atributosDiscretos, diccionario):
        """Return the list of 0/1 predictions, one per row of ``datosTest``.

        ``atributosDiscretos`` and ``diccionario`` are accepted for interface
        compatibility and are not used.
        """
        return [self.__clasifica_dato(self.__vector_test(dato))
                for dato in datosTest]

    def __confianza(self, datosTest):
        """Return the normalised ``w . x`` score of every row of ``datosTest``."""
        return [self.__calcular_confianza(self.__vector_test(dato))
                for dato in datosTest]

    def score (self, datosTest, atributosDiscretos, diccionario):
        """Return an array with one row per test sample.

        Column 0 holds the predicted class and column 1 the normalised
        ``w . x`` score of the sample.
        """
        scores = np.zeros((len(datosTest),2))
        scores[:,0] = self.clasifica(datosTest,atributosDiscretos,diccionario)
        scores[:,1] = np.array(self.__confianza(datosTest))
        return scores


In [35]:
# Load the data sets through the project's Datos loader.
# NOTE(review): the meaning of the second positional argument (True) is defined
# in datos.py, which is not visible from this notebook.
wdbc = Datos('../ficheros/ConjuntosDatos/wdbc.data',True)
example3 = Datos('../ficheros/ConjuntosDatos/example3.data',True)
example4 = Datos('../ficheros/ConjuntosDatos/example4.data',True)
wine = Datos('../ficheros/ConjuntosDatos/wine_proc.data',True)
digits = Datos('../ficheros/ConjuntosDatos/digits.data',True)
# scikit-learn's bundled iris data; not referenced by any other visible cell.
iris = datasets.load_iris()

In [37]:
# Partitioning strategy for the project classifiers (argument 10: presumably the
# number of folds - confirm in EstrategiaParticionado).
estrategia = ValidacionCruzada(10)
# 10-fold splitter for scikit-learn. shuffle=True without random_state means the
# fold assignment, and therefore the scores below, change between runs.
sklearn_cv = KFold(n_splits=10,shuffle=True)
# Reference model with scikit-learn's default hyper-parameters.
clasificador_sklearn = LogisticRegression() 


In [12]:
# Baseline: cross-validate the scikit-learn model on wdbc, using every column
# but the last as features and the last column as the target.

scores = cross_val_score(clasificador_sklearn, wdbc.datos[:,:-1],wdbc.datos[:,-1],cv = sklearn_cv)
# Mean of the per-fold scores returned by cross_val_score.
print(np.mean(scores))

0.602819548872


In [20]:
# Single hold-out check on wdbc: 20% of the rows are kept for testing.
# No random_state is given, so the split differs on every run.
X_train,X_test,y_train,y_test = train_test_split(wdbc.datos[:,:-1],
                                                 wdbc.datos[:,-1], 
                                                 test_size = 0.2)
clasificador_sklearn.fit(X_train,y_train)
# Score of the fitted scikit-learn model on the held-out rows.
print(clasificador_sklearn.score(X_test,y_test))

0.552631578947


In [132]:
# Width of one wdbc row: the feature columns plus the last column that the
# other cells use as the target.
print(len(wdbc.datos[0]))

31


In [196]:
# Single hold-out check on example3: 20% of the rows are kept for testing.
# No random_state is given, so the split differs on every run.
X_train,X_test,y_train,y_test = train_test_split(example3.datos[:,:-1],
                                                 example3.datos[:,-1], 
                                                 test_size = 0.2)
clasificador_sklearn.fit(X_train,y_train)
# Score of the fitted scikit-learn model on the held-out rows.
print(clasificador_sklearn.score(X_test,y_test))

0.55


In [21]:
# Project classifier configured with num_epocas=100 and learning rate n=1.
clasificador = ClasificadorRegresionLogistica(100,1)

In [182]:
# Validate the project classifier on wdbc with the shared partitioning strategy.
# `validacion` is not defined in this notebook (presumably inherited from
# Clasificador - confirm).
errores = clasificador.validacion(estrategia,wdbc,clasificador)

# 1 - mean(errores): presumably the mean accuracy, if errores holds error rates.
print(1 - np.mean(errores))

0.529532967033


In [184]:
# Validate the project classifier on example3 with the shared partitioning strategy.
# `validacion` is not defined in this notebook (presumably inherited from
# Clasificador - confirm).
errores = clasificador.validacion(estrategia,example3,clasificador)

# 1 - mean(errores): presumably the mean accuracy, if errores holds error rates.
print(1 - np.mean(errores))

0.475


In [185]:
# Validate the project classifier on example4 with the shared partitioning strategy.
# `validacion` is not defined in this notebook (presumably inherited from
# Clasificador - confirm).
errores = clasificador.validacion(estrategia,example4,clasificador)

# 1 - mean(errores): presumably the mean accuracy, if errores holds error rates.
print(1 - np.mean(errores))

0.47


In [36]:
# Wrap the binary classifier (num_epocas=10, n=1) in the project's
# ClasificadorMulticlase. This rebinds `clasificador`, so cells run after this
# one use the wrapper rather than the plain binary model.
clasificador = ClasificadorMulticlase(ClasificadorRegresionLogistica(10,1))

In [33]:
# Validate whatever `clasificador` currently refers to on the wine data set.
errores = clasificador.validacion(estrategia,wine,clasificador)
# 1 - mean(errores): presumably the mean accuracy, if errores holds error rates.
print(1 - np.mean(errores))

0.402588235294


In [39]:
# Validate whatever `clasificador` currently refers to on the digits data set.
errores = clasificador.validacion(estrategia,digits,clasificador)
# 1 - mean(errores): presumably the mean accuracy, if errores holds error rates.
print(1 - np.mean(errores))

0.109574468085
