# Mapas autoorgnizados

## Santiago Blasco Arnaiz 

In [1]:
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.utils import check_array
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import mode

In [2]:
class Neurona:
    
    def __init__(self, size, x, y):
        self.weightVec = normalize(0.5 - np.random.rand(1,size))[0]
        self.x = x
        self.y = y
        self.size = size
    
    def predict(self, input):
        return self.weightVec @ input.T
    
    def fit(self, input, alfa):
        self.weightVec = normalize(self.weightVec + (alfa*input))
    
    def neuron_labeling(self, input, target):
        max = np.argmax(input @ self.weightVec.T)
        self.label = target[max]
        return self.label
    
    def print_weightVec(self):
        print(self.x,self.y,self.weightVec)
    
    def getX(self):
        return self.x

    def getY(self):
        return self.y
    

## SOM implementado con el método del coseno

In [3]:
class SOM:
    
    def __init__(self, x, y, size, radious):
        self.map = []
        self.dimX = x
        self.dimY = y
        self.radious = radious
        
        #Creamos el mapa de neuronas
        for i in range(x):
            for j in range(y):
                self.map.append(Neurona(size,i,j))
    
    #Para testear
    def print_map(self):
        for i in self.map:
            i.print_weightVec()
    
    #Devuelve la neurona indicada
    def get_neuron(self,x,y):
        indice = y * self.dimX + x
        return self.map[indice]
    
    #Ajuste
    def fit(self, input, radious, alfa, times):
        self.radious = radious
        
        self.alfa = alfa
        t = 0
        
        #Para cada dato de la entrada y en cada época
        for time in range(times):
            print("TIME",time)
            for x in input:
                #Modificación del alfa
                self.alfa = alfa / 1 + (t/input.shape[0])
                t += 1
                
                #Mayor valor
                winValue = 0
                
                for n in self.map:
                    value = n.predict(x)
                    if value > winValue:
                        winValue = value
                        #X e Y de la neurona ganadora
                        winX = n.getX()
                        winY = n.getY()
                #X e Y iniciales en la superficie de radio indicado
                beginX = winX - self.radious
                beginY = winY - self.radious
                
                #Reajustamos los pesos de las neuronas vecinas a la ganadora
                for i in range( (self.radious*2) + 1 ):
                    for j in range( (self.radious*2) + 1 ):
                        self.get_neuron((beginX + i)%self.dimX, (beginY + j)%self.dimY).fit(x.reshape(1,-1),self.alfa)
            #Decrementamos el radio
            if self.radious > 0:
                self.radious -= 1
    
    #Etiquetas según neurona
    def labelByNeuron(self, input, target):
        #Máximo de cada neurona
        self.labels = []
        for n in self.map:
            self.labels.append(n.neuron_labeling(input, target))
    
    #Etiquetas según moda
    def labelByMode(self, input, target):
        self.labels = [[] for _ in self.map]
        
        #Guardamos el valor objetivo del mayor valor predicho para cada dato
        for x in range(input.shape[0]):

            pred = self.predict(input[x].reshape(1,-1))
            self.labels[np.argmax(pred)].append(target[x])
        #Hayamos la moda para cada neurona
        for i in range(len(self.labels)):
            if self.labels[i] != []:
                
                self.labels[i] = mode(np.array(self.labels[i]))[0][0]
            else:
                self.labels[i] = -1
        
    #Predicción de etiquetas
    def label_predict(self, input):
        labelList = []
        for x in input:
            labelList.append(self.labels[np.argmax(self.predict(x.reshape(1,-1)))])
        return np.array(labelList).reshape(input.shape[0],-1)
    
    #Predicción
    def predict(self, input):
        output = []
        for x in input:
            for n in self.map:
                output.append(n.predict(x.reshape(1,-1)))
        return np.array(output).reshape(input.shape[0],-1)

## Datos del ejercicio

In [4]:
sizeX = 12
sizeY = 8
radious = 3
alfa = 20
times = 5

Descargamos el dataset, le añadimos una muestra más a cada dato y lo normalizamos

In [5]:
mnist = fetch_mldata("MNIST original")
print("mnist",mnist.data.shape)


X = normalize(np.c_[mnist.data, np.ones(mnist.data.shape[0])])
X.shape
print("mnist extendido",X.shape)

#Separamos el dataset para entrenamiento y prueba
x_train, x_test, y_train, y_test = train_test_split(X, mnist.target, test_size=10000)
print("Train",x_train.shape, y_train.shape,"Test", x_test.shape, y_test.shape)






mnist (70000, 784)
mnist extendido (70000, 785)
Train (60000, 785) (60000,) Test (10000, 785) (10000,)


## Creación del mapa autoorganizado

In [6]:
SOMap = SOM(sizeX, sizeY, x_train.shape[1], radious)

Ajuste

In [7]:
SOMap.fit(x_train, radious, alfa, times)

TIME 0
TIME 1
TIME 2
TIME 3
TIME 4


## Etiquetado por neuronas

In [8]:
SOMap.labelByNeuron(x_train,y_train)

In [9]:
predicho = SOMap.label_predict(x_test)

In [10]:
print(accuracy_score(predicho, y_test))

0.7346


Este es el porcentaje de acierto utilizando etiquetado por neuronas

## Etiquetado por moda

In [11]:
SOMap.labelByMode(x_train,y_train)

In [12]:
predicho = SOMap.label_predict(x_test)

In [13]:
print(accuracy_score(predicho, y_test))

0.7488


Este es el porcentaje de acierto utilizando etiquetado por moda