# K-MEDIAS-BORROSO

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
##### PRUEBAS #####

# X = np.array([[1,1,1], [2,2,1]])
# V = np.array([[3,1,2], [1,2,1], [4,5,4]])

# X = np.array([
#     [1, 1],
#     [1, 3],
#     [1, 5],
#     [2, 2],
#     [2, 3],
#     [6, 3],
#     [6, 4],
#     [7, 1],
#     [7, 3],
#     [7, 5] 
# ])

# V = np.array([[6.7, 3.43], [2.39, 2.94]])

##### PRUEBAS #####

In [3]:
def formula(d, b):
    """Return the unnormalised fuzzy weight ``(1 / d) ** (1 / (b - 1))``.

    Args:
        d: Distance value; a scalar or a NumPy array.
        b: Exponent parameter; ``b - 1`` is used as a divisor, so ``b``
            must not be 1.

    Returns:
        ``np.power`` of the inverse of ``d`` raised to ``1 / (b - 1)``.
    """
    inversa = 1 / d
    exponente = 1 / (b - 1)
    return np.power(inversa, exponente)

In [4]:
def dist(x, v):
    """Return the squared Euclidean distance between ``x`` and ``v``.

    No square root is taken: the result is the sum of the squared
    component-wise differences.
    """
    diferencia = x - v
    return np.sum(np.square(diferencia))

In [5]:
def calcularDivisor(Xj, V, b = 2):
    """Return the normalising term used for the memberships of sample ``Xj``.

    The term is the sum, over every row of ``V``, of
    ``formula(dist(Xj, row), b)``.

    Args:
        Xj: A single sample.
        V: Iterable of centroids (iterated row by row).
        b: Exponent parameter forwarded to ``formula``.

    Returns:
        The accumulated sum; ``0`` when ``V`` yields no rows.
    """
    return sum(formula(dist(Xj, centro), b) for centro in V)

In [6]:
def calcularP(X, V, b = 2):
    """Compute the fuzzy membership matrix of the samples ``X`` to the centroids ``V``.

    Entry ``P[i, j]`` is the membership of sample ``j`` to centroid ``i``:
    the inverse squared Euclidean distance raised to ``1 / (b - 1)``,
    normalised so that every column sums to 1.

    The same ``b`` is used for the numerator and for the normalising sum;
    using different exponents would produce columns that do not sum to 1.

    Args:
        X: Samples, one per row. A 1-D array is treated as a column of
            scalar samples.
        V: Centroids, one per row. A 1-D array is treated as a column of
            scalar centroids.
        b: Exponent parameter; must not be 1 (``b - 1`` is a divisor).

    Returns:
        Array of shape ``(V.shape[0], X.shape[0])``.
    """
    X = np.asarray(X)
    V = np.asarray(V)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if V.ndim == 1:
        V = V[:, np.newaxis]

    # Squared distance between every centroid (rows) and every sample (columns).
    diferencias = V[:, np.newaxis, :] - X[np.newaxis, :, :]
    distancias = np.sum(np.square(diferencias), axis=2)

    exponente = 1 / (b - 1)
    # Zero distances are handled explicitly below, so the divide warning is noise.
    with np.errstate(divide="ignore"):
        pesos = np.power(1.0 / distancias, exponente)

    # A sample lying exactly on a centroid would give inf / inf = NaN.
    # Give it full membership to that centroid instead (shared equally if
    # several centroids coincide with it) and zero to the others.
    coincide = distancias == 0
    columnas = coincide.any(axis=0)
    pesos[:, columnas] = coincide[:, columnas]

    return pesos / pesos.sum(axis=0)

In [7]:
def recalcularCentros(X, U, b = 2):
    """Recompute every centroid as a membership-weighted mean of the samples.

    Centroid ``i`` is ``sum_j(U[i, j] ** b * X[j]) / sum_j(U[i, j] ** b)``.

    Args:
        X: Samples, one per row.
        U: Membership matrix with one row per centroid and one column per
            sample.
        b: Exponent applied to the memberships. Defaults to 2, which was the
            value previously hard-coded here.

    Returns:
        Array with one recomputed centroid per row of ``U``.
    """
    # NOTE(review): a blank line is printed on every call; kept so that the
    # existing console output does not change.
    print()

    X = np.asarray(X)
    pesos = np.power(np.asarray(U), b)

    # Row i of the product is sum_j(pesos[i, j] * X[j]).
    numerador = np.dot(pesos, X)
    # Transposing puts the centroid axis last so the per-centroid totals
    # broadcast correctly for both 1-D and 2-D ``X``.
    return (numerador.T / pesos.sum(axis=1)).T

In [8]:
def cumpleEpsilon(vAntiguo, vNuevo, epsilon):
    """Return whether the Euclidean norm of ``vNuevo - vAntiguo`` is below ``epsilon``."""
    return np.sqrt(np.sum(np.square(vNuevo-vAntiguo))) < epsilon


def seguirActualizando(vAntiguo, vNuevo, epsilon):
    """Return ``True`` while at least one centroid has moved ``epsilon`` or more.

    Each centroid (row) is compared with its own previous position. The
    earlier version looped over the rows but passed the full matrices to
    ``cumpleEpsilon`` on every iteration, so the loop index was never used.

    Args:
        vAntiguo: Previous centroids, one per row.
        vNuevo: Recomputed centroids, one per row.
        epsilon: Movement threshold below which a centroid counts as stable.

    Returns:
        ``True`` if any centroid moved by ``epsilon`` or more, ``False``
        otherwise (including when there are no centroids).
    """
    return any(
        not cumpleEpsilon(antiguo, nuevo, epsilon)
        for antiguo, nuevo in zip(vAntiguo, vNuevo)
    )

In [9]:
def dibujarGrafica(X, V, radio = 2):
    """Plot the samples and the centroids with matplotlib.

    The chart is only drawn when ``V`` has exactly two columns; otherwise the
    function returns without doing anything.

    Args:
        X: Samples; the first two columns are used as x/y coordinates.
        V: Centroids, one per row.
        radio: Radius of the translucent circle drawn around each centroid.
            Defaults to 2, the value previously hard-coded here.
    """
    if V.shape[1] != 2:
        return

    colores = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    ax = plt.gca()

    for i, centro in enumerate(V):
        # Cycle through the palette so more than len(colores) centroids do
        # not raise IndexError.
        color = colores[i % len(colores)]
        ax.add_patch(plt.Circle((centro[0], centro[1]), radio, color=color, alpha=0.4))
        ax.plot(centro[0], centro[1], 'o', c = 'r')

    ax.plot(X[:, 0], X[:, 1], 'o', c = 'b')

    plt.show()

In [10]:
def trainKMedias(X, V, b, epsilon):
    """Iterate the centroid update until ``seguirActualizando`` reports no change.

    On every pass the memberships are computed with ``calcularP``, candidate
    centroids with ``recalcularCentros``, and the current centroids are
    printed and plotted. When the candidates no longer require an update the
    loop ends and the current centroids (not the candidates) are returned.

    Args:
        X: Samples passed through to the helper functions.
        V: Initial centroids.
        b: Exponent parameter forwarded to ``calcularP``.
        epsilon: Threshold forwarded to ``seguirActualizando``.

    Returns:
        The centroids in use when the stopping condition was met.
    """
    print("\n########## ENTRENAMIENTO DE K-MEDIAS ##########\n")
    centros = V
    iteracion = 0
    terminado = False
    while not terminado:
        pertenencias = calcularP(X, centros, b)
        candidatos = recalcularCentros(X, pertenencias)
        terminado = not seguirActualizando(centros, candidatos, epsilon)

        if terminado:
            print("Hemos terminado en la iteración {}, con los centroides:\n{}\n"
                 .format(iteracion, centros))
            dibujarGrafica(X, centros)
        else:
            print("En la iteración {} los centroides son:\n{}"
                 .format(iteracion, centros))
            dibujarGrafica(X, centros)
            centros = candidatos
            iteracion += 1

    return centros

In [11]:
def testKMedias(V, vName, b = 2, directorio = 'test'):
    """Classify every CSV file in ``directorio`` against the centroids ``V``.

    Each file is read without a header row; all columns except the last are
    used as features (the last column is ignored — presumably a label,
    confirm against the data files). For every row the centroid with the
    highest membership is reported on standard output.

    Args:
        V: Centroids, one per row.
        vName: Names indexed like the rows of ``V``.
        b: Exponent parameter forwarded to ``calcularP``. Defaults to 2.
        directorio: Directory holding the CSV files. Defaults to ``'test'``.
    """
    print("########## TEST DE K-MEDIAS ##########")
    # Sorted so the report order does not depend on the file system.
    for file in sorted(os.listdir(directorio)):
        df = pd.read_csv(os.path.join(directorio, file), header=None)
        pruebaX = np.array(df.iloc[:, :-1])
        P = calcularP(pruebaX, V, b)

        # argmax along axis 0 gives one centroid index per sample. A plain
        # P.argmax() is an index into the flattened matrix, which is only a
        # valid centroid index when the file holds a single row.
        for posMax in P.argmax(axis=0):
            print("El archivo {} pertenece al centroide situado en\n{} -> {}\n"
                 .format(file, V[posMax], vName[posMax]))
