In [1]:
# utilizado para la manipulación de directorios y rutas
import os

# Cálculo científico y vectorial para python
import numpy as np

# Libreria para graficos
from matplotlib import pyplot

# Modulo de optimizacion en scipy
from scipy import optimize

# le dice a matplotlib que incruste gráficos en el cuaderno
%matplotlib inline

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [17]:
data = np.loadtxt("/content/gdrive/MyDrive/Colab Notebooks/machine learning/data/wine_preparado.csv", delimiter=',')
# print(data)
X, y = data[:, 1:], data[:, 0]
print(X.shape)
print(y.shape)

(178, 13)
(178,)


In [16]:
y

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 3., 3., 3., 3., 3., 3.,
       3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
       3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
       3., 3., 3., 3., 3., 3.])

In [18]:
# data.shape
X.shape[1]

13

In [5]:
input_layer_size  = 13
num_labels = 3
m = y.size

In [6]:
def calcularSigmoide(z):
    # Calcula la sigmoide de z.

    return 1.0 / (1.0 + np.exp(-z))

In [7]:
def calcularCosto(theta, X, y, lambda_):
    # Inicializa algunos valores utiles
    m = y.size

    # convierte las etiquetas a valores enteros si son boleanos
    if y.dtype == bool:
        y = y.astype(int)

    J = 0
    grad = np.zeros(theta.shape)

    h = calcularSigmoide(X.dot(theta.T))

    temp = theta
    temp[0] = 0

    J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) + (lambda_ / (2 * m)) * np.sum(np.square(temp))

    grad = (1 / m) * (h - y).dot(X)
    # Se aplica regularizacion en la siguiente linea
    grad = grad + (lambda_ / m) * temp

    return J, grad

In [19]:
# valores de prueba para los parámetros theta
theta_t = np.array([-2, -1, 1, 2], dtype=float)

# valores de prueba para las entradas
X_t = np.concatenate((np.ones((5, 1)), np.arange(1, 16).reshape(5, 3, order='F')/10.0), axis=1)
print(X_t)
# valores de testeo para las etiquetas
y_t = np.array([1, 0, 1, 0, 1])

# valores de testeo para el parametro de regularizacion
lambda_t = 3

[[1.  0.1 0.6 1.1]
 [1.  0.2 0.7 1.2]
 [1.  0.3 0.8 1.3]
 [1.  0.4 0.9 1.4]
 [1.  0.5 1.  1.5]]


In [9]:
J, grad = calcularCosto(theta_t, X_t, y_t, lambda_t)

print('Costo         : {:.6f}'.format(J))
print('Costo esperadot: 2.534819')
print('-----------------------')
print('Gradientes:')
print(' [{:.6f}, {:.6f}, {:.6f}, {:.6f}]'.format(*grad))
print('Gradientes esperados:')
print(' [0.146561, -0.548558, 0.724722, 1.398003]');

Costo         : 2.534819
Costo esperadot: 2.534819
-----------------------
Gradientes:
 [0.146561, -0.548558, 0.724722, 1.398003]
Gradientes esperados:
 [0.146561, -0.548558, 0.724722, 1.398003]


In [10]:
def oneVsAll(X, y, num_labels, lambda_):

    m, n = X.shape

    all_theta = np.zeros((num_labels, n + 1))

    # Agrega unos a la matriz X
    X = np.concatenate([np.ones((m, 1)), X], axis=1)


    for c in np.arange(num_labels):
        initial_theta = np.zeros(n + 1)
        options = {'maxiter': 50}
        res = optimize.minimize(calcularCosto,
                                initial_theta,
                                (X, (y == (c + 1)), lambda_),
                                jac=True,
                                method='BFGS',
                                options=options)

        all_theta[c] = res.x

    return all_theta

In [11]:
lambda_ = 0.1
all_theta = oneVsAll(X, y, num_labels, lambda_)

  return 1.0 / (1.0 + np.exp(-z))
  J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) + (lambda_ / (2 * m)) * np.sum(np.square(temp))


In [12]:
print(all_theta)

[[-1.37419736e+00 -1.24872182e+00  1.44902038e+00  3.79340287e+00
  -1.01177895e+00  1.10627690e-02 -5.25334452e-01  3.23516777e+00
   4.66611223e-01 -2.39524959e+00 -3.40572498e-01 -3.93080497e-01
   1.68986539e+00  2.24890182e-02]
 [ 2.42219535e+00  1.42932061e+00 -1.36192505e+00 -3.42928593e+00
   4.97614576e-01 -1.08248477e-02  5.69333804e-01  1.51707218e+00
   2.41019148e+00  1.77571291e+00 -3.04424661e+00  3.70522541e+00
  -1.54800916e+00 -1.89252272e-02]
 [-2.85540791e-01 -4.01635117e-01  1.11510275e+00  4.97859627e-01
   8.70812898e-02  4.61786492e-02 -1.13870605e+00 -4.07900015e+00
  -4.16033923e-01 -1.40857406e+00  1.57579344e+00 -1.30800389e+00
  -2.49991317e+00  3.38602231e-03]]


In [13]:
def predictOneVsAll(all_theta, X):
    m = X.shape[0];
    num_labels = all_theta.shape[0]

    p = np.zeros(m)

    # Add ones to the X data matrix
    X = np.concatenate([np.ones((m, 1)), X], axis=1)
    p = np.argmax(calcularSigmoide(X.dot(all_theta.T)), axis = 1)

    return p + 1

In [23]:
print(X.shape)
pred = predictOneVsAll(all_theta, X)
print('Precision del conjuto de entrenamiento: {:.2f}%'.format(np.mean(pred == y) * 100))
XPrueba = X[128:141, :].copy()
yPrueba = y[128:141].copy()
# XPrueba = [[11.44, 2.3, 2.41, 20, 96, 2.9, 2.79, 0.82, 1.83, 3.25, 0.8, 3.39, 625]]
# yPrueba = [2]
print(XPrueba.shape)
print(len(XPrueba))
# print(XPrueba)
# print(yPrueba.shape)
# print(yPrueba)

#print(np.ones((1)))
#p = np.zeros(1)
XPrueba = np.concatenate((np.ones((13, 1)), XPrueba), axis=1)
print(XPrueba)
# print(XPrueba.shape)
# print(XPrueba)
p = np.argmax(calcularSigmoide(XPrueba.dot(all_theta.T)), axis = 1)
print(p + 1)

(178, 13)
Precision del conjuto de entrenamiento: 99.44%
(13, 13)
13
[[1.000e+00 1.237e+01 1.630e+00 2.300e+00 2.450e+01 8.800e+01 2.220e+00
  2.450e+00 4.000e-01 1.900e+00 2.120e+00 8.900e-01 2.780e+00 3.420e+02]
 [1.000e+00 1.204e+01 4.300e+00 2.380e+00 2.200e+01 8.000e+01 2.100e+00
  1.750e+00 4.200e-01 1.350e+00 2.600e+00 7.900e-01 2.570e+00 5.800e+02]
 [1.000e+00 1.286e+01 1.350e+00 2.320e+00 1.800e+01 1.220e+02 1.510e+00
  1.250e+00 2.100e-01 9.400e-01 4.100e+00 7.600e-01 1.290e+00 6.300e+02]
 [1.000e+00 1.288e+01 2.990e+00 2.400e+00 2.000e+01 1.040e+02 1.300e+00
  1.220e+00 2.400e-01 8.300e-01 5.400e+00 7.400e-01 1.420e+00 5.300e+02]
 [1.000e+00 1.281e+01 2.310e+00 2.400e+00 2.400e+01 9.800e+01 1.150e+00
  1.090e+00 2.700e-01 8.300e-01 5.700e+00 6.600e-01 1.360e+00 5.600e+02]
 [1.000e+00 1.270e+01 3.550e+00 2.360e+00 2.150e+01 1.060e+02 1.700e+00
  1.200e+00 1.700e-01 8.400e-01 5.000e+00 7.800e-01 1.290e+00 6.000e+02]
 [1.000e+00 1.251e+01 1.240e+00 2.250e+00 1.750e+01 8.500e+01