In [1]:
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#Los datos corresponden a un subset del dataset cats vs dogs de https://www.kaggle.com/c/dogs-vs-cats
files=glob.glob('/content/drive/MyDrive/Machine_Learning/CATS_DOGS/*.jpg')

In [4]:
print(files[:10])

['/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.9249.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.8745.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.9261.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.8787.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.9467.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.9315.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.8802.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.9458.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.8630.jpg', '/content/drive/MyDrive/Machine_Learning/CATS_DOGS/cat.8751.jpg']


**Training dataset**

In [5]:
size = 64

train_data = np.array([
    np.asarray(Image.open(file).resize((size, size)))
    for file in files[:200]
])

train_label = np.array([
    1 if 'dog' in file else 0
    for file in files[:200]
])

test_label = np.array([
    1 if 'dog' in file else 0
    for file in files[200:]
])

test_data = np.array([
    np.asarray(Image.open(file).resize((size, size)))
    for file in files[200:]
])

In [6]:
train_data_flatten = train_data.reshape(train_data.shape[0], -1).T
train_set_label=train_label.reshape((1,train_label.shape[0]))
test_set_label=test_label.reshape((1,test_label.shape[0]))
test_data_flatten = test_data.reshape(test_data.shape[0], -1).T

In [7]:
test_data_flatten.shape

(12288, 1617)

In [8]:
np.max(train_data_flatten)

np.uint8(255)

**Normalization**

In [9]:
train_set_data= train_data_flatten/255
test_set_data = test_data_flatten/np.max(test_data_flatten)

In [10]:
train_set_data.shape, test_set_data.shape

((12288, 200), (12288, 1617))

**Sigmoid function:**

$a = \sigma(z)=\frac{1}{1+e^{-z}}$

where

$z = \omega_{0} x + \omega_{1}$

In [11]:
def sigmoid(z):
  a = 1/(1+np.exp(-z))
  return a

In [12]:
w,b = np.zeros((len(train_set_data),1)),0
l = sigmoid(np.matmul(w.T,train_set_data)+b)
l.shape

(1, 200)

**Cost function:**

$J(w,b)= -\frac{1}{m}\sum{[y^{(i)}\log{\hat{y}^{(i)}}+(1-y^{(i)})\log{(1-\hat{y}^{(i)}})]}$

$\omega =  \omega -\frac{1}{m}\alpha [X\cdot(a-y)]$

$b = b - \frac{1}{m}\alpha(a-y)$

In [13]:
def grad(x,y,w,b, iteraciones, alpha):
  m = x.shape[1]
  w,b = np.zeros((len(train_set_data),1)),0 #Inicializamos los pesos en cero
  for i in range(iteraciones):
    suma = 0 #Defino la variable donde guardaré el valor de la sumatoria para el cálculo del costo
    a = sigmoid(np.matmul(w.T,x)+b) #Calcula la función de activación con w,b iguales a cero en principio
    #print(a.shape)
    w = w-alpha*(1/m)*np.matmul(x,(a-y).T) #Actualizamos w usando dw
    db = np.mean(a - y)
    b = b - alpha * db #Actualizamos b usando db
    #b = b-alpha*(1/m)*(a-y) #Actualizamos b usando db
    for j in range(a.shape[1]): #Hacemos el cálculo del costo con el w,b iniciales
      suma += y[:1,j]*np.log(a[:1,j])+(1-y[:1,j])*np.log(1-(a[:1,j]))
    J = -(1/m)*sum(suma) #Calculamos el costo
    f = i%(iteraciones/10)
    if f == 0:
      print('J en la ',i,'-ésima iteracion:', J) #Imprimimos el costo en cada iteración
  return w, b, J

In [14]:
w, b, J = grad(x=train_set_data, y=train_set_label, w=w,b=b, iteraciones=100, alpha=0.01) #Realizo el grad. desc. con el conjunto de entrenamiento

J en la  0 -ésima iteracion: 0.6931471805599465
J en la  10 -ésima iteracion: 0.710268813660428
J en la  20 -ésima iteracion: 2.8926264860972437
J en la  30 -ésima iteracion: 4.241779711317313
J en la  40 -ésima iteracion: 0.724206335518679
J en la  50 -ésima iteracion: 1.8101018272204936
J en la  60 -ésima iteracion: 2.2378324436264454
J en la  70 -ésima iteracion: 0.5963814601246341
J en la  80 -ésima iteracion: 3.278922590723895
J en la  90 -ésima iteracion: 1.3027945112618138


In [15]:
def binary(x, y, w, b ,imagen):
  p = sigmoid(np.matmul(w.T, x)+b)
  ytst = np.matmul(w.T, x)+b
  ytst = -(ytst/100) #Usamos la función de activación con los pesos finales
  if ytst[:1,imagen]< 0.5:
    ytst[:1,imagen]=0 #Evaluamos en la imagen específica
    print('La imagen ',imagen,' es un perro')
  if ytst[:1,imagen] > 0.5:
    ytst[:1,imagen]=1
    print('La imagen ',imagen,' es un gato')
  return ytst

In [16]:
twst2 = binary(train_set_data, train_set_label, w, b, 75) #Calcula todas las predicciones del dataset, pero solo imprime la que se pasa en el último arg.

La imagen  75  es un perro


In [17]:
twst2 = binary(train_set_data, train_set_label, w, b, 79)

La imagen  79  es un perro


In [18]:
#plt.imshow(train_data[79])

In [19]:
testing_precission = binary(test_set_data, test_label, w, b, 1) #Aunque solo imprime la imagen seleccionada en el último argumento de la función
                                                                #Realmente está calculando la predicción en todas las imágenes del dataset

La imagen  1  es un perro


In [43]:
def precission(data, labels):
  y_test = np.zeros_like(data[0])
  for i in range(len(data[0].shape)):
    if data[0, i] > 0.5:
      y_test[i]=1
    else:
      y_test[i]=0

  from sklearn.metrics import accuracy_score

  accuracy = accuracy_score(y_test, labels)
  print('La precisión es del ',accuracy*100,'%')

  return accuracy

In [44]:
precission = precission(testing_precission, test_label)

La precisión es del  50.5256648113791 %


In [22]:
import glob
import numpy as np
import copy
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
%matplotlib inline

In [23]:
all_data = np.array([
    np.asarray(Image.open(file).resize((size, size)))
    for file in files
])

all_label = np.array([
    1 if 'dog' in file else 0
    for file in files
])

In [31]:
all_data_flatten = all_data.reshape(all_data.shape[0], -1).T
#all_set_label=all_label.reshape((1,all_label.shape[0])) #Unnecesary

In [29]:
all_data_flatten.shape, all_label.shape

((12288, 1817), (1817,))

In [37]:
scaler=preprocessing.StandardScaler().fit(all_data_flatten)
scaled_data = scaler.transform(all_data_flatten)
scaled_data.shape

(12288, 1817)

In [38]:
from sklearn.model_selection import train_test_split
x_train_test, x_val, y_train_test, y_val = train_test_split(scaled_data.T,all_label, test_size=0.10, random_state=42,shuffle= True)

In [39]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout

In [40]:
tf.random.set_seed(1234)  # applied to achieve reproducible results
model = Sequential(
    [
        tf.keras.Input(shape=(x_train_test.shape[1],)),
        Dense(1, activation='sigmoid', name = 'layer1'),
     ]
)

model.summary()

In [41]:
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer='sgd',
    metrics=[tf.keras.metrics.BinaryAccuracy()]
)

In [42]:
model.fit(
    x_train_test,y_train_test,
    batch_size=20,
    epochs=10,
    validation_split=0.1
)

y_pred_test = model.predict(x_val)
y_pred_test = (y_pred_test >= 0.5).astype(int)
print("Accuracy: {}%".format(100 - np.mean(np.abs(y_pred_test - y_val)) * 100))

Epoch 1/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - binary_accuracy: 0.5427 - loss: 1.3850 - val_binary_accuracy: 0.5488 - val_loss: 1.6108
Epoch 2/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.5996 - loss: 1.3013 - val_binary_accuracy: 0.5061 - val_loss: 2.4773
Epoch 3/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - binary_accuracy: 0.6581 - loss: 1.0558 - val_binary_accuracy: 0.4939 - val_loss: 2.5846
Epoch 4/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - binary_accuracy: 0.6816 - loss: 0.9038 - val_binary_accuracy: 0.4939 - val_loss: 1.8713
Epoch 5/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.7106 - loss: 0.7909 - val_binary_accuracy: 0.5244 - val_loss: 1.9599
Epoch 6/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - binary_accuracy: 0.7472 - loss: 0.6766 - val