# Proyecto de clasificación de imágenes

In [3]:
# Bibliotecas necesarias

import json
import zipfile
import os
import pandas as pd
from PIL import Image
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing import image
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D

In [5]:
# Base directory of the project data (the Kaggle download below reports saving to /content)
root_path = "/content"

### Configuramos Kaggle y, a través de la API, descargamos el conjunto de datos

In [4]:
!pip install kaggle
!mkdir ~/.kaggle
!touch '/root/.kaggle/kaggle.json'



In [6]:
## Appi token
api_token = {"username":"jairoalmanza","key":"4054e3a9b3a8e7c76e969e3fa1132644"}

In [7]:
# Store the token in ~/.kaggle/kaggle.json, readable by the owner only.
kaggle_dir = os.path.join(os.path.expanduser("~"), ".kaggle")
# Do not depend on an earlier shell cell having created the directory
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json_path = os.path.join(kaggle_dir, "kaggle.json")
# Open with mode 0o600 so a newly created file is never readable by other users,
# not even for the short time between writing the secret and a later chmod
fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as file:
    # The mode given to os.open is ignored when the file already exists,
    # so tighten the permissions explicitly before the secret is written
    os.chmod(kaggle_json_path, 0o600)
    json.dump(api_token, file)

In [8]:
# Download the "dogs-vs-cats" competition data with the Kaggle CLI
# (the recorded output shows it arriving as dogs-vs-cats.zip in /content)
!kaggle competitions download -c dogs-vs-cats

Downloading dogs-vs-cats.zip to /content
 97% 785M/812M [00:03<00:00, 265MB/s]
100% 812M/812M [00:04<00:00, 209MB/s]


In [9]:
# Extract the data from the downloaded zip archive(s)
def extract_zip_archives(directory="."):
    """Extract every ``*.zip`` file located directly in ``directory`` into that directory.

    Archives are processed in sorted name order. Archives that only appear
    as a result of the extraction are not processed, because the directory
    listing is taken once up front.

    Returns
    -------
    list of str
        File names of the archives that were extracted.
    """
    archives = sorted(name for name in os.listdir(directory) if name.endswith(".zip"))
    for name in archives:
        # The context manager closes the archive even when extraction fails midway
        with zipfile.ZipFile(os.path.join(directory, name), "r") as zip_ref:
            zip_ref.extractall(directory)
    return archives


# Assigned (rather than left as the last expression) so the cell prints nothing
extracted_archives = extract_zip_archives()

In [10]:
import os
import cv2
import matplotlib.pyplot as plt
# Directories that receive the extracted files
train_dir = "/content"
test_dir = "/content"
# Locations of the train and test zip archives
train_zip_path = "/content/train.zip"
test_zip_path = "/content/test1.zip"
# Helper that unpacks a single zip archive
def unzip_file(zip_path, extract_to):
    """Extract every member of the archive at ``zip_path`` into the directory ``extract_to``."""
    archive = zipfile.ZipFile(zip_path, mode='r')
    try:
        archive.extractall(path=extract_to)
    finally:
        # Always release the archive handle, also when extraction raises
        archive.close()
# Unpack the train archive first, then the test archive, each into its target directory
for archive_path, destination in ((train_zip_path, train_dir), (test_zip_path, test_dir)):
    unzip_file(archive_path, destination)

### Carga del conjunto de datos

Datos de train

In [7]:
# Directory that holds the training images
image_dir = '/content/train/'

# Decoded training images, one (200, 200, 3) array per .jpg file
train_images = []
im = 0
# NOTE: files are visited in plain os.listdir order on purpose. The label cell
# walks the same directory the same way, and row i of X_train has to keep
# matching row i of y_train.
for filename in os.listdir(image_dir):
    im += 1
    print(f'imagen: {im}', end='\r')

    if not filename.endswith(".jpg"):
        continue

    # The context manager releases the file handle once the pixels have been read
    with Image.open(os.path.join(image_dir, filename)) as raw_img:
        # Force three channels: a grayscale, palette or RGBA file would otherwise
        # not have the (200, 200, 3) shape the model expects
        img = raw_img.convert("RGB").resize((200, 200))

    # An RGB image of size 200x200 converts to an array of shape (200, 200, 3)
    img_tensor = np.array(img)
    train_images.append(img_tensor)

# Stack the per-image arrays into a single array
X_train = np.array(train_images)
# The list holds a second copy of every image; drop it to free the memory
del train_images

print(X_train.shape)

(25000, 200, 200, 3)


In [8]:
# Normalize X_train: scale the integer pixel values to the range [0, 1].
# dtype=np.float32 keeps the result in single precision; dividing by the Python
# float 255.0 without it yields float64, i.e. twice the memory for the
# (25000, 200, 200, 3) array.
# The integer check makes the cell safe to re-run: once X_train is floating
# point it has already been scaled and must not be divided again.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = np.divide(X_train, 255.0, dtype=np.float32)

In [9]:
# One-hot labels derived from the file names: cat -> [1, 0], dog -> [0, 1]
y_train = []

# Same walk (os.listdir order, .jpg files only) as the cell that loads X_train,
# so that label i belongs to image i.
for file in os.listdir(image_dir):
    if not file.endswith('.jpg'):
        continue
    if file.startswith('cat'):
        y_train.append([1, 0])
    elif file.startswith('dog'):
        y_train.append([0, 1])
    else:
        # Skipping the file silently would leave y_train one row short and
        # shift every following label against its image in X_train
        raise ValueError(f"Cannot derive a label from file name: {file!r}")

# Convert the list of [cat, dog] pairs to a 2-D array
y_train = np.array(y_train)

print(y_train.shape)

(25000, 2)


Datos de Test

In [10]:
# Directory that holds the test images
image_dir_test = '/content/test1/'

# Decoded test images (the X_test set), one (200, 200, 3) array per .jpg file
test_images = []
im1 = 0
# Files are visited in os.listdir order; row i of X_test corresponds to the
# i-th .jpg file of that listing.
for filename_ts in os.listdir(image_dir_test):
    im1 += 1
    print(f'imagen: {im1}', end='\r')

    if not filename_ts.endswith(".jpg"):
        continue

    # The context manager releases the file handle once the pixels have been read
    with Image.open(os.path.join(image_dir_test, filename_ts)) as raw_imgts:
        # Force three channels: a grayscale, palette or RGBA file would otherwise
        # not have the (200, 200, 3) shape the model expects
        imgts = raw_imgts.convert("RGB").resize((200, 200))

    # An RGB image of size 200x200 converts to an array of shape (200, 200, 3)
    img_tensorts = np.array(imgts)
    test_images.append(img_tensorts)

# Stack the per-image arrays into a single array
X_test = np.array(test_images)
# The list holds a second copy of every image; drop it to free the memory
del test_images

print(X_test.shape)

(12500, 200, 200, 3)


In [11]:
# Normalize X_test: scale the integer pixel values to the range [0, 1].
# dtype=np.float32 keeps the result in single precision; dividing by the Python
# float 255.0 without it yields float64, i.e. twice the memory for the
# (12500, 200, 200, 3) array.
# The integer check makes the cell safe to re-run: once X_test is floating
# point it has already been scaled and must not be divided again.
if np.issubdtype(X_test.dtype, np.integer):
    X_test = np.divide(X_test, 255.0, dtype=np.float32)

In [16]:
# Write the prepared arrays to .npy files in the current working directory
np.save(file='X_train.npy', arr=X_train)
np.save(file='y_train.npy', arr=y_train)
np.save(file='X_test.npy', arr=X_test)

### Construcción de una red neuronal convolucional (RNA)

In [4]:
# Model architecture (VGG16-style layout).
# Each stage is (number of filters, number of stacked 3x3 convolutions) and is
# closed by a 2x2 max-pooling layer with stride 2.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
is_first_layer = True
for n_filters, n_convs in conv_stages:
    for conv_index in range(n_convs):
        # Only the very first layer declares the input shape (200x200, 3 channels)
        shape_kwargs = {"input_shape": (200, 200, 3)} if is_first_layer else {}
        model.add(Conv2D(filters=n_filters, kernel_size=(3, 3), padding="same",
                         activation="relu", **shape_kwargs))
        is_first_layer = False
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

# Classifier head: two 4096-unit ReLU layers followed by a 2-way softmax output
model.add(Flatten())
for n_units, activation in [(4096, "relu"), (4096, "relu"), (2, "softmax")]:
    model.add(Dense(units=n_units, activation=activation))

In [12]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot pairs); accuracy is the reported metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [13]:
# Train for a single epoch in batches of 128, with validation_split=0.2 (20 % of the samples set aside for validation)
model.fit(x=X_train, y=y_train, batch_size=128, epochs=1, validation_split=0.2)



<keras.src.callbacks.History at 0x79adb6a4faf0>

In [15]:
# Evaluate on the same X_train / y_train arrays that were passed to fit,
# so the reported value is not a held-out score.
train_loss, accuracy1 = model.evaluate(x=X_train, y=y_train)

print("El Accuracy: {}".format(accuracy1))

El Accuracy: 0.5


### Predicción del modelo

In [16]:
# Model output for the test images: one row of two softmax values per image,
# in the [cat, dog] order used to encode y_train
y_test = model.predict(x=X_test)



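Debido al costo computacional, solo pudimos entrenar el modelo durante una época (epochs=1), logrando un accuracy de 0.5 medido sobre el propio conjunto de entrenamiento. En un problema binario con clases balanceadas, este valor equivale a predecir al azar, por lo que el modelo todavía no ha aprendido a distinguir entre las dos clases.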