# Explore here

In [1]:
import requests
import zipfile
import io

url = "https://storage.googleapis.com/datascience-materials/dogs-vs-cats.zip"
response = requests.get(url)
response.raise_for_status()

with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    z.extractall("dogs-vs-cats")

------------------------------------------------------

In [2]:
import os
import shutil

images_folder = "/workspaces/EPablos2013-Deep-Learning/src/dogs-vs-cats"
source_folder = "/workspaces/EPablos2013-Deep-Learning/src/dogs-vs-cats/dogs-vs-cats/train"
cats_folder = "/workspaces/EPablos2013-Deep-Learning/src/dogs-vs-cats/dogs-vs-cats/train/cats"
dogs_folder = "/workspaces/EPablos2013-Deep-Learning/src/dogs-vs-cats/dogs-vs-cats/train/dogs"

# Crear carpetas si no existen
os.makedirs(source_folder, exist_ok=True)
os.makedirs(cats_folder, exist_ok=True)
os.makedirs(dogs_folder, exist_ok=True)

for filename in os.listdir(images_folder):
    file_path = os.path.join(images_folder, filename)
    if os.path.isfile(file_path):
        nombre = filename.lower()
        if nombre.startswith("cat"):
            shutil.copy(file_path, os.path.join(cats_folder, filename))
        elif nombre.startswith("dog"):
            shutil.copy(file_path, os.path.join(dogs_folder, filename))



In [3]:
from keras.preprocessing import image
import numpy as np
import os

def load_and_preprocess_images(data_dir, target_size=(224, 224)):
    images = []
    labels = []
    valid_extensions = ('.jpg', '.jpeg', '.png')

    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        if os.path.isdir(label_dir):
            for filename in os.listdir(label_dir):
                if not filename.lower().endswith(valid_extensions):
                    continue  # Ignora archivos que no sean imágenes

                img_path = os.path.join(label_dir, filename)
                try:
                    img = image.load_img(img_path, target_size=target_size)
                    img_array = image.img_to_array(img)
                    img_array /= 255.0  # Normalizar los valores de píxeles
                    images.append(img_array)

                    # Asigna la etiqueta 0 para "cat" y 1 para "dog"
                    if label.lower() == "cats":
                        labels.append(0)
                    elif label.lower() == "dogs":
                        labels.append(1)
                except Exception as e:
                    print(f"Error cargando la imagen {img_path}: {e}")

    return np.array(images), np.array(labels)

In [4]:
#CORRECCIÓN

data_dir = "../data/train"

# Cargar imágenes y etiquetas
images, labels = load_and_preprocess_images(data_dir)

# Verifica cuántas se cargaron
print(f"Total imágenes cargadas: {images.shape}")
print(f"Total etiquetas cargadas: {labels.shape}")

# Dividir en train/test si hay datos
if len(images) > 0:
    from sklearn.model_selection import train_test_split
    from keras.utils import to_categorical

    train_images, test_images, train_labels, test_labels = train_test_split(
        images, labels, test_size=0.2, random_state=42
    )

    train_labels = to_categorical(train_labels, num_classes=2)
    test_labels = to_categorical(test_labels, num_classes=2)

    print("Conjunto de datos dividido correctamente.")
else:
    print("No se cargaron imágenes. Verifica la ruta o el contenido del directorio.")

FileNotFoundError: [Errno 2] No such file or directory: '../data/train'

In [None]:
from sklearn.model_selection import train_test_split

train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

from keras.utils import to_categorical

train_labels = to_categorical(train_labels, num_classes=2)
test_labels = to_categorical(test_labels, num_classes=2)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator()
train_datagen_10_percent = ImageDataGenerator(rescale=1/255.)

train_data = train_datagen_10_percent.flow_from_directory(directory="/workspaces/EPablos2013-Deep-Learning/data/train",
                                                          target_size=(224, 224),
                                                          class_mode='categorical',
                                                          batch_size=32,
                                                          shuffle=True)