# Explore here

In [None]:
#Los IMPORTS
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import json
import random

import requests
import zipfile
import io

import os
from PIL import Image
import shutil
from sklearn.model_selection import train_test_split

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array, save_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from collections import Counter
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping



In [None]:
# Download the dogs-vs-cats ZIP archive and unpack it into the project data folder.
url = "https://storage.googleapis.com/datascience-materials/dogs-vs-cats.zip"
destino_zip = "/workspaces/Leonel_Ponce_Deep_learning_project/data/Repertorio"

# Skip the download when the archive was already unpacked. Extracting again
# would put fresh copies of every image back into the "all" folder, next to
# the train/ and test/ folders that later cells build by moving those images.
if not os.path.isdir(os.path.join(destino_zip, "dogs-vs-cats")):
    # A timeout keeps the cell from hanging forever if the server stalls.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # The archive is held in memory and extracted straight from the bytes.
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        z.extractall(destino_zip)

In [None]:
# Preview the first 9 dog images found in the dataset folder.
origen = "/workspaces/Leonel_Ponce_Deep_learning_project/data/Repertorio/dogs-vs-cats/all"

# Keep only the entries whose name starts with "dog".
perretes = list(filter(lambda nombre: nombre.startswith("dog"), os.listdir(origen)))
dog = perretes[:9]

# 3x3 grid: one image per cell, titled with its file name, axes hidden.
fig = plt.figure(figsize=(10, 10))
for posicion, nombre in enumerate(dog, start=1):
    ax = fig.add_subplot(3, 3, posicion)
    ax.imshow(Image.open(os.path.join(origen, nombre)))
    ax.set_title(nombre)
    ax.axis("off")

fig.tight_layout()
plt.show()

In [None]:
# Preview the first 9 cat images found in the dataset folder.
origen = "/workspaces/Leonel_Ponce_Deep_learning_project/data/Repertorio/dogs-vs-cats/all"

# Keep only the entries whose name starts with "cat".
michis = list(filter(lambda nombre: nombre.startswith("cat"), os.listdir(origen)))
cat = michis[:9]

# 3x3 grid: one image per cell, titled with its file name, axes hidden.
fig = plt.figure(figsize=(10, 10))
for posicion, nombre in enumerate(cat, start=1):
    ax = fig.add_subplot(3, 3, posicion)
    ax.imshow(Image.open(os.path.join(origen, nombre)))
    ax.set_title(nombre)
    ax.axis("off")

fig.tight_layout()
plt.show()

In [None]:
# Prepare the train/test directory layout and list the images to distribute.
origen = "/workspaces/Leonel_Ponce_Deep_learning_project/data/Repertorio/dogs-vs-cats/all"

# One folder per (split, class) pair, all created inside `origen`.
(train_perretes_folder,
 train_michis_folder,
 test_perretes_folder,
 test_michis_folder) = [
    os.path.join(origen, subcarpeta)
    for subcarpeta in ("train/perretes", "train/michis", "test/perretes", "test/michis")
]

for folder in (train_perretes_folder, train_michis_folder, test_perretes_folder, test_michis_folder):
    os.makedirs(folder, exist_ok=True)

# Image files sitting directly in `origen` (extension check is case-insensitive).
extensiones = (".jpg", ".jpeg", ".png")
ima_complete = [f for f in os.listdir(origen) if f.lower().endswith(extensiones)]

# Assign each image to its class according to the file-name prefix.
perretes, michis = [], []
for nombre in ima_complete:
    if nombre.startswith("dog"):
        perretes.append(nombre)
    elif nombre.startswith("cat"):
        michis.append(nombre)

In [None]:
# Split each class 80/20 into train and test and move the files into place.

def mover(lista, destino):
    """Move every file named in ``lista`` from ``origen`` into ``destino``.

    Args:
        lista: File names (not paths) located directly inside ``origen``.
        destino: Directory that receives the files; it must already exist.
    """
    for img in lista:
        shutil.move(os.path.join(origen, img),
                    os.path.join(destino, img))


def _repartir(nombres, train_folder, test_folder):
    """Split one class 80/20, move its files and return ``(train, test)`` names.

    When ``nombres`` is empty (presumably because a previous run already moved
    the images) the existing split is read back from the two folders instead
    of calling ``train_test_split`` on an empty list, which raises ValueError.
    """
    if not nombres:
        return sorted(os.listdir(train_folder)), sorted(os.listdir(test_folder))

    # os.listdir returns names in arbitrary order; sorting first makes
    # random_state=42 produce the same split on every machine.
    train, test = train_test_split(sorted(nombres), test_size=0.2, random_state=42)
    mover(train, train_folder)
    mover(test, test_folder)
    return train, test


train_perretes, test_perretes = _repartir(perretes, train_perretes_folder, test_perretes_folder)
train_michis, test_michis = _repartir(michis, train_michis_folder, test_michis_folder)


In [None]:
# Stratified 80/20 split over the images that are still loose in `origen`.
# Only cat/dog image files are labelled: `origen` also contains the train/ and
# test/ folders, which a bare os.listdir() would return and label as dogs.
nombre_ima = sorted(
    f for f in os.listdir(origen)
    if f.startswith(("cat", "dog")) and f.lower().endswith((".jpg", ".jpeg", ".png"))
)
etiquetas = [0 if f.startswith("cat") else 1 for f in nombre_ima]  # 0 = cat, 1 = dog

if nombre_ima:
    train_files, test_files, train_labels, test_labels = train_test_split(
        nombre_ima, etiquetas, test_size=0.2, random_state=42, stratify=etiquetas
    )
else:
    # No loose images left (e.g. they were already moved into train/ and
    # test/), so there is nothing to split.
    train_files, test_files, train_labels, test_labels = [], [], [], []

print("Train:", len(train_files), "Test:", len(test_files))

In [None]:
# List the entries still sitting directly in `origen`, grouped by name prefix.
ima_complete = os.listdir(origen)
perretes = [f for f in ima_complete if f.startswith("dog")]
gatetes = [f for f in ima_complete if f.startswith("cat")]


def split_and_move(images, label, train_ratio=0.8, seed=42):
    """Copy ``images`` from ``origen`` into ``train/<label>`` and ``test/<label>``.

    Despite the name, files are copied, not moved; the originals stay in
    ``origen``.

    Args:
        images: File names (not paths) located directly inside ``origen``.
            The list itself is left untouched.
        label: Name of the class sub-folder used under ``train`` and ``test``.
        train_ratio: Fraction of the images that goes to ``train``.
        seed: Seed for the shuffle, so the split is reproducible.
    """
    if not images:
        # Nothing to copy; also avoids creating empty class folders.
        return

    # Shuffle a sorted copy: the caller's list is not mutated and the result
    # does not depend on the arbitrary order returned by os.listdir.
    barajadas = sorted(images)
    random.Random(seed).shuffle(barajadas)
    split_idx = int(train_ratio * len(barajadas))

    for subset, nombres in (("train", barajadas[:split_idx]), ("test", barajadas[split_idx:])):
        destino = os.path.join(origen, subset, label)
        # shutil.copy fails if the destination folder is missing.
        os.makedirs(destino, exist_ok=True)
        for img in nombres:
            shutil.copy(os.path.join(origen, img), os.path.join(destino, img))


# Use the same class folder names that the directory-setup cell creates
# ("perretes" / "michis"); copying into "dog" / "cat" would add extra class
# folders under train/ and test/.
split_and_move(perretes, "perretes")
split_and_move(gatetes, "michis")

In [None]:

# Build the batch iterators that feed the network from the train/test folders.
# Both generators only multiply the pixel values by 1/255.
trn_database = ImageDataGenerator(rescale=1./255)
tst_database = ImageDataGenerator(rescale=1./255)

# Settings shared by both directory iterators.
opciones_flujo = dict(target_size=(180, 180), batch_size=8, class_mode="binary")

train_database = trn_database.flow_from_directory(
    "/workspaces/Leonel_Ponce_Deep_learning_project/data/Repertorio/dogs-vs-cats/all/train",
    **opciones_flujo,
)

test_database = tst_database.flow_from_directory(
    "/workspaces/Leonel_Ponce_Deep_learning_project/data/Repertorio/dogs-vs-cats/all/test",
    **opciones_flujo,
)

In [None]:
# Report the class-name -> index mapping chosen by the train iterator.
print("Clases y sus índices:", train_database.class_indices)

# Per-image class indices exposed by each directory iterator.
train_labels = train_database.classes
test_labels  = test_database.classes

# Same two-line report (size, then class distribution) for each split.
for msg_total, msg_distribucion, split_labels in (
    ("Total imágenes en train:", "Distribución por clase en train:", train_labels),
    ("Total imágenes en test:", "Distribución por clase en test:", test_labels),
):
    print(msg_total, len(split_labels))
    print(msg_distribucion, Counter(split_labels))


In [None]:
# Small CNN for binary classification, built layer by layer.
model = Sequential()

# Two convolution + max-pooling stages (16 then 32 filters, 3x3 kernels).
# MaxPooling2D(2, 2) in positional form means pool_size=2 and strides=2.
model.add(Conv2D(16, kernel_size=(3, 3), activation='relu', input_shape=(180, 180, 3)))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Dense head: flatten, 64 hidden units with 50% dropout, one sigmoid output.
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Training configuration: Adam optimizer, binary cross-entropy loss (matching
# the single sigmoid output) and accuracy as the reported metric.
configuracion_compile = {
    "optimizer": 'adam',
    "loss": 'binary_crossentropy',
    "metrics": ['accuracy'],
}
model.compile(**configuracion_compile)

In [None]:
# Stop early when the validation loss has not improved for 5 epochs and keep
# the weights of the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Train once, with the callback attached. Calling fit() a second time on the
# same model continues from the already-trained weights, so two 7-epoch calls
# amount to 14 epochs and only the last history is kept.
history = model.fit(
    train_database,
    validation_data=test_database,
    epochs=7,
    callbacks=[early_stop]
)

Epoch 8/10
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 513s 186ms/step - accuracy: 0.9635 - loss: 0.1007 - val_accuracy: 0.7774 - val_loss: 0.9307
Epoch 9/10
1760/2500 ━━━━━━━━━━━━━━━━━━━━ 1:52 152ms/step - accuracy: 0.9698 - loss: 0.0831
--------
La memoria se agota en la época 9 de 10. Como el accuracy sigue mejorando con cada época pero no es posible completar las 10, detendré el entrenamiento en la época 7.