In [1]:
from IPython.display import display
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from shutil import copyfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import requests


import warnings
warnings.filterwarnings('ignore')







In [2]:


# Download the Cats-vs-Dogs archive and unpack it into the PetImages folder.
url = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"  # actual URL of the archive
archive_path = "kagglecatsanddogs_5340.zip"  # name of the file saved locally
extract_dir = "PetImages"  # destination folder for extraction

if os.path.exists(archive_path):
    # Re-running the cell must not fetch the archive again.
    print(f"Arquivo '{archive_path}' já existe; download ignorado.")
else:
    partial_path = archive_path + ".part"
    # Stream the body to disk chunk by chunk instead of holding the whole
    # archive in memory, and bound the connect/read waits with a timeout.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        response.raise_for_status()  # abort on HTTP errors
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    # Rename only after the download finished, so an interrupted run never
    # leaves a truncated archive under the final name.
    os.replace(partial_path, archive_path)
    print(f"Arquivo '{archive_path}' baixado com sucesso!")

with zipfile.ZipFile(archive_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"Arquivo '{archive_path}' extraído para a pasta '{extract_dir}'")

Arquivo 'kagglecatsanddogs_5340.zip' baixado com sucesso!
Arquivo 'kagglecatsanddogs_5340.zip' extraído para a pasta 'PetImages'


In [3]:
# Base directory of the dataset
base_dir = 'PetImages'

# One directory per split: training, validation and testing
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('training', 'validation', 'testing')
)

# Per-class (cats / dogs) sub-directories inside each split
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, class_name) for class_name in ('cats', 'dogs')
)
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, class_name) for class_name in ('cats', 'dogs')
)
test_cats_dir, test_dogs_dir = (
    os.path.join(test_dir, class_name) for class_name in ('cats', 'dogs')
)

In [4]:
# Create the directory tree for every split/class pair (no error if present)
for split_class_dir in (
    train_cats_dir,
    train_dogs_dir,
    validation_cats_dir,
    validation_dogs_dir,
    test_cats_dir,
    test_dogs_dir,
):
    os.makedirs(split_class_dir, exist_ok=True)

In [5]:
# Dividir as imagens
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly copy the non-empty files of SOURCE into TRAINING and TESTING.

    Args:
        SOURCE: Directory holding the original files. A trailing path
            separator is accepted but no longer required.
        TRAINING: Existing directory that receives the first part of the
            shuffled file list.
        TESTING: Existing directory that receives the remaining files.
        SPLIT_SIZE: Fraction of the usable files copied to TRAINING; the
            resulting count is truncated with ``int()``.

    Zero-length files are reported on stdout and skipped. Entries that are
    not regular files (e.g. sub-directories) are skipped silently, because
    ``copyfile`` cannot copy them.

    Note: files are copied, not moved, and the destination directories are
    not cleared first. Calling this again with a different random state can
    therefore leave the same file in both TRAINING and TESTING.
    """
    files = []
    # Sort the listing so that a seeded ``random`` yields the same split on
    # every run, independent of the order the OS returns entries in.
    for filename in sorted(os.listdir(SOURCE)):
        file = os.path.join(SOURCE, filename)
        if not os.path.isfile(file):
            continue
        if os.path.getsize(file) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    testing_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(SOURCE, filename), os.path.join(TRAINING, filename))

    for filename in testing_set:
        copyfile(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))

# Source folders (as laid out by the extracted archive) and the
# training/testing destinations for each class.
CAT_SOURCE_DIR = "PetImages/PetImages/Cat/"
TRAINING_CATS_DIR = "PetImages/training/cats/"
TESTING_CATS_DIR = "PetImages/testing/cats/"
DOG_SOURCE_DIR = "PetImages/PetImages/Dog/"
TRAINING_DOGS_DIR = "PetImages/training/dogs/"
TESTING_DOGS_DIR = "PetImages/testing/dogs/"

split_size = .9  # fraction of each class copied to the training folder

# Seed the generator right before splitting. split_data copies into folders
# that are never cleared, so an unseeded re-run of this cell would draw a
# different sample and mix training and testing files. With a fixed seed the
# split repeats as long as the directory listing order is unchanged.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

666.jpg is zero length, so ignoring.
11702.jpg is zero length, so ignoring.


In [None]:
# Split the held-out test files into validation and test sets (50/50)
def split_test_data(SOURCE, VALIDATION, TESTING, SPLIT_SIZE):
    """Move a random fraction of SOURCE's files into VALIDATION.

    Args:
        SOURCE: Directory currently holding the held-out files.
        VALIDATION: Directory that receives ``int(n * SPLIT_SIZE)`` randomly
            chosen files; created if missing.
        TESTING: Directory that receives the remaining files; created if
            missing. When it is the same directory as SOURCE the remaining
            files are simply left in place.
        SPLIT_SIZE: Fraction of the usable files moved to VALIDATION.

    Files are moved rather than copied so that the validation and test sets
    never share an image. Zero-length files and entries that are not regular
    files are left untouched in SOURCE.
    """
    files = []
    # Sorted listing keeps the split reproducible under a seeded ``random``.
    for filename in sorted(os.listdir(SOURCE)):
        file = os.path.join(SOURCE, filename)
        if os.path.isfile(file) and os.path.getsize(file) > 0:
            files.append(filename)

    validation_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    validation_set = shuffled_set[:validation_length]
    testing_set = shuffled_set[validation_length:]

    os.makedirs(VALIDATION, exist_ok=True)
    for filename in validation_set:
        os.replace(os.path.join(SOURCE, filename), os.path.join(VALIDATION, filename))

    # Only relocate the remainder when TESTING is a different directory.
    if os.path.abspath(TESTING) != os.path.abspath(SOURCE):
        os.makedirs(TESTING, exist_ok=True)
        for filename in testing_set:
            os.replace(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))


# Fill the validation folders, which are otherwise created but left empty.
# The emptiness check keeps the cell idempotent: a re-run must not move yet
# another half of the remaining test images.
for _held_out_dir, _validation_dir in (
    (test_cats_dir, validation_cats_dir),
    (test_dogs_dir, validation_dogs_dir),
):
    if not os.listdir(_validation_dir):
        split_test_data(_held_out_dir, _validation_dir, _held_out_dir, 0.5)

In [None]:
# Load the pre-trained VGG16 network without its classification layers
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

In [None]:
# Freeze every layer of the base so that training does not update it
for base_layer in base_model.layers:
    base_layer.trainable = False

# Custom classification head: flatten the base output, then a 256-unit ReLU layer
x = Dense(256, activation='relu')(Flatten()(base_model.output))
# Single sigmoid unit for the binary (cat vs. dog) decision
predictions = Dense(1, activation='sigmoid')(x)

In [None]:
# Assemble the full model: frozen VGG16 base followed by the custom head
model = Model(inputs=base_model.input, outputs=predictions)

# Compile it (RMSprop takes `learning_rate`, not the older `lr` argument)
model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

# Data generators: augmentation for training, rescaling only for validation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='binary',
    batch_size=10,
    target_size=(150, 150),
)

validation_generator = validation_datagen.flow_from_directory(
    directory=validation_dir,
    class_mode='binary',
    batch_size=10,
    target_size=(150, 150),
)

In [None]:
# Train the model; each epoch runs through every batch of both generators
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
)

In [None]:
# Test images get the same rescale-only preprocessing as the validation set
test_datagen = ImageDataGenerator(rescale=1./255)
# Image size and class mode match the ones used for training
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    class_mode='binary',
    batch_size=10,
    target_size=(150, 150),
)

In [None]:
# Evaluate the model on the whole test set. A fixed `steps=50` with a batch
# size of 10 would score only 500 randomly ordered images, so the reported
# accuracy would vary between runs and ignore the rest of the test data.
test_loss, test_acc = model.evaluate(test_generator, steps=len(test_generator))
print('Test accuracy:', test_acc)


In [None]:
# Training loss per epoch, drawn through the explicit figure/axes interface
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss over Epochs')
loss_ax.legend()
plt.show()

In [None]:
# Training accuracy per epoch, drawn through the explicit figure/axes interface
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training Accuracy over Epochs')
acc_ax.legend()
plt.show()

In [None]:
# Training loss and accuracy on one figure, using two y-axes over a shared x-axis
fig, ax1 = plt.subplots()

color = 'tab:red'
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss', color=color)
ax1.plot(history.history['loss'], label='Training Loss', color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax1.set_title('Training Loss and Accuracy')

ax2 = ax1.twinx()  # second y-axis that shares the same x-axis

color = 'tab:blue'
ax2.set_ylabel('Accuracy', color=color)
ax2.plot(history.history['accuracy'], label='Training Accuracy', color=color)
ax2.tick_params(axis='y', labelcolor=color)

# A bare plt.legend() only sees the current axes (ax2) and would omit the
# loss curve, so gather the handles of both axes into a single legend.
loss_handles, loss_labels = ax1.get_legend_handles_labels()
acc_handles, acc_labels = ax2.get_legend_handles_labels()
ax2.legend(loss_handles + acc_handles, loss_labels + acc_labels)

# Run the layout pass last so it accounts for the title and both y-labels.
fig.tight_layout()
plt.show()