In [2]:
# Using CNN with the Cats vs Dogs Dataset

import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import matplotlib.pyplot as plt

In [11]:
# skidamo dataset (kao zip fajl) u tmp folderu, a zatim ga otkapujemo u tom istom folderu (Pet images)
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip" \
    -O "/tmp/cats-and-dogs.zip"

local_zip = '/tmp/cats-and-dogs.zip'
zip_ref   = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()

--2023-03-06 21:43:05--  https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip
Resolving download.microsoft.com (download.microsoft.com)... 23.34.248.153, 2600:1408:c400:168a::317f
Connecting to download.microsoft.com (download.microsoft.com)|23.34.248.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824887076 (787M) [application/octet-stream]
Saving to: ‘/tmp/cats-and-dogs.zip’


2023-03-06 21:43:09 (185 MB/s) - ‘/tmp/cats-and-dogs.zip’ saved [824887076/824887076]



In [4]:
# Helper that creates the directory tree used for the training/validation split
def create_train_val_dirs(root_path):
  """Create the training/validation directory tree under ``root_path``.

  The following directories are created (including ``root_path`` itself and
  any missing parents):

      root_path/training/cats      root_path/training/dogs
      root_path/validation/cats    root_path/validation/dogs

  Directories that already exist are left untouched, so the function can be
  called repeatedly without first deleting the tree.

  Args:
    root_path: Path of the directory that will hold the tree.
  """
  for split_name in ('training', 'validation'):
    for class_name in ('cats', 'dogs'):
      # makedirs also creates root_path and the split directory on the way down.
      os.makedirs(os.path.join(root_path, split_name, class_name), exist_ok=True)

In [5]:
# Function that splits the files of one class into training and validation sets
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  """Randomly copy the files of ``SOURCE_DIR`` into training and validation dirs.

  Zero-length files are reported and skipped. The remaining files are shuffled,
  the first ``round(n * SPLIT_SIZE)`` go to ``TRAINING_DIR`` and all the rest
  go to ``VALIDATION_DIR``, so every non-empty file is copied exactly once.

  Args:
    SOURCE_DIR: Directory containing the files to split.
    TRAINING_DIR: Existing directory that receives the training files.
    VALIDATION_DIR: Existing directory that receives the validation files.
    SPLIT_SIZE: Fraction (between 0 and 1) of the files used for training.

  Raises:
    ValueError: If ``SPLIT_SIZE`` is outside the range [0, 1].
  """
  if not 0 <= SPLIT_SIZE <= 1:
    raise ValueError("SPLIT_SIZE must be between 0 and 1")

  # Build a new list instead of removing from the list being iterated:
  # removing while iterating skips the entry that follows each removed one.
  # Sorting makes the result reproducible when the random module is seeded.
  usable_files = []
  for fname in sorted(os.listdir(SOURCE_DIR)):
    if os.path.getsize(os.path.join(SOURCE_DIR, fname)) > 0:
      usable_files.append(fname)
    else:
      print(fname, "is zero length, so ignoring.")

  # random.sample returns a new shuffled list; it must be kept, not discarded.
  shuffled = random.sample(usable_files, len(usable_files))

  # The validation set is simply the remainder, so the two sizes always add up
  # to the number of usable files regardless of rounding.
  training_size = round(len(shuffled) * SPLIT_SIZE)
  training_sample = shuffled[:training_size]
  validation_sample = shuffled[training_size:]

  # Copy each file into the directory of the subset it was assigned to.
  for fname in training_sample:
    copyfile(os.path.join(SOURCE_DIR, fname), os.path.join(TRAINING_DIR, fname))

  for fname in validation_sample:
    copyfile(os.path.join(SOURCE_DIR, fname), os.path.join(VALIDATION_DIR, fname))

In [6]:
# Build the training and validation data generators
def train_val_generators(TRAINING_DIR, VALIDATION_DIR):
  """Create directory-based image generators for training and validation.

  Each generator comes from its own ``ImageDataGenerator`` configured with a
  rescale factor of 1/255, and reads images from the given directory in
  batches of 10, with binary class labels and a target size of 150x150.

  Args:
    TRAINING_DIR: Directory holding the training images.
    VALIDATION_DIR: Directory holding the validation images.

  Returns:
    A ``(train_generator, validation_generator)`` tuple.
  """
  # Settings shared by both directory iterators.
  flow_options = dict(batch_size=10,
                      class_mode='binary',
                      target_size=(150, 150))

  # Training generator
  train_generator = ImageDataGenerator(rescale = 1./255).flow_from_directory(
      directory=TRAINING_DIR, **flow_options)

  # Validation generator
  validation_generator = ImageDataGenerator(rescale = 1./255).flow_from_directory(
      directory=VALIDATION_DIR, **flow_options)

  return train_generator, validation_generator

In [7]:
# Build the model

from tensorflow.keras.optimizers import RMSprop
def create_model():
  """Build and compile the convolutional network used for classification.

  The network is a ``Sequential`` stack of three Conv2D + MaxPooling2D stages
  (32, 64 and 128 filters, 3x3 kernels, ReLU), followed by Flatten, a
  512-unit ReLU Dense layer and a single sigmoid output unit. The first layer
  declares an input shape of (150, 150, 3).

  Returns:
    The model, compiled with RMSprop (learning rate 0.001), binary
    cross-entropy loss and the accuracy metric.
  """
  layers = tf.keras.layers

  # First convolution stage; it also declares the input shape.
  stack = [
      layers.Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)),
      layers.MaxPooling2D(2,2),
  ]

  # Second and third convolution stages.
  for filters in (64, 128):
    stack.append(layers.Conv2D(filters, (3,3), activation='relu'))
    stack.append(layers.MaxPooling2D(2,2))

  # Flatten the feature maps into a 1-D vector, then a 512-unit hidden layer
  # and a single sigmoid output neuron.
  stack.append(layers.Flatten())
  stack.append(layers.Dense(512, activation='relu'))
  stack.append(layers.Dense(1, activation='sigmoid'))

  model = tf.keras.models.Sequential(stack)

  model.compile(
      optimizer=RMSprop(learning_rate=0.001),
      loss='binary_crossentropy',
      metrics=['accuracy'],
  )

  return model

In [8]:
# Root directory that will hold the training/validation split
root_dir = '/tmp/cats-v-dogs'

# If data from a previous run already exists, delete it (used while testing)
if os.path.exists(root_dir):
  shutil.rmtree(root_dir)

# Create the directory tree
create_train_val_dirs(root_path=root_dir)

# Source directories of the extracted dataset
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"

# Destination directories, derived from root_dir so they cannot drift apart
# from the tree created above if root_dir is changed.
TRAINING_DIR = os.path.join(root_dir, "training/")
VALIDATION_DIR = os.path.join(root_dir, "validation/")
TRAINING_CATS_DIR = os.path.join(TRAINING_DIR, "cats/")
VALIDATION_CATS_DIR = os.path.join(VALIDATION_DIR, "cats/")
TRAINING_DOGS_DIR = os.path.join(TRAINING_DIR, "dogs/")
VALIDATION_DOGS_DIR = os.path.join(VALIDATION_DIR, "dogs/")

# Fraction of the files that goes into the training set
split_size = 0.9

In [9]:
# Split the cat and dog images into their training and validation folders
split_data(SOURCE_DIR=CAT_SOURCE_DIR,
           TRAINING_DIR=TRAINING_CATS_DIR,
           VALIDATION_DIR=VALIDATION_CATS_DIR,
           SPLIT_SIZE=split_size)
split_data(SOURCE_DIR=DOG_SOURCE_DIR,
           TRAINING_DIR=TRAINING_DOGS_DIR,
           VALIDATION_DIR=VALIDATION_DOGS_DIR,
           SPLIT_SIZE=split_size)

# Build the data generators over the freshly populated folders
train_generator, validation_generator = train_val_generators(
    TRAINING_DIR=TRAINING_DIR,
    VALIDATION_DIR=VALIDATION_DIR,
)

# Build the model
model = create_model()

# Train the model for 15 epochs, evaluating on the validation generator
history = model.fit(
    train_generator,
    epochs=15,
    verbose=1,
    validation_data=validation_generator,
)

666.jpg is zero length, so ignoring.
11702.jpg is zero length, so ignoring.
Found 22498 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
Epoch 1/15
  45/2250 [..............................] - ETA: 1:19 - loss: 0.8535 - accuracy: 0.5311



Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
