# Katt eller hund?

In [None]:
## Plotta de tre första hundarna och katterna
### Finns i samma folder

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path

# Show a 2x3 grid: the first three dog images on the top row and the first
# three cat images on the bottom row (all read from the same folder).
plt.figure(figsize=(12, 8))
for row, animal in enumerate(("dog", "cat")):
    for i in range(3):
        image = Image.open(Path("dogs_vs_cats_train") / f"{animal}.{i}.jpg")
        # Subplot slots are numbered 1..6, row by row.
        plt.subplot(2, 3, 3 * row + i + 1)
        plt.axis('off')
        plt.imshow(image)

## Skapa två foldrar "dogs_vs_cats_1000" och "dogs_vs_cats_500" med bilder

In [None]:
# Du behöver bara köra denna cell en gång
import os, shutil
from pathlib import Path

# Folder holding the original images (files named "<category>.<index>.jpg").
original_dir = Path("dogs_vs_cats_train")
# Destination root for the subsets; make_subset reads this global when called.
new_base_dir = Path("dogs_vs_cats_1000")

def make_subset(subset_name, start_index, end_index, *, base_dir=None, source_dir=None):
    """Copy a range of cat and dog images into ``<base_dir>/<subset_name>/<category>``.

    Args:
        subset_name: Name of the split folder to create, e.g. "train".
        start_index: First image index to copy (inclusive).
        end_index: Image index to stop at (exclusive).
        base_dir: Destination root (``Path`` or ``str``). Defaults to the
            module-level ``new_base_dir`` as it is at call time.
        source_dir: Folder containing the ``<category>.<index>.jpg`` files.
            Defaults to the module-level ``original_dir``.

    Raises:
        FileNotFoundError: If one of the expected source images is missing.
    """
    # Only fall back to the notebook globals when no explicit folder is given.
    base_dir = Path(new_base_dir if base_dir is None else base_dir)
    source_dir = Path(original_dir if source_dir is None else source_dir)
    for category in ("cat", "dog"):
        target_dir = base_dir / subset_name / category
        # exist_ok keeps the call safe to repeat on an existing folder.
        target_dir.mkdir(parents=True, exist_ok=True)
        for i in range(start_index, end_index):
            fname = f"{category}.{i}.jpg"
            shutil.copyfile(src=source_dir / fname, dst=target_dir / fname)

# Build both image folders from one table: folder name -> (subset, start, end).
# Both folders use the same test range (750-1250).
subset_plans = {
    # 1000 training files (500 per category)
    "dogs_vs_cats_1000": (("train", 0, 500), ("validation", 500, 750), ("test", 750, 1250)),
    # 500 training files (250 per category)
    "dogs_vs_cats_500": (("train", 0, 250), ("validation", 500, 625), ("test", 750, 1250)),
}
for folder_name, plan in subset_plans.items():
    # Rebinding the global on purpose: make_subset writes below new_base_dir.
    new_base_dir = Path(folder_name)
    shutil.rmtree(new_base_dir, ignore_errors=True)  # start from a clean folder
    for subset_name, first, last in plan:
        make_subset(subset_name, start_index=first, end_index=last)


## Funktion för att skapa dataset för träning, validering och test

In [None]:
def create_datasets(new_base_dir, size, batch_size=32):
    """Build the training, validation and test datasets from one image folder.

    Args:
        new_base_dir: Folder (``Path`` or ``str``) containing the ``train``,
            ``validation`` and ``test`` sub-folders.
        size: Target image size, passed on as ``image_size``, e.g. ``(160, 160)``.
        batch_size: Number of images per batch (default 32).

    Returns:
        Tuple ``(train, validation, test)`` with the datasets returned by
        ``image_dataset_from_directory``.
    """
    from tensorflow.keras.utils import image_dataset_from_directory
    from pathlib import Path

    base_dir = Path(new_base_dir)  # also accept plain string paths
    datasets = []
    for heading, subset in (("Training", "train"),
                            ("Validation", "validation"),
                            ("Test", "test")):
        # Print a heading before each dataset is loaded.
        print(heading)
        datasets.append(image_dataset_from_directory(
            base_dir / subset,
            image_size=size,
            batch_size=batch_size))
    train, validation, test = datasets
    return train, validation, test

# dogs_vs_cats_1000

In [None]:
# Create the datasets from "dogs_vs_cats_1000" with 160x160 images
train_dataset, validation_dataset , test_dataset = create_datasets(Path("dogs_vs_cats_1000"), (160,160))

In [None]:
# Inspect the first batch only: print the image and label tensor shapes, then stop.
for data_batch, labels_batch in train_dataset:
    print("data batch shape:", data_batch.shape)
    print("labels batch shape:", labels_batch.shape)
    break

## Definiera en enkel modell som använder "data augmentation"

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Random augmentations applied to the input images: horizontal flips,
# rotations (factor 0.1) and zooms (factor 0.2).
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
 # definiera en enkel modell med 4 conv-lager
from tensorflow import keras
from tensorflow.keras import layers

# Small convnet trained from scratch: augmentation, rescaling, four
# Conv2D + MaxPooling2D stages, then dropout and a single sigmoid unit.
inputs = keras.Input(shape=(160, 160, 3))
x = data_augmentation(inputs)
x = layers.Rescaling(1./255)(x)  # scale pixel values by 1/255
for n_filters in (32, 64, 128, 128):
    x = layers.Conv2D(filters=n_filters, kernel_size=3, activation="relu")(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
# One sigmoid output because the model has two classes.
outputs = layers.Dense(1, activation="sigmoid")(x)
model3 = keras.Model(inputs=inputs, outputs=outputs)

# Two classes -> binary cross-entropy loss.
model3.compile(optimizer="rmsprop",
               loss="binary_crossentropy",
               metrics=["accuracy"])

In [None]:
# Print the layer overview of the from-scratch model.
model3.summary()

## Träna vår enkla modell från scratch (1000 filer)
### Ungefär 30 sekunder per epok => 50 minuter

In [None]:
# Write the model to disk only when the validation loss improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="model3_from_scratch.keras",
    monitor="val_loss",
    save_best_only=True)
callbacks = [best_checkpoint]
# Train for 100 epochs; the returned history is used for plotting later.
history = model3.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100,
    callbacks=callbacks)

In [None]:
# Load the best saved model and evaluate it on the test set
# (for comparison: 0.8250 with 2000 files)
test_model = keras.models.load_model("model3_from_scratch.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
# Function that plots the training history
def plot_acc_loss(hist=None):
    """Plot training/validation accuracy and loss per epoch in two figures.

    Args:
        hist: Either an object with a ``history`` dict (what ``model.fit``
            returns in this notebook) or that dict itself. It must hold the
            keys "accuracy", "val_accuracy", "loss" and "val_loss".
            Defaults to the global ``history``, i.e. the latest training run.
    """
    import matplotlib.pyplot as plt

    if hist is None:
        hist = history  # backward compatible: fall back to the notebook global
    metrics = getattr(hist, "history", hist)
    epochs = range(1, len(metrics["accuracy"]) + 1)
    for number, metric in enumerate(("accuracy", "loss")):
        if number:
            plt.figure()  # the loss curves get a figure of their own
        plt.plot(epochs, metrics[metric], "bo", label=f"Training {metric}")
        plt.plot(epochs, metrics[f"val_{metric}"], "b", label=f"Validation {metric}")
        plt.title(f"Training and validation {metric}")
        plt.legend()
    plt.show()

In [None]:
# Plot the accuracy and loss curves of the run currently stored in `history`.
plot_acc_loss()

## Mobilenet som 'convolutional base' (1000 filer)

In [None]:
from tensorflow import keras

# MobileNetV2 with ImageNet weights and without its classification top,
# built for 160x160 RGB inputs.
conv_base = keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(160, 160, 3))
# Freeze the base so its weights are not updated during training.
conv_base.trainable = False

In [None]:
# Print the layer overview of the convolutional base.
conv_base.summary()

In [None]:
# Make the base trainable to show how many trainable weight tensors it has
# when it is not frozen.
conv_base.trainable = True
print("This is the number of trainable weights "
      "before freezing the conv base:", len(conv_base.trainable_weights))

In [None]:
# Freeze the base again and show the trainable-weight count afterwards.
conv_base.trainable = False
print("This is the number of trainable weights "
      "after freezing the conv base:", len(conv_base.trainable_weights))

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Feature-extraction model: augmentation -> rescaling -> MobileNetV2 base
# (frozen in the cells above) -> small dense classifier with one sigmoid output.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
])

inputs = keras.Input(shape=(160, 160, 3))
# NOTE(review): MobileNetV2's ImageNet weights are documented to expect inputs
# in [-1, 1]; this rescales by 1/255 instead - confirm that is intended.
layer_stack = (
    data_augmentation,
    layers.Rescaling(1./255),
    conv_base,
    layers.Flatten(),
    layers.Dense(256),
    layers.Dropout(0.5),
)
x = inputs
for layer in layer_stack:  # apply the layers in the order listed
    x = layer(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

In [None]:
# Print the layer overview of the feature-extraction model.
model.summary()

In [None]:
# Write the model to disk only when the validation loss improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="feature_extraction1.keras",
    monitor="val_loss",
    save_best_only=True)
callbacks = [best_checkpoint]
# Train for 50 epochs; `history` is overwritten with this run.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=50,
    callbacks=callbacks)

In [None]:
# Load the best saved model and evaluate it on the test set
# (for comparison: 0.9730 with 2000 files)
test_model = keras.models.load_model(
    "feature_extraction1.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
# Plot the accuracy and loss curves of the run currently stored in `history`.
plot_acc_loss()

# Hälften så mycket träningsdata (500 filer)

In [None]:
# Create the datasets from "dogs_vs_cats_500" with 160x160 images
# (rebinds the three dataset variables used by the cells below)
train_dataset, validation_dataset , test_dataset = create_datasets(Path("dogs_vs_cats_500"), (160,160))

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Fresh feature-extraction model for the smaller training set: augmentation ->
# rescaling -> the existing conv_base -> small dense classifier.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
])

inputs = keras.Input(shape=(160, 160, 3))
# NOTE(review): MobileNetV2's ImageNet weights are documented to expect inputs
# in [-1, 1]; this rescales by 1/255 instead - confirm that is intended.
layer_stack = (
    data_augmentation,
    layers.Rescaling(1./255),
    conv_base,
    layers.Flatten(),
    layers.Dense(256),
    layers.Dropout(0.5),
)
x = inputs
for layer in layer_stack:  # apply the layers in the order listed
    x = layer(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

In [None]:
# Write the model to disk only when the validation loss improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="feature_extraction2.keras",
    monitor="val_loss",
    save_best_only=True)
callbacks = [best_checkpoint]
# Train for 50 epochs; `history` is overwritten with this run.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=50,
    callbacks=callbacks)

In [None]:
# Load the best saved model of this run and evaluate it on the test set.
test_model = keras.models.load_model(
    "feature_extraction2.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
# Plot the accuracy and loss curves of the run currently stored in `history`.
plot_acc_loss()

## 500 filer + 224x224 bilder

In [None]:
from tensorflow import keras

# Rebuild the MobileNetV2 base (ImageNet weights, no classification top),
# this time for 224x224 RGB inputs; this rebinds `conv_base`.
conv_base = keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3))
# Freeze the base so its weights are not updated during training.
conv_base.trainable = False

In [None]:
# Create the datasets with image size 224x224
train_dataset, validation_dataset , test_dataset = create_datasets(Path("dogs_vs_cats_500"), (224,224))

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Feature-extraction model for 224x224 inputs: augmentation -> rescaling ->
# the 224x224 conv_base -> small dense classifier with one sigmoid output.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
])

inputs = keras.Input(shape=(224, 224, 3))
# NOTE(review): MobileNetV2's ImageNet weights are documented to expect inputs
# in [-1, 1]; this rescales by 1/255 instead - confirm that is intended.
layer_stack = (
    data_augmentation,
    layers.Rescaling(1./255),
    conv_base,
    layers.Flatten(),
    layers.Dense(256),
    layers.Dropout(0.5),
)
x = inputs
for layer in layer_stack:  # apply the layers in the order listed
    x = layer(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

In [None]:
# Write the model to disk only when the validation loss improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="feature_extraction3.keras",
    monitor="val_loss",
    save_best_only=True)
callbacks = [best_checkpoint]
# Train for 50 epochs; `history` is overwritten with this run.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=50,
    callbacks=callbacks)

In [None]:
# Load the best saved model of this run and evaluate it on the test set.
test_model = keras.models.load_model(
    "feature_extraction3.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
# Plot the accuracy and loss curves of the run currently stored in `history`.
plot_acc_loss()

# Imagenet utan träning med cats_vs_dogs

### Imagenet klasser: https://gist.github.com/ageitgey/4e1342c10a71981d0b491e1b8227328b
### Katter 281-285, tamhundar 151-268, totalt 1000 klasser

In [None]:
# The COMPLETE MobileNetV2, including its ImageNet classification top,
# for 160x160 RGB inputs.
mobile_net = keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=True,
    input_shape=(160, 160, 3))

In [None]:
# Print the layer overview of the complete network.
mobile_net.summary()

In [None]:
# Count how many of the 1000 dog test images the complete ImageNet MobileNetV2
# assigns to one of the dog classes (no training on this data set).
from pathlib import Path

import numpy as np
import tensorflow as tf

# NOTE(review): this folder and the index range 1500-2499 are not created by
# this notebook - confirm the data exists before running.
dog_dir = Path("cats_vs_dogs_minimal") / "test" / "dog"
hund = 0
for i in range(1500, 2500):
    pimage = plt.imread(str(dog_dir / f"dog.{i}.jpg"))
    # add the batch dimension and resize to the 160x160 network input
    pimage = tf.image.resize(pimage.astype(np.float32)[np.newaxis, ...], (160, 160))
    # MobileNetV2 expects pixel values scaled to [-1, 1], not [0, 1]
    pimage = keras.applications.mobilenet_v2.preprocess_input(pimage)
    # predicted class index; verbose=0 avoids one progress bar per image
    pred = mobile_net.predict(pimage, verbose=0).argmax()
    # ImageNet classes 151-268 are the domestic dogs
    if 151 <= pred <= 268:
        hund += 1
print(str(hund) + " hundar av 1000 korrekt klassificerade. Noggrannhet=" + str(hund/1000))

In [None]:
# Count how many of the 1000 cat test images the complete ImageNet MobileNetV2
# assigns to one of the cat classes (no training on this data set).
from pathlib import Path

import numpy as np
import tensorflow as tf

# NOTE(review): this folder and the index range 1500-2499 are not created by
# this notebook - confirm the data exists before running.
cat_dir = Path("cats_vs_dogs_minimal") / "test" / "cat"
katt = 0
for i in range(1500, 2500):
    pimage = plt.imread(str(cat_dir / f"cat.{i}.jpg"))
    # add the batch dimension and resize to the 160x160 network input
    pimage = tf.image.resize(pimage.astype(np.float32)[np.newaxis, ...], (160, 160))
    # MobileNetV2 expects pixel values scaled to [-1, 1], not [0, 1]
    pimage = keras.applications.mobilenet_v2.preprocess_input(pimage)
    # predicted class index; verbose=0 avoids one progress bar per image
    pred = mobile_net.predict(pimage, verbose=0).argmax()
    # ImageNet classes 281-285 are the cats
    if 281 <= pred <= 285:
        katt += 1
print(str(katt) + " katter av 1000 korrekt klassificerade. Noggrannhet=" + str(katt/1000))

In [None]:
# Overall accuracy over the 1000 dog and 1000 cat images counted above.
print("Noggrannhet totalt=" + str((hund + katt)/2000))