# Maskinlæring: Klassifisering av dyr

## Trening av vårt første CNN!
![l](https://www.managers.org.uk/wp-content/uploads/2020/03/BrainTraining.jpg)  
I denne notebooken kommer vi til å gjøre et par ting: 
1. Skrive inn studentnummeret vårt
2. Generere et nevralt nettverk (tenk en hjerne) med dette studentnummeret.
3. Trene dette nettverket

### Vi starter med å importere biblioteker og funksjoner

In [2]:
import numpy as np
import pandas as pd
import os

import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.model_selection import train_test_split
!pip3 install tqdm
from tqdm.auto import tqdm

### La oss nå gå inn på Kaggle, hente ut API-nøkkelen vår, og legge den inn under.
### Dette gjør at vi kan bruke datasettet som finnes på Kaggle.

In [2]:
kaggle_user= ""
kaggle_api = ""
if kaggle_user == "" or kaggle_api == "":
    print("Skriv inn bruker og api-nøkkel!")
else:
    os.environ["KAGGLE_USERNAME"] = kaggle_user
    os.environ["KAGGLE_KEY"] = kaggle_api
    !pip3 install kaggle
    !kaggle datasets download -d alessiocorrado99/animals10

Skriv inn bruker og api-nøkkel!


### Etter å ha lastet ned datasettet kan vi unzippe filen

In [4]:
# Extract the downloaded dataset archive with unzip in quiet mode (-q).
# NOTE(review): presumably this creates the ./raw-img folder that the data
# loading cell reads from -- confirm against the archive contents.
!unzip -q animals10.zip
print("Complete!")

### Vi bruker studentnummeret som en tilfeldig seed for å velge aktiveringsfunksjon og læringsrate

In [1]:
import random

# The student number (kept as the string that was typed) seeds the random
# generator, so the same student number always produces the same activation
# function and learning rate.
Student_nr = input("Hva er student nummeret ditt?")
img_size = 100  # images are resized to img_size x img_size pixels
random.seed(Student_nr)

# Candidate activation functions for the layers of the network.
activation_layers = [
    "relu",
    "selu",
    "elu",
    "leaky_relu",
]

# random.randint is inclusive at both ends, so the draw is 1..len(list).
# Using it directly as an index raised IndexError for the highest draw and
# could never select index 0. Wrapping with the modulo maps the highest draw
# to index 0 and leaves every other draw (and thus every previously working
# student number) on the same activation function as before.
n = random.randint(1, len(activation_layers)) % len(activation_layers)
layer = activation_layers[n]

# Learning rate drawn uniformly between 1e-4 and 1e-1.
lr = random.uniform(1e-4, 1e-1)
print(f"Your learning rate is: {lr}")
print(f"Your activation layer is: {layer}")

KeyboardInterrupt: Interrupted by user

## Vi laster inn dataene til et sett for trening, validering og testing

In [6]:
base_dir = './raw-img'

# Class folders (Italian names). Sorting fixes the class order, and a folder's
# position in LABELS is used as its integer class id below. Non-directory
# entries are skipped so a stray file in base_dir cannot break the image loop.
LABELS = sorted(
    entry for entry in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, entry))
)

# Italian <-> English class names, in both directions.
translate = {
    "cane": "dog", "cavallo": "horse", "elefante": "elephant",
    "farfalla": "butterfly", "gallina": "chicken", "gatto": "cat",
    "mucca": "cow", "pecora": "sheep", "ragno": "spider",
    "scoiattolo": "squirrel",
    "dog": "cane", "horse": "cavallo", "elephant": "elefante",
    "butterfly": "farfalla", "chicken": "gallina", "cat": "gatto",
    "cow": "mucca", "sheep": "pecora", "spider": "ragno",
    "squirrel": "scoiattolo",
}

# English name of every class, in the same order as LABELS, so that indexing
# with a predicted class id gives the right name. (Taking translate.values()
# mixed both translation directions and did not line up with the class ids.)
# Unknown folder names fall back to the folder name itself.
LABELS_english = [translate.get(label, label) for label in LABELS]

dataset = []  # [image, class_id] pairs used for training and validation
testset = []  # [image, class_id] pairs held out from training

for count, label in enumerate(tqdm(LABELS)):
    path = os.path.join(base_dir, label)
    for i, img in enumerate(os.listdir(path)):
        # color_mode='rgb' already requests three colour channels; the
        # deprecated grayscale argument is no longer passed.
        image = load_img(os.path.join(path, img),
                         color_mode='rgb',
                         target_size=(img_size, img_size))
        # Scale pixel values from 0..255 down to 0..1.
        image = img_to_array(image) / 255.0
        # The first 1000 listed files of each class go to the training pool,
        # every remaining file of that class goes to the held-out set.
        if i < 1000:
            dataset.append([image, count])
        else:
            testset.append([image, count])

X, y = zip(*dataset)
test, test_labels = zip(*testset)

# One-hot encode the class ids; num_classes is given explicitly so the width
# always equals the number of class folders.
y = to_categorical(y, num_classes=len(LABELS))
y = np.array(y)
X = np.array(X)
print("Train Shape:{}\nTrain Labels shape: {}".format(X.shape,y.shape))

# 80 % of the training pool is used for training, 20 % for validation.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=99)

# Random augmentations (flips, rotation, zoom, shifts, shear) applied to the
# batches produced by datagen.flow(...).
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.1,
    fill_mode="nearest"
)

### Definerer funksjon for å predikere et bilde. Tar inn et bilde og henter ut dyret med høyest sannsynlighet

In [7]:
def predict_image(image):
    """Return the label of the most probable class for one image.

    The image is converted to an array, divided by 255 and given a leading
    batch axis before it is passed to the notebook-level ``model``. The index
    of the highest score is looked up in the notebook-level
    ``LABELS_english`` list.

    Args:
        image: An image accepted by ``img_to_array``.

    Returns:
        The string ``"Pred: <label>"``.
    """
    scaled = img_to_array(image) / 255
    # The model predicts on batches, so wrap the single image in a batch of 1.
    batch = np.expand_dims(scaled, axis=0)

    scores = model.predict(batch)
    best_index = np.argmax(scores)
    return f"Pred: {LABELS_english[best_index]}"

### Lager den utrente modellen 

In [8]:
# The untrained network: four convolution/pooling stages followed by a small
# dense classifier. `layer` (activation), `img_size` and `lr` come from the
# student-number cell.
model = tf.keras.models.Sequential([
    # Convolutional part.
    tf.keras.layers.Conv2D(16, (3, 3), activation=layer,
                           input_shape=(img_size, img_size, 3), padding="valid"),
    tf.keras.layers.MaxPooling2D(3),

    tf.keras.layers.Conv2D(32, (3, 3), activation=layer, padding="valid"),
    tf.keras.layers.MaxPooling2D(2),

    tf.keras.layers.BatchNormalization(),

    # NOTE(review): the two pooling layers below use pool size 1, which
    # presumably leaves the feature map size unchanged -- confirm intent.
    tf.keras.layers.Conv2D(64, (3, 3), activation=layer, padding="valid"),
    tf.keras.layers.MaxPooling2D(1),

    tf.keras.layers.Conv2D(64, (3, 3), activation=layer, padding="valid"),
    tf.keras.layers.MaxPooling2D(1),

    # Dense classifier part.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation=layer),
    tf.keras.layers.Dense(32, activation=layer),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),

    # One softmax output per class (10 classes).
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

### I cellen ovenfor lager vi modellen. Vi prøver å gjette hvilket dyr det er på bildene med denne utrente modellen

In [10]:
image_dir = "raw-img"
sub_dirs = os.listdir(image_dir)

# Collect one sample image (the first listed file) from every class folder.
# Stray files and empty folders are skipped instead of raising an error.
complete_paths = []
for sub_dir in sub_dirs:
    class_dir = os.path.join(image_dir, sub_dir)
    if not os.path.isdir(class_dir):
        continue
    file_names = os.listdir(class_dir)
    if file_names:
        complete_paths.append(f"{image_dir}/{sub_dir}/{file_names[0]}")

plt.figure(figsize = (16,16))
# Show at most 10 samples; slicing avoids an IndexError when fewer exist.
for i, sample_path in enumerate(complete_paths[:10]):
    image = load_img(sample_path, target_size=(img_size, img_size))
    prediction = predict_image(image)
    plt.subplot(5, 5, i + 1)    # 5x5 grid, filled from the top left
    plt.title(prediction)
    plt.imshow(image)
    # Inside the loop so the axes are hidden on every subplot, not only on
    # the last one.
    plt.axis("off")
plt.tight_layout()
plt.show()

### La oss nå trene modellen på datasettet for å øke nøyaktigheten

In [None]:
# Train for 25 epochs on augmented batches of 256 images; the validation set
# is passed as-is (not through the augmenting generator).
history = model.fit(
    x=datagen.flow(X_train, y_train, batch_size=256),
    epochs=25,
    validation_data=(X_valid, y_valid),
)

### La oss se hvor bra modellen vår gjetter

In [None]:
# Training and validation accuracy per epoch of the latest fit call.
for history_key, legend_text in (("acc", "Acc"), ("val_acc", "Val Acc")):
    plt.plot(history.history[history_key], label=legend_text)
plt.legend()
plt.show()

### Loss grafen kan fortelle oss om modellen lærer noe, og om enkelte parametre må endres for mer optimal trening

In [None]:
# Training and validation loss per epoch of the latest fit call.
for history_key, legend_text in (("loss", "loss"), ("val_loss", "Val loss")):
    plt.plot(history.history[history_key], label=legend_text)
plt.legend()
plt.show()

### Vi lar modellen gjette igjen på de samme 10 dyrebildene, nå som den er trent

In [None]:
image_dir = "raw-img"
sub_dirs = os.listdir(image_dir)

# Collect one sample image (the first listed file) from every class folder.
# Stray files and empty folders are skipped instead of raising an error.
complete_paths = []
for sub_dir in sub_dirs:
    class_dir = os.path.join(image_dir, sub_dir)
    if not os.path.isdir(class_dir):
        continue
    file_names = os.listdir(class_dir)
    if file_names:
        complete_paths.append(f"{image_dir}/{sub_dir}/{file_names[0]}")

plt.figure(figsize = (16,16))
# Show at most 10 samples; slicing avoids an IndexError when fewer exist.
for i, sample_path in enumerate(complete_paths[:10]):
    image = load_img(sample_path, target_size=(img_size, img_size))
    prediction = predict_image(image)
    plt.subplot(5, 5, i + 1)    # 5x5 grid, filled from the top left
    plt.title(prediction)
    plt.imshow(image)
    # Inside the loop so the axes are hidden on every subplot, not only on
    # the last one.
    plt.axis("off")
plt.tight_layout()
plt.show()

### For å øke nøyaktigheten ytterligere kan vi la den trene enda flere ganger

In [None]:
# Continue training: epoch counting starts at 25 and runs up to epoch 100.
# `history` is replaced, so it only holds the epochs of this call.
history = model.fit(
    x=datagen.flow(X_train, y_train, batch_size=256),
    initial_epoch=25,
    epochs=100,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Training and validation accuracy per epoch of the latest fit call.
for history_key, legend_text in (("acc", "Acc"), ("val_acc", "Val Acc")):
    plt.plot(history.history[history_key], label=legend_text)
plt.legend()
plt.show()

In [None]:
# Training and validation loss per epoch of the latest fit call.
for history_key, legend_text in (("loss", "Loss"), ("val_loss", "Val loss")):
    plt.plot(history.history[history_key], label=legend_text)
plt.legend()
plt.show()

In [None]:
image_dir = "raw-img"
sub_dirs = os.listdir(image_dir)

# Collect one sample image (the first listed file) from every class folder.
# Stray files and empty folders are skipped instead of raising an error.
complete_paths = []
for sub_dir in sub_dirs:
    class_dir = os.path.join(image_dir, sub_dir)
    if not os.path.isdir(class_dir):
        continue
    file_names = os.listdir(class_dir)
    if file_names:
        complete_paths.append(f"{image_dir}/{sub_dir}/{file_names[0]}")

plt.figure(figsize = (16,16))
# Show at most 10 samples; slicing avoids an IndexError when fewer exist.
for i, sample_path in enumerate(complete_paths[:10]):
    image = load_img(sample_path, target_size=(img_size, img_size))
    prediction = predict_image(image)
    plt.subplot(5, 5, i + 1)    # 5x5 grid, filled from the top left
    plt.title(prediction)
    plt.imshow(image)
    # Inside the loop so the axes are hidden on every subplot, not only on
    # the last one.
    plt.axis("off")
plt.tight_layout()
plt.show()