<a href="https://colab.research.google.com/github/Schypozhoa/TourismClassifier/blob/master/Model/ModelGenerator_TourismClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import all needed modules

In [1]:
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import losses
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import shutil
import zipfile

# Split Dataset in Local Environment

In [2]:
def splitDataset(originalPath, ratio, savedPath, splitted):
    """Copy a class-per-folder image dataset into ``train``/``test`` folders.

    Every sub-directory of ``originalPath`` is treated as one class. The class
    name used under ``savedPath`` is the folder name with its first 7
    characters dropped (presumably a fixed prefix in the raw dataset's folder
    names -- TODO confirm against the dataset). Files are copied, not moved,
    so ``originalPath`` is left untouched.

    Folder and file names are processed in sorted order so that the same
    input always yields the same split; ``os.listdir`` alone gives no
    ordering guarantee. Entries of ``originalPath`` that are not directories,
    and entries of a class folder that are not regular files, are skipped.

    Args:
        originalPath: Folder containing one sub-folder per class.
        ratio: Fraction of each class's files copied to ``train``; the rest go
            to ``test``. The cut index is ``int(fileCount * ratio)``.
        savedPath: Destination folder; created when it does not exist.
        splitted: Only consulted when ``savedPath`` already exists. ``True``
            reuses the existing folder without copying anything; ``False``
            is treated as an error.

    Returns:
        Tuple ``(trainPath, testPath)``.

    Raises:
        FileNotFoundError: ``originalPath`` does not exist.
        FileExistsError: ``savedPath`` exists and ``splitted`` is false.
    """
    if not os.path.exists(originalPath):
        raise FileNotFoundError("OriginalPath folder not found, please check the path")

    trainPath = os.path.join(savedPath, "train")
    testPath = os.path.join(savedPath, "test")

    if os.path.exists(savedPath):
        if splitted:
            # Trust the caller: the folder is reused as-is, nothing is verified.
            return trainPath, testPath
        raise FileExistsError("""SavedPath folder already exist and ALREADY_SPLITTED = FALSE, 
                        please choose another folder or change the ALREADY_SPLITTED variable to TRUE if you want to use the existing folder""")

    # makedirs also creates savedPath itself on the way down.
    os.makedirs(trainPath, exist_ok=True)
    os.makedirs(testPath, exist_ok=True)

    for folderName in sorted(os.listdir(originalPath)):
        classPathOriginal = os.path.join(originalPath, folderName)
        if not os.path.isdir(classPathOriginal):
            # Stray files (e.g. OS metadata) are not classes.
            continue

        className = folderName[7:]
        trainClassPath = os.path.join(trainPath, className)
        testClassPath = os.path.join(testPath, className)
        os.makedirs(trainClassPath, exist_ok=True)
        os.makedirs(testClassPath, exist_ok=True)

        images = sorted(
            entry for entry in os.listdir(classPathOriginal)
            if os.path.isfile(os.path.join(classPathOriginal, entry))
        )
        cut = int(len(images) * ratio)
        for image in images[:cut]:
            shutil.copy(os.path.join(classPathOriginal, image), os.path.join(trainClassPath, image))
        for image in images[cut:]:
            shutil.copy(os.path.join(classPathOriginal, image), os.path.join(testClassPath, image))

    return trainPath, testPath

# Download and extract the dataset in Colab

In [3]:
# Download the pre-split dataset archive from the project's GitHub repository.
!wget https://github.com/Schypozhoa/TourismClassifier/raw/master/Data/AttractionDataset-Splitted.zip

--2023-05-24 14:20:39--  https://github.com/Schypozhoa/TourismClassifier/raw/master/Data/AttractionDataset-Splitted.zip
Resolving github.com (github.com)... 192.30.255.113
Connecting to github.com (github.com)|192.30.255.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Schypozhoa/TourismClassifier/master/Data/AttractionDataset-Splitted.zip [following]
--2023-05-24 14:20:39--  https://raw.githubusercontent.com/Schypozhoa/TourismClassifier/master/Data/AttractionDataset-Splitted.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 31058170 (30M) [application/zip]
Saving to: ‘AttractionDataset-Splitted.zip’


2023-05-24 14:20:41 (138 MB/s) - ‘AttractionDataset-Splitted.zip’ saved [31058170/310581

In [4]:
# Unpack the downloaded archive; -q keeps unzip from listing every extracted file.
!unzip -q "/content/AttractionDataset-Splitted.zip"

In [5]:
def extractDataset(root="/content/AttractionDataset-Splitted"):
    """Return the train and validation folders of an already-unpacked dataset.

    Despite its name, this function extracts nothing: it only builds the two
    folder paths below ``root``. The validation folder is the one named
    ``test``.

    Args:
        root: Folder that contains the ``train`` and ``test`` sub-folders.
            Defaults to the location used by the Colab cells of this notebook.

    Returns:
        Tuple ``(trainPath, valPath)`` of POSIX-style path strings.
    """
    # Drop a trailing slash so the joined paths never contain "//".
    base = root.rstrip("/")
    return base + "/train", base + "/test"

# Create the model

In [6]:
def createModel(optimizer, loss, numClasses=16, inputShape=(300, 300, 3)):
    """Build and compile an image classifier on top of a frozen VGG19 base.

    The VGG19 convolutional base is loaded with ImageNet weights, without its
    original classification head, and is excluded from training. A small
    dense head (Flatten -> Dense(512, relu) -> Dense(numClasses, softmax))
    is stacked on top and is the only trainable part.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        loss: Loss passed to ``model.compile``.
        numClasses: Number of output classes, i.e. the width of the softmax
            layer. Must match the number of classes the data generators yield.
        inputShape: ``(height, width, channels)`` of the input images. Must
            match the ``target_size`` used by the data generators.

    Returns:
        The compiled Keras ``Sequential`` model, tracking the ``accuracy`` metric.
    """
    # NOTE(review): the ImageNet VGG19 weights are normally paired with
    # tf.keras.applications.vgg19.preprocess_input; confirm that the scaling
    # applied by the input pipeline is the intended one.
    pretrainedModel = tf.keras.applications.VGG19(weights='imagenet',
                                                  include_top=False,
                                                  input_shape=inputShape)
    # Freeze the base so only the new head is updated during fit().
    pretrainedModel.trainable = False

    model = models.Sequential([
        pretrainedModel,
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(numClasses, activation='softmax')
    ])

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy']
        )

    return model

# Preprocess and augment the Train and Validation Generator

In [7]:
def trainValGen(trainPath, valPath):
    """Create the directory-backed generators used for training and validation.

    Both generators rescale pixel values by 1/255, resize images to 300x300,
    yield batches of 25 and produce categorical (one-hot) labels. Only the
    training generator additionally applies random augmentation (rotation,
    shifts, shear, zoom, horizontal flip; gaps filled by reflection).

    Args:
        trainPath: Folder with one sub-folder per class, used for training.
        valPath: Folder with one sub-folder per class, used for validation.

    Returns:
        Tuple ``(trainGenerator, valGenerator)``.
    """
    ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

    # Random transformations applied to training images only.
    augmentation = dict(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='reflect',
    )
    # Settings shared by both directory iterators.
    flowOptions = dict(
        batch_size=25,
        class_mode='categorical',
        target_size=(300, 300),
    )

    trainDatagen = ImageDataGenerator(rescale=1./255, **augmentation)
    valDatagen = ImageDataGenerator(rescale=1./255)

    trainGenerator = trainDatagen.flow_from_directory(directory=trainPath, **flowOptions)
    valGenerator = valDatagen.flow_from_directory(directory=valPath, **flowOptions)

    return trainGenerator, valGenerator

# Plot the accuracy and loss

In [8]:
def plotHistory(history):
    """Plot training vs. validation accuracy and loss, one figure each.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain the keys ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss`` (a missing key raises ``KeyError``).
    """
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(len(acc))

    # The series name must be passed as label=...: pyplot.plot reads its
    # positional arguments as repeated ([x], y, [fmt]) groups, so a string
    # placed after the format is parsed as data for another series.
    plt.plot(epochs, acc, 'r', label="Train Acc")
    plt.plot(epochs, val_acc, 'b', label="Val Acc")
    plt.title('TrainVal accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    plt.plot(epochs, loss, 'r', label="Train Loss")
    plt.plot(epochs, val_loss, 'b', label="Val Loss")
    plt.title('TrainVal loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Preprocess the prediction data

In [9]:
def predGen(predPath):
    """Create a label-free generator over the images to run predictions on.

    Images are rescaled by 1/255 and resized to 300x300, matching the
    settings of the training/validation generators in this notebook.

    Args:
        predPath: Folder scanned by ``flow_from_directory``. Per the Keras
            documentation, with ``class_mode=None`` the images still have to
            live in a sub-folder of this directory to be found.

    Returns:
        A directory iterator that yields image batches only (no labels), in
        the same order as its ``filenames`` attribute.
    """
    predDatagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        )

    predGenerator = predDatagen.flow_from_directory(
        directory=predPath,
        class_mode=None,
        target_size=(300, 300),
        # flow_from_directory shuffles by default; without this the rows
        # returned by model.predict() would not line up with .filenames.
        shuffle=False
        )

    return predGenerator

# Main Pipeline

When running on Colab, just set `ON_COLAB = True`.

In [None]:
# ---------------------------------------------------------------------------
# Dataset configuration
# "Splitted" means a dataset already laid out as train/ and test/ folders.
# ---------------------------------------------------------------------------
DATASET_PATH = "C:/Users/Administrator/Desktop/Capstone/Data/AttractionDataset/"
SPLITTED_PATH = "C:/Users/Administrator/Desktop/Capstone/Data/AttractionDataset-Splitted/"
PREDICTION_PATH = ""
RATIO = 0.8
ALREADY_SPLITTED = True
ON_COLAB = True

# Resolve the train/validation folders: on Colab use the unpacked archive,
# locally split (or reuse) the dataset on disk.
if ON_COLAB:
    trainPath, valPath = extractDataset()
else:
    trainPath, valPath = splitDataset(DATASET_PATH, RATIO, SPLITTED_PATH, ALREADY_SPLITTED)
trainGen, valGen = trainValGen(trainPath, valPath)

# Optional sanity check: preview a 5x5 grid of augmented training images.
# NUM_IMAGES = 25
# x, y = trainGen.next()
# fig = plt.figure(figsize=(10, 10))
# for i in range(0, NUM_IMAGES):
#     image = x[i]
#     fig.add_subplot(5, 5, i+1)
#     plt.imshow(image)
# plt.show()

# Optional sanity check: preview a single augmented training image.
# x, y = trainGen.next()
# image = x[0]
# plt.imshow(image)
# plt.show()

# ---------------------------------------------------------------------------
# Model configuration
# ---------------------------------------------------------------------------
LEARNING_RATE = 0.001
OPTIMIZER = optimizers.Adam(learning_rate=LEARNING_RATE)
LOSS = losses.CategoricalCrossentropy()
SAVED_MODEL_PATH = "C:/Users/Administrator/Desktop/Capstone/Model/testVGG19.h5"
SAVED_MODEL_PATH_COLAB = "/content/testVGG19.h5"

# Build the classifier and train it for 10 epochs, validating after each one.
model = createModel(OPTIMIZER, LOSS)
history = model.fit(trainGen, epochs=10, validation_data=valGen, verbose=1)

# Persist the trained model to the environment-specific location, then plot
# the training curves.
model.save(SAVED_MODEL_PATH_COLAB if ON_COLAB else SAVED_MODEL_PATH)
plotHistory(history)

# Scratch code for trying out predictions with a previously saved model.

# model = tf.keras.saving.load_model("C:/Users/Administrator/Desktop/Capstone/Model/Test.h5")
# res = model.predict(predGen)
# lab = list(trainGen.class_indices.keys())
# sel = res.argmax(axis=1)
# print(res)
# print(predGen.filenames)
# print(f"Predicted class: {lab[sel[0]]}, with probability {res[0][sel[0]]}")


Found 2108 images belonging to 16 classes.
Found 535 images belonging to 16 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
