# Setup

### command line

In [1]:
# import the kaggle.json from kaggle API into colab
# do this command
# install kaggle library
!pip install kaggle
# make a directory named .kaggle
!mkdir ~/.kaggle
# copy the kaggle.json into this new directory
!cp kaggle.json ~/.kaggle/
# alocate the required permission for this file
!chmod 600 ~/.kaggle/kaggle.json
# download dataset
!kaggle datasets download doctrinek/oxford-iiit-cats-extended-10k

Downloading oxford-iiit-cats-extended-10k.zip to /content
100% 993M/993M [00:10<00:00, 147MB/s]
100% 993M/993M [00:10<00:00, 102MB/s]


In [3]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


### import libraries

In [16]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
import numpy as np
import matplotlib.pyplot as plt
import pathlib, zipfile, os, splitfolders

### extract zip file

In [5]:
# Unpack the downloaded archive into /content/dataset.
# The context manager closes the archive even when extraction fails part-way,
# so the file handle is never leaked.
zip_path = "/content/oxford-iiit-cats-extended-10k.zip"
with zipfile.ZipFile(zip_path, "r") as zip_read:
    zip_read.extractall('/content/dataset')

In [8]:
# Show the top-level entries of the extraction directory to check what the archive contained.
os.listdir('/content/dataset/')

['CatBreedsRefined-v3']

### split into train and validation

In [10]:
# Root of the extracted dataset (presumably one sub-folder per class — TODO confirm).
base_dir = '/content/dataset/CatBreedsRefined-v3'

# Copy the data into an 80% / 20% split under /content/dataset/project,
# using a fixed seed so that the split is reproducible.
splitfolders.ratio(
    base_dir,
    output='/content/dataset/project',
    seed=6969,
    ratio=(0.8, 0.2),
)

# Locations of the two resulting splits, used by the cells below.
train_dir, validation_dir = (
    '/content/dataset/project/train',
    '/content/dataset/project/val',
)


Copying files: 0 files [00:00, ? files/s][A
Copying files: 249 files [00:00, 2486.14 files/s][A
Copying files: 516 files [00:00, 2589.77 files/s][A
Copying files: 775 files [00:00, 2543.45 files/s][A
Copying files: 1030 files [00:00, 2452.33 files/s][A
Copying files: 1276 files [00:00, 2378.79 files/s][A
Copying files: 1515 files [00:00, 2219.48 files/s][A
Copying files: 1739 files [00:00, 1917.02 files/s][A
Copying files: 1938 files [00:00, 1683.65 files/s][A
Copying files: 2114 files [00:01, 1517.67 files/s][A
Copying files: 2273 files [00:01, 1388.75 files/s][A
Copying files: 2417 files [00:01, 1382.81 files/s][A
Copying files: 2576 files [00:01, 1370.48 files/s][A
Copying files: 2749 files [00:01, 1462.38 files/s][A
Copying files: 2945 files [00:01, 1595.11 files/s][A
Copying files: 3109 files [00:01, 1523.04 files/s][A
Copying files: 3265 files [00:01, 1515.05 files/s][A
Copying files: 3424 files [00:02, 1532.46 files/s][A
Copying files: 3579 files [00:02, 1489.

### explore data samples

In [15]:
def total_sample(directory):
  """Count the samples stored in the sub-folders of `directory`.

  Every immediate sub-folder of `directory` is visited and the number of
  entries it contains is added to the total.

  Args:
    directory: Path to a folder whose sub-folders hold the samples.

  Returns:
    The total number of entries found across all sub-folders (0 when there
    are no sub-folders).
  """
  total = 0
  for folder in os.listdir(directory):
    folder_path = os.path.join(directory, folder)
    # A stray top-level file (e.g. a README or .DS_Store) is not a sample
    # folder; calling os.listdir on it would raise NotADirectoryError.
    if not os.path.isdir(folder_path):
      continue
    total += len(os.listdir(folder_path))

  return total

# Count the samples of each split (train first, then validation).
train_sample_length, validation_sample_length = (
    total_sample(split_dir) for split_dir in (train_dir, validation_dir)
)

# Report the per-split counts and their sum, one message per line.
print(
    f"The train directory has {train_sample_length} samples",
    f"The validation directory has {validation_sample_length} samples",
    f"Which in total makes it {train_sample_length + validation_sample_length} samples",
    sep="\n",
)

The train directory has 8202 samples
The validation directory has 2055 samples
Which in total makes it 10257 samples


# Preprocess Data