# DoggieTech Dog Image Classifier
A machine learning model that identifies a dog's breed from an image.

In [1]:
# Get dataset from Kaggle
!pip install kaggle
!kaggle datasets download -d jessicali9530/stanford-dogs-dataset

Dataset URL: https://www.kaggle.com/datasets/jessicali9530/stanford-dogs-dataset
License(s): other
stanford-dogs-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


## Python Imports

In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import zipfile
import pandas as pd
import PIL
from PIL import Image
import imageio.v3 as iio
import skimage as ski
import xml.etree.ElementTree as ET


## Unzip dataset for use

In [4]:
# Unpack the archive downloaded above into the local "sd_dataset" folder.
with zipfile.ZipFile(file="stanford-dogs-dataset.zip", mode="r") as z:
  z.extractall(path="sd_dataset")

In [3]:
def convertImgToNpArray(anImage: "Image.Image") -> np.ndarray:
  """Return the pixel data of an image as a NumPy array.

  Args:
    anImage: The image to convert; it is handed unchanged to ``np.array``.

  Returns:
    The ``np.ndarray`` produced by ``np.array(anImage)``.
  """
  # The annotation is written as a string so that it names the Image class
  # (``Image.Image``) rather than the ``Image`` module imported from PIL.
  return np.array(anImage)

def cropImage(anImage, boundingBox: tuple):
  """Crop an image to a bounding box.

  Args:
    anImage: Object exposing a ``crop`` method (the notebook passes PIL images).
    boundingBox: Box forwarded as-is to ``anImage.crop``.

  Returns:
    Whatever ``anImage.crop(boundingBox)`` returns.
  """
  croppedRegion = anImage.crop(boundingBox)
  return croppedRegion

def displayImage(anImageArray: np.ndarray) -> None:
  """Draw an image array with ``plt.imshow``.

  Args:
    anImageArray: Pixel data to show; passed unchanged to ``plt.imshow``.

  Returns:
    None. The object returned by ``plt.imshow`` is intentionally discarded.
  """
  plt.imshow(anImageArray)

In [72]:
def annotationParse(annotationDir : str, imageDir : str):
  """Build a table of dog samples from a directory tree of XML annotations.

  ``annotationDir`` is expected to contain one sub-directory per category
  (dog breed), each holding one XML annotation file per sample. For every
  annotation file the first ``<object>`` element is read: its ``<name>`` is
  the breed and its ``<bndbox>`` children ``xmin``/``ymin``/``xmax``/``ymax``
  form the bounding box. Elements are looked up by tag rather than by child
  position, so optional or reordered elements do not shift the result.

  Args:
    annotationDir: Root directory of the annotations (one folder per breed).
    imageDir: Root directory of the images. Only used to build the image path
      ``imageDir/<breed folder>/<annotation file name>.jpg``; the file is not
      opened or checked for existence here.

  Returns:
    A ``pd.DataFrame`` with the columns ``id``, ``breed``, ``image location``,
    ``xStart``, ``yStart``, ``xEnd`` and ``yEnd``. All values are strings (the
    bounding box values are the raw XML text). Rows are ordered by breed
    folder name, then by annotation file name. The frame is empty when no
    annotation files are found.

  Raises:
    ValueError: If an annotation file has no ``<object>`` element or that
      element has no ``<bndbox>`` child.
  """
  columns = ['id', 'breed','image location','xStart', 'yStart', 'xEnd', 'yEnd']
  rows = []

  # For every category (aka dog breed); sorted because os.listdir order is arbitrary
  for breedDir in sorted(os.listdir(annotationDir)):
    breedPath = os.path.join(annotationDir, breedDir)
    # Ignore stray files (e.g. OS metadata) sitting next to the breed folders
    if not os.path.isdir(breedPath):
      continue
    # For every dog
    for sampleDog in sorted(os.listdir(breedPath)):
      annotationPath = os.path.join(breedPath, sampleDog)
      root = ET.parse(annotationPath).getroot()
      # Only the first <object> is used, even if the file lists several
      dogObject = root.find("object")
      boundingBox = dogObject.find("bndbox") if dogObject is not None else None
      if boundingBox is None:
        raise ValueError(f"No <object>/<bndbox> element found in {annotationPath}")
      # The image shares the annotation's file name, plus a .jpg extension
      dogImg = os.path.join(imageDir, breedDir, sampleDog + ".jpg")
      rows.append([
        sampleDog,                     # dog ID / annotation file name
        dogObject.findtext("name"),    # dog breed
        dogImg,
        boundingBox.findtext("xmin"),
        boundingBox.findtext("ymin"),
        boundingBox.findtext("xmax"),
        boundingBox.findtext("ymax"),
      ])

  return pd.DataFrame(rows, columns=columns)

## Organizing the images into a dataset

In [None]:
# Annotation and image trees inside the folder the archive was extracted to.
annotationDir = os.path.join("sd_dataset", "annotations", "Annotation")
imageDir = os.path.join("sd_dataset", "images", "Images")

# Parse the annotations into a table, then preview its first rows.
dataFrame = annotationParse(annotationDir, imageDir)
dataFrame.head()


Unnamed: 0,id,breed,image location,xStart,yStart,xEnd,yEnd
0,n02085620_10074,Chihuahua,sd_dataset\images\Images\n02085620-Chihuahua\n...,25,10,276,498
1,n02085620_10131,Chihuahua,sd_dataset\images\Images\n02085620-Chihuahua\n...,49,9,393,493
2,n02085620_10621,Chihuahua,sd_dataset\images\Images\n02085620-Chihuahua\n...,142,43,335,250
3,n02085620_1073,Chihuahua,sd_dataset\images\Images\n02085620-Chihuahua\n...,0,27,312,498
4,n02085620_10976,Chihuahua,sd_dataset\images\Images\n02085620-Chihuahua\n...,90,104,242,452


In [80]:
def createLabelAndDataLists(dataFrame: pd.DataFrame):
  """Take a DataFrame and generate labels and data for training.

  Columns are read by position, not by name: for each row the 2nd column is
  the label, the 3rd is the image path and the 4th-7th are the bounding box
  passed to ``cropImage`` (each value is converted with ``int``).

  Args:
    dataFrame: Table with at least seven columns laid out as described above.

  Returns:
    A ``(data, labels)`` tuple of two lists of equal length: ``data`` holds
    the cropped image of every row as returned by ``convertImgToNpArray``,
    ``labels`` holds the matching label values, both in row order.
  """
  # NOTE(review): the image mode is not normalised here; confirm that every
  # input image has the channel layout the model expects.
  data = []
  labels = []
  for item in dataFrame.itertuples():
    # item[0] is the row index, so the DataFrame columns start at item[1]
    boundingBox = (int(item[4]), int(item[5]), int(item[6]), int(item[7]))
    # The with-block closes the file even when cropping or conversion raises;
    # the array is built inside it, while the image is still open.
    with Image.open(item[3], 'r') as image:
      formattedImage = convertImgToNpArray(cropImage(anImage=image, boundingBox=boundingBox))
    data.append(formattedImage)
    labels.append(item[2])
  return data, labels


In [None]:

# Crop every annotated sample and collect the image arrays with their labels.
data, labels = createLabelAndDataLists(dataFrame)
# Optional sanity check (disabled): show the first ten cropped images.
# for i in range(10):
#   plt.figure()
#   plt.imshow(data[i])

## Creating the model