# Dog Breed Identification

In [3]:
# imports

# Data munging
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Py modules
import random
from random import randint
import time

# Make it torch-y
import torch
from torch.utils.data import Dataset, random_split, DataLoader
import torch.nn.functional as F
import torch.nn as nn

# Torch-y vision stuff
import torchvision
import torchvision.models as models
import torchvision.transforms as T
from torchvision.utils import make_grid
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder

# Loader bars
from tqdm.notebook import tqdm

# Image handling
from PIL import Image
from scipy import ndimage

# ML
from sklearn.metrics import f1_score

# System stuff
import os


## Getting Data

In [4]:
# Image directories, one per split
TRAIN_DIR, TEST_DIR = "train", "test"

# CSV file associated with each split
train_csv, test_csv = "labels.csv", "submission.csv"

In [6]:
# Read the training CSV into a DataFrame
data_df = pd.read_csv(filepath_or_buffer=train_csv)
# Show the first ten rows as this cell's output
data_df.head(n=10)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier
7,002a283a315af96eaea0e28e7163b21b,borzoi
8,003df8b8a8b05244b1d920bb6cf451f9,basenji
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound


In [13]:
# Build lookup tables between integer class indices and breed names.
# np.sort returns a new, alphabetically ordered array, so the index given to
# each breed does not depend on the row order of the CSV.
label_names = np.sort(data_df['breed'].unique())
# ndarray.sort() sorts in place and returns None, so the previous
# `label_sorted = label_names.sort()` left this name bound to None.
# Keep the name, but point it at the sorted array.
label_sorted = label_names

# idx -> breed name
labels = dict(enumerate(label_names))
# breed name -> idx (exact inverse of `labels`)
labels_rev = {name: idx for idx, name in labels.items()}

In [31]:
# Augment data df with image path and label
def map_path(ID, img_dir=None, ext=".jpg"):
    """Return the file path of the image with the given id.

    Args:
        ID: Image identifier as a string (the file name without extension).
        img_dir: Directory containing the image. When None (the default),
            the module-level TRAIN_DIR is used; it is looked up at call
            time, so the one-argument call behaves exactly as before.
        ext: File extension appended to ``ID``, including the leading dot.

    Returns:
        ``img_dir`` and ``ID + ext`` joined with ``os.path.join``.
    """
    if img_dir is None:
        img_dir = TRAIN_DIR
    return os.path.join(img_dir, ID + ext)

# Add two derived columns to data_df (mutates the frame in place):
#   img_path - map_path applied to every value of `id`
#   label    - each `breed` looked up in the labels_rev dict
# Series.map accepts a callable or a dict directly, so no lambda wrapper is needed.
data_df['img_path'] = data_df['id'].map(map_path)
data_df['label'] = data_df['breed'].map(labels_rev) 

In [33]:
data_df.head()

Unnamed: 0,id,breed,label,img_path
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,19,train/000bec180eb18c7604dcecc8fe0dba07.jpg
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,37,train/001513dfcb2ffafc82cccf4d8bbaba97.jpg
2,001cdf01b096e06d78e9e5112d419397,pekinese,85,train/001cdf01b096e06d78e9e5112d419397.jpg
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,15,train/00214f311d5d2247d5dfe4fe24b2303d.jpg
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,49,train/0021f9ceb3235effd7fcde7f7538ed62.jpg
