In [None]:
import tensorflow as tf
import tensorflow_hub as hub

# A notebook cell only renders its final expression, so a bare
# `tf.__version__` followed by another expression is silently discarded.
# Print both versions explicitly so each one is visible.
print("TensorFlow version:", tf.__version__)
print("TensorFlow Hub version:", hub.__version__)

In [None]:
# Query the visible GPU devices once and reuse the result for both messages.
gpu_devices = tf.config.list_physical_devices("GPU")
print(gpu_devices)
print("YESS" if gpu_devices else "NOPE")

In [None]:
import numpy as np
import pandas as pd

# Read the labels CSV from Drive and preview its first rows.
labels_path = "/content/drive/MyDrive/Colab Notebooks/dog-breed-identification/labels.csv"
labels_csv = pd.read_csv(labels_path)
labels_csv.head()

In [None]:
# Summary statistics of the labels table.
labels_csv.describe()

In [None]:
# prompt: create graph of value counts of breeds column in df labels_csv with a line in representing the mean

import matplotlib.pyplot as plt

# Number of rows per breed, plus the average of those per-breed counts.
breed_counts = labels_csv["breed"].value_counts()
mean_count = breed_counts.mean()

# Bar chart of the per-breed counts with a dashed red line marking the mean.
fig, ax = plt.subplots(figsize=(23, 6))
breed_counts.plot(kind="bar", ax=ax)
ax.axhline(mean_count, color='r', linestyle='--', label=f'Mean Count: {mean_count:.2f}')
ax.set_xlabel("Breed")
ax.set_ylabel("Count")
ax.set_title("Value Counts of Breeds")
ax.legend()
plt.show()

In [None]:
from IPython.display import Image

# Render one image from the train folder inline as a quick visual check.
Image("/content/drive/MyDrive/Colab Notebooks/dog-breed-identification/train/001513dfcb2ffafc82cccf4d8bbaba97.jpg")

In [None]:
# Build the path of every training image from the "id" column of the labels table.
filenames = [
    f"/content/drive/MyDrive/Colab Notebooks/dog-breed-identification/train/{image_id}.jpg"
    for image_id in labels_csv["id"]
]

In [None]:
# Number of image paths built from the labels table.
len(filenames)

In [None]:
import os
# Displaying the raw directory listing floods the cell output with one entry
# per file, so show the number of files and a short sample instead.
train_files = os.listdir("/content/drive/MyDrive/Colab Notebooks/dog-breed-identification/train")
len(train_files), train_files[:5]

In [None]:
# Breed of every row in the labels table, as a NumPy array.
labels = np.array(labels_csv["breed"])

In [None]:
# Sorted array of the distinct breed names; its length is the number of classes.
unique_labels = np.unique(labels)
len(unique_labels)

In [None]:
# Compare the first label against every unique breed: the result is a boolean
# array that is True only at the position of the matching breed.
first_label = labels[0]
print(first_label)
unique_labels == first_label

In [None]:
# One boolean array per sample, True at the index of that sample's breed
# within unique_labels.
boolean_labels = [unique_labels == breed for breed in labels]
# Preview the first two encoded labels.
boolean_labels[:2]

In [None]:
# Walk through the encoding of the first sample: the breed name, its index in
# unique_labels, the index of the True entry in its boolean array, and the
# same array as 0/1 integers.
first_label = labels[0]
first_encoding = boolean_labels[0]
print(first_label)
print(np.where(unique_labels == first_label))
print(first_encoding.argmax())
print(first_encoding.astype(int))

In [None]:
# Features are the image paths; targets are the boolean label arrays.
X, y = filenames, boolean_labels

In [None]:
# Number of leading samples used for the train/validation split (Colab form slider).
NUM_IMAGES = 1000 #@param {type: "slider", min: 1000, max: 10000, step: 1000}

In [None]:
from sklearn.model_selection import train_test_split

# Keep only the first NUM_IMAGES samples, then hold out 20% of them for
# validation with a fixed seed so the split is reproducible.
X_subset = X[:NUM_IMAGES]
y_subset = y[:NUM_IMAGES]
X_train, X_val, y_train, y_val = train_test_split(
    X_subset, y_subset, test_size = 0.2, random_state = 42
)

print(len(X_train), len(X_val), len(y_train), len(y_val))

In [None]:
from matplotlib.pyplot import imread

# Load one training image as an array and look at its dimensions.
sample_image_path = filenames[42]
image = imread(sample_image_path)
image.shape

In [None]:
# Wrap the image array in a constant tensor and inspect its shape, rank and
# total number of elements.
tensor = tf.constant(image)
tensor.shape, tensor.ndim, tf.size(tensor)

In [None]:
# Side length, in pixels, of the square images produced by process_image.
IMG_SIZE = 224

def process_image(image_path, img_size = IMG_SIZE):
  """Load a JPEG file and return it as a resized float32 image tensor.

  Args:
    image_path: Path of the JPEG file to read.
    img_size: Height and width of the returned image.

  Returns:
    A float32 tensor with 3 colour channels, resized to
    `img_size` x `img_size`.
  """
  raw_bytes = tf.io.read_file(image_path)
  # Force three channels so every decoded image has the same depth.
  decoded = tf.image.decode_jpeg(raw_bytes, channels = 3)
  # convert_image_dtype rescales integer pixel values into the [0, 1] range.
  scaled = tf.image.convert_image_dtype(decoded, tf.float32)
  return tf.image.resize(scaled, size = [img_size, img_size])

In [None]:
def get_image_label(image_path, label):
  """Return `(image, label)` where the image is loaded via `process_image`.

  The label is passed through unchanged.
  """
  return process_image(image_path), label

In [None]:
# Sanity check: preprocess sample 42 and show it alongside its label tensor.
(process_image(X[42]), tf.constant(y[42]))

In [None]:
# Batch size for the data pipelines. The slider starts at 4 rather than 0
# because a batch size of 0 is rejected by `tf.data.Dataset.batch`.
BATCH_SIZE = 32 #@param {type: "slider", min: 4, max: 64, step: 4}

In [None]:
def create_data_batches(X, y = None, batch_size = BATCH_SIZE, valid_data = False, test_data = False):
    """Turn image paths (and, optionally, labels) into a batched `tf.data.Dataset`.

    Args:
        X: Image file paths.
        y: Labels paired with `X`. Required unless `test_data` is True.
        batch_size: Number of elements per batch.
        valid_data: If True, build an unshuffled dataset of (image, label) pairs.
        test_data: If True, build an unshuffled, image-only dataset; `y` is
            ignored. Takes precedence over `valid_data`.

    Returns:
        A batched dataset yielding image tensors (test data) or
        (image, label) pairs (validation and training data).

    Raises:
        ValueError: If labels are required but `y` is None.
    """
    if test_data:
        print("Creating test data batches...")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(X)))
        print(data)
        return data.map(process_image).batch(batch_size)

    # Fail early with a clear message instead of the opaque conversion error
    # that tf.constant(None) would raise further down.
    if y is None:
        raise ValueError("y (labels) must be provided unless test_data = True")

    if valid_data:
        print("Creating validation data batches...")
    else:
        print("Creating training data batches...")

    data = tf.data.Dataset.from_tensor_slices((tf.constant(X), tf.constant(y)))
    if not valid_data:
        # Shuffle the (path, label) pairs before mapping, so the shuffle
        # buffer holds file paths rather than decoded images.
        data = data.shuffle(buffer_size = len(X))
    return data.map(get_image_label).batch(batch_size)

In [None]:
# Shuffled training batches and unshuffled validation batches.
train_data = create_data_batches(X = X_train, y = y_train)
val_data = create_data_batches(X = X_val, y = y_val, valid_data = True)

In [None]:
# Inspect the element spec of both datasets.
train_data.element_spec, val_data.element_spec

In [None]:
# Pull the first batch out of the training dataset as NumPy arrays and check
# the shapes of its images and labels.
first_train_batch = next(train_data.as_numpy_iterator())
train_images, train_labels = first_train_batch
train_images.shape, train_labels.shape

In [None]:
import matplotlib.pyplot as plt


def show_25_images(images, labels):
    """Plot up to 25 images in a 5x5 grid, each titled with its breed name.

    Args:
        images: Indexable batch of images accepted by `plt.imshow`.
        labels: Indexable batch of label arrays; the index of each array's
            maximum is used to look up the title in `unique_labels`.
    """
    plt.figure(figsize = (10, 10))
    # A batch can hold fewer than 25 items (small batch size or a final
    # partial batch), so never index past what was actually provided.
    num_to_show = min(25, len(images), len(labels))
    for i in range(num_to_show):
        plt.subplot(5, 5, i + 1)
        plt.imshow(images[i])
        plt.title(unique_labels[labels[i].argmax()])
        plt.axis("off")

In [None]:
# Visualize the training batch extracted above.
show_25_images(train_images, train_labels)

In [None]:
# Pull the first validation batch as NumPy arrays and visualize it.
first_val_batch = next(val_data.as_numpy_iterator())
val_images, val_labels = first_val_batch
show_25_images(val_images, val_labels)