In [None]:
# Mount Google Drive inside the Colab VM. The dataset archives used further
# down are read from /content/drive/MyDrive, so this has to run first.
# force_remount=True asks Colab to mount again even if a mount is already active.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import tarfile
import os
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import tensorflow as tf

# Local scratch directories on the Colab VM that receive the extracted archives.
os.makedirs("/content/images", exist_ok=True)
os.makedirs("/content/annotations_lvl1", exist_ok=True)

# Gzip-compressed tarballs stored on the mounted Google Drive.
images_tar_path = "/content/drive/MyDrive/oxford_pets/images.tar.gz"
annotations_tar_path = "/content/drive/MyDrive/oxford_pets/annotations.tar.gz"

# Use the "data" extraction filter when the running Python provides it: it
# refuses members that would land outside the destination (absolute paths,
# "..", escaping links) and avoids the DeprecationWarning that Python 3.12+
# emits when no filter is given. Older interpreters without extraction
# filters fall back to the previous unfiltered behaviour.
extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

with tarfile.open(images_tar_path, 'r:gz') as tar:
    tar.extractall(path="/content/images", **extract_kwargs)

with tarfile.open(annotations_tar_path, 'r:gz') as tar:
    tar.extractall(path="/content/annotations_lvl1", **extract_kwargs)

# Annotation index: one whitespace-separated row per image, where the first
# column is the image name without extension and the second is the class id.
annotations_path = "/content/annotations_lvl1/annotations/list.txt"

with open(annotations_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Build (file_name, zero_based_label) pairs.
image_label_pairs = []
for line_no, line in enumerate(lines, start=1):
    parts = line.split()
    # Skip blank lines and comment lines (including indented ones); a blank
    # line used to crash with an IndexError on parts[0].
    if not parts or parts[0].startswith("#"):
        continue
    if len(parts) < 2:
        # Fail with the offending location instead of a bare IndexError.
        raise ValueError(
            f"{annotations_path}:{line_no}: expected '<image> <class-id> ...', "
            f"got {line!r}"
        )
    image_name = parts[0] + ".jpg"
    # Shift the class id down by one so labels start at 0.
    label_id = int(parts[1]) - 1
    image_label_pairs.append((image_name, label_id))

# Hold out 20% of the (file_name, label) pairs for validation, stratified by
# label so both subsets keep the same class proportions.
# random_state pins the shuffle: without it every run of this cell produces a
# different split, and because images are copied into a persistent directory
# tree, a re-run could leave the same image in both train/ and val/.
train_data, val_data = train_test_split(
    image_label_pairs,
    test_size=0.2,
    stratify=[lbl for _, lbl in image_label_pairs],
    random_state=42,
)

# Root of the generated <phase>/<class_id>/ directory tree.
base_dir = "/content/oxford_pets_processed"

# Start from an empty tree. Files copied by an earlier run would otherwise
# stay in place (exist_ok=True keeps existing directories and their contents),
# so a new split could end up with the same image in both train/ and val/.
if os.path.isdir(base_dir):
    shutil.rmtree(base_dir)

# One sub-directory per phase and per zero-based class id (0..36).
for phase in ['train', 'val']:
    for class_id in range(37):
        os.makedirs(f"{base_dir}/{phase}/{class_id}", exist_ok=True)

# Directory the image archive unpacks its .jpg files into.
images_path = "/content/images/images"

def copy_images(data, phase, src_dir=None, dst_root=None):
    """Copy labelled images into ``<dst_root>/<phase>/<label>/``.

    Args:
        data: Iterable of ``(file_name, label)`` pairs.
        phase: Name of the sub-directory under the destination root,
            e.g. ``'train'`` or ``'val'``.
        src_dir: Directory containing the source images. Defaults to the
            module-level ``images_path``.
        dst_root: Root of the output tree. Defaults to the module-level
            ``base_dir``.

    Source files that do not exist are skipped, as before, but a single
    ``UserWarning`` now reports how many were missing so a silently
    shrunken dataset does not go unnoticed. The per-class destination
    directory is created on demand, so the function no longer depends on
    the directory tree having been prepared beforehand.
    """
    import warnings

    src_dir = images_path if src_dir is None else src_dir
    dst_root = base_dir if dst_root is None else dst_root

    missing = []
    prepared_dirs = set()  # avoid a makedirs call for every single file
    for fname, label in data:
        src = os.path.join(src_dir, fname)
        if not os.path.exists(src):
            missing.append(fname)
            continue
        class_dir = os.path.join(dst_root, phase, str(label))
        if class_dir not in prepared_dirs:
            os.makedirs(class_dir, exist_ok=True)
            prepared_dirs.add(class_dir)
        shutil.copyfile(src, os.path.join(class_dir, fname))

    if missing:
        warnings.warn(
            f"{len(missing)} source image(s) not found under {src_dir!r} and "
            f"skipped for phase {phase!r}; first few: {missing[:5]}"
        )

# Fill the train/ tree first, then the val/ tree, from their respective splits.
for phase_name, pairs in (('train', train_data), ('val', val_data)):
    copy_images(pairs, phase_name)

# Every image is resized to this (height, width) when loaded.
image_size = (224, 224)
batch_size = 32

# The class directories are named after the zero-based label ids ("0".."36").
# Left to itself, flow_from_directory orders classes alphanumerically
# ("0", "1", "10", "11", ..., "2", ...), so generator index k would NOT mean
# label id k. Listing the classes explicitly makes the indices the model is
# trained on identical to the label ids.
class_names = [str(class_id) for class_id in range(37)]

# Scale pixel values by 1/255; no augmentation is applied.
datagen = ImageDataGenerator(rescale=1./255)

# class_mode='sparse' yields integer labels, matching the
# sparse_categorical_crossentropy loss used for training.
train_gen = datagen.flow_from_directory(
    f"{base_dir}/train",
    target_size=image_size,
    batch_size=batch_size,
    classes=class_names,
    class_mode='sparse'
)

val_gen = datagen.flow_from_directory(
    f"{base_dir}/val",
    target_size=image_size,
    batch_size=batch_size,
    classes=class_names,
    class_mode='sparse'
)

# Small baseline CNN: two convolution + max-pooling stages, then a dense
# classifier head with one softmax output per class (37).
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(37, activation='softmax'))

# The sparse loss expects integer class labels rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for five epochs, evaluating on the validation generator after each one.
model.fit(train_gen, validation_data=val_gen, epochs=5)

