# Nike vs Adidas Image Classification (Colab-ready)

This notebook uses **TensorFlow / Keras** with **ResNet50** transfer learning to classify images of Nike vs Adidas shoes.

How to use:

1. Upload `archive.zip` to Colab (for example into `/content`), or mount Google Drive, and set `DATA_ZIP` to the path of the archive.
2. Run the cells in order. The notebook includes data preparation, model building, training, evaluation, and prediction cells.



In [None]:

# OPTIONAL: Mount Google Drive if you want to save models/results there
# from google.colab import drive
# drive.mount('/content/drive')

import os

# Locations where archive.zip is looked for, in priority order. The first one
# that exists is used; if none exists the first entry is kept as the default so
# later cells can report a clear "not found" message. Add your own path (e.g. a
# Google Drive location) to the front of this tuple if needed.
_ZIP_CANDIDATES = ("/mnt/data/archive.zip", "/content/archive.zip")
DATA_ZIP = next((p for p in _ZIP_CANDIDATES if os.path.exists(p)), _ZIP_CANDIDATES[0])
EXTRACT_DIR = "/mnt/data/dataset"   # change if you extract elsewhere
print("Set DATA_ZIP and EXTRACT_DIR as needed before running unzip/extract cell.")


In [None]:
# Unzip dataset (if not already unzipped)
import os
import zipfile


def extract_dataset(zip_path, extract_dir):
    """Extract ``zip_path`` into ``extract_dir`` unless it already holds data.

    Args:
        zip_path: Path of the dataset archive.
        extract_dir: Directory the archive is extracted into.

    Returns:
        True if ``extract_dir`` contains data when the function returns
        (already present or freshly extracted), False if the archive is
        missing and nothing could be extracted.
    """
    # An existing but EMPTY directory is not treated as "already extracted":
    # otherwise one run without the archive would block every later attempt.
    if os.path.isdir(extract_dir) and os.listdir(extract_dir):
        print("Dataset directory already exists at", extract_dir)
        return True
    # Check for the archive before creating anything on disk.
    if not os.path.exists(zip_path):
        print("archive.zip not found at", zip_path)
        return False
    os.makedirs(extract_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(extract_dir)
    print("Extracted archive.zip to", extract_dir)
    return True


# Honour paths already set earlier in the session instead of overwriting
# them; fall back to the defaults when this cell is run on its own.
DATA_ZIP = globals().get("DATA_ZIP", "/mnt/data/archive.zip")
EXTRACT_DIR = globals().get("EXTRACT_DIR", "/mnt/data/dataset")
extract_dataset(DATA_ZIP, EXTRACT_DIR)

# show a few files (top level of the extraction directory only)
for root, dirs, files in os.walk(EXTRACT_DIR):
    print(root, "->", len(dirs), "dirs,", len(files), "files")
    for f in files[:5]:
        print("   ", f)
    break


## Expected folder structure

The notebook expects the dataset to be organized like:

```
dataset/
 ├── train/
 │    ├── nike/
 │    └── adidas/
 ├── val/
 │    ├── nike/
 │    └── adidas/
 └── test/
      ├── nike/
      └── adidas/
```

If your dataset is a single folder with class subfolders (i.e. there is no `train/` directory), the notebook copies the images of each class into a 70% / 15% / 15% train / val / test split.


In [None]:

# Data preparation with ImageDataGenerator
import os, shutil, random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

base_dir = "/mnt/data/dataset"  # adjust if necessary
# Try to find a train/val/test structure; if not, split automatically.
def find_class_folders(base_dir):
    """Return ``{class_name: folder_path}`` for class folders in ``base_dir``.

    A class folder is an immediate subdirectory of ``base_dir`` that directly
    contains at least one file whose name ends in .png, .jpg or .jpeg
    (case-insensitive). Nested directories are not searched.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    def holds_images(folder):
        # True as soon as one image-like file name is seen in the folder.
        return any(name.lower().endswith(image_suffixes) for name in os.listdir(folder))

    candidates = ((name, os.path.join(base_dir, name)) for name in os.listdir(base_dir))
    return {
        name: path
        for name, path in candidates
        if os.path.isdir(path) and holds_images(path)
    }

# Resolve the split directories. Archives differ on the name of the validation
# split ('val' vs 'validation'), so use whichever one exists on disk and fall
# back to 'val', which is the name the auto-split below creates.
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')
val_dir = os.path.join(base_dir, 'val')
for _val_name in ('val', 'validation'):
    _candidate = os.path.join(base_dir, _val_name)
    if os.path.isdir(_candidate):
        val_dir = _candidate
        break

if not os.path.exists(train_dir):
    # attempt to auto-split if dataset contains class folders directly under base_dir
    class_folders = find_class_folders(base_dir)
    if not class_folders:
        raise FileNotFoundError(f"Couldn't find class folders under {base_dir}. Please arrange your data or adjust base_dir.")
    print("Detected class folders directly under dataset. Creating train/val/test split...")
    split_dirs = [train_dir, val_dir, test_dir]
    for split_path in split_dirs:
        os.makedirs(split_path, exist_ok=True)
    for cls, cls_path in class_folders.items():
        # os.listdir returns names in arbitrary order; sorting first makes the
        # seeded (random_state=42) split reproducible across machines.
        images = sorted(f for f in os.listdir(cls_path) if f.lower().endswith(('.png','.jpg','.jpeg')))
        # 70% train, then the remaining 30% is halved into val and test.
        train_files, temp = train_test_split(images, test_size=0.3, random_state=42)
        val_files, test_files = train_test_split(temp, test_size=0.5, random_state=42)
        for split_path, files in zip(split_dirs, [train_files, val_files, test_files]):
            dest_cls = os.path.join(split_path, cls)
            os.makedirs(dest_cls, exist_ok=True)
            for fname in files:
                src = os.path.join(cls_path, fname)
                dst = os.path.join(dest_cls, fname)
                # Copy (not move) so the original class folders stay intact;
                # skip files already copied by a previous run.
                if not os.path.exists(dst):
                    shutil.copy(src, dst)
    print("Split created at:", base_dir)

# Fail early with a readable message instead of an error from inside Keras.
_missing_splits = [d for d in (train_dir, val_dir, test_dir) if not os.path.isdir(d)]
if _missing_splits:
    raise FileNotFoundError(f"Missing dataset split directories: {_missing_splits}. Please arrange your data or adjust base_dir.")

# Setup ImageDataGenerator
IMG_SIZE = (224,224)
BATCH_SIZE = 32

# NOTE(review): the ImageNet ResNet50 weights are documented to expect inputs
# prepared with tensorflow.keras.applications.resnet50.preprocess_input rather
# than 1/255 scaling. Switching would also require the same change wherever
# single images are prepared for prediction, so the scaling is left unchanged.
# Augmentation is applied to the training images only.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=20,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   shear_range=0.1,
                                   zoom_range=0.1,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# shuffle=False keeps the sample order fixed for validation and test.
val_generator = test_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=1,
    class_mode='categorical',
    shuffle=False
)

print("Classes:", train_generator.class_indices)


In [None]:

# Build model using ResNet50 (transfer learning)
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Two output classes: Nike and Adidas.
num_classes = 2

# ImageNet-pretrained ResNet50 without its classification head. The backbone
# is frozen so only the layers added below are trained.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3))
base_model.trainable = False

# Classification head: pooled backbone features -> dropout -> dense -> softmax.
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(num_classes, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

model.summary()


In [None]:

# Training
EPOCHS = 10  # increase when you run for real
checkpoint_path = "/mnt/data/resnet50_shoes_best.h5"

# Keep the model with the best validation accuracy on disk.
best_model_saver = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
# Stop after 5 epochs without a better validation accuracy and roll the model
# back to its best weights.
early_stopper = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
# Halve the learning rate after 3 epochs without a better validation loss.
lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    verbose=1,
)
callbacks = [best_model_saver, early_stopper, lr_reducer]

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
)

# Save final model (the state at the end of training, separate from the
# best-validation checkpoint written above).
model.save('/mnt/data/resnet50_shoes_final.h5')
print('Saved model to /mnt/data/resnet50_shoes_final.h5')


In [None]:

# Evaluation on test set
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Load best model if checkpoint exists
# Best-effort: if the checkpoint is missing or unreadable, evaluation continues
# with the weights currently held by `model`.
try:
    model.load_weights('/mnt/data/resnet50_shoes_best.h5')
    print('Loaded best weights.')
except Exception as e:
    print('Could not load best weights:', e)

# Class names ordered by their integer label. They are taken from the test
# generator because y_true below uses that generator's label indices.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
label_ids = list(range(len(class_names)))

# Predictions
# Start from the first batch so predictions line up with test_generator.classes.
test_generator.reset()
# len(generator) is the number of batches, which stays correct for any
# batch size (the sample count is only right when batch_size == 1).
test_steps = len(test_generator)
preds = model.predict(test_generator, steps=test_steps, verbose=1)
y_pred = np.argmax(preds, axis=1)
y_true = test_generator.classes

# Report
# Explicit `labels` keeps the report and matrix aligned with class_names even
# if a class never appears; zero_division=0 reports 0 for a class that is
# never predicted instead of emitting a warning.
print('Classification Report:')
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names, zero_division=0))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(5,5))
plt.imshow(cm, interpolation='nearest')
plt.title('Confusion matrix')
plt.colorbar()
plt.xticks(label_ids, class_names, rotation=45)
plt.yticks(label_ids, class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
# Write each count into its cell; white text on the darker (high-count) cells.
for (i, j), val in np.ndenumerate(cm):
    plt.text(j, i, val, ha='center', va='center', color='white' if val>cm.max()/2 else 'black')
plt.show()


In [None]:

# Plot training curves
import matplotlib.pyplot as plt
# Per-epoch metrics recorded by model.fit; a missing key yields an empty list.
recorded = history.history
acc = recorded.get('accuracy', [])
val_acc = recorded.get('val_accuracy', [])
loss = recorded.get('loss', [])
val_loss = recorded.get('val_loss', [])

# Epochs are numbered from 1 on the x axis.
epochs_range = range(1, len(acc)+1)

# One panel per metric: (title, train values, train label, val values, val label).
panels = (
    ('Accuracy', acc, 'Train Acc', val_acc, 'Val Acc'),
    ('Loss', loss, 'Train Loss', val_loss, 'Val Loss'),
)

plt.figure(figsize=(12,4))
for position, (title, train_values, train_label, val_values, val_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_values, label=train_label)
    plt.plot(epochs_range, val_values, label=val_label)
    plt.legend()
    plt.title(title)
plt.show()


In [None]:

# Predict on custom images - upload an image in Colab and set IMAGE_PATH
from tensorflow.keras.preprocessing import image
import numpy as np
IMAGE_PATH = None  # set to '/content/my_shoe.jpg' or path in /mnt/data

def predict_image(img_path, model, target_size=(224,224), class_labels=None):
    """Classify a single image file with ``model``.

    Args:
        img_path: Path of the image to classify.
        model: Model whose ``predict`` returns one score per class.
        target_size: (height, width) the image is resized to on load.
        class_labels: Optional sequence of class names indexed by class id.
            When omitted, the names are taken from the global
            ``train_generator.class_indices``.

    Returns:
        Tuple ``(label, score)``: the name of the highest-scoring class and
        the model's score for it.
    """
    if class_labels is None:
        # Order the names by their integer index so list position == class id.
        indices = train_generator.class_indices
        class_labels = sorted(indices, key=indices.get)
    img = image.load_img(img_path, target_size=target_size)
    x = image.img_to_array(img)
    # Scale pixel values to [0, 1].
    x = x / 255.0
    # Add a leading batch axis: the model is fed a batch of one image.
    x = np.expand_dims(x, axis=0)
    preds = model.predict(x)
    cls_idx = np.argmax(preds, axis=1)[0]
    return class_labels[cls_idx], preds[0][cls_idx]

print('Set IMAGE_PATH to an image file and run predict_image(IMAGE_PATH, model)')


### Dataset extraction

`archive.zip` was found and extracted to `/mnt/data/dataset`. Top-level folders:

- `/mnt/data/dataset`: dirs=['test', 'train', 'validation'], files_sample=['labelnames.csv']
- `/mnt/data/dataset/test`: dirs=['adidas', 'nike'], files_sample=[]
- `/mnt/data/dataset/test/adidas`: dirs=[], files_sample=['Adidas (18).jpg', 'Adidas (19).jpg', 'Adidas (20).jpg', 'Adidas (21).JPG', 'Adidas (22).jpg']
- `/mnt/data/dataset/test/nike`: dirs=[], files_sample=['Image_10.jpg', 'Image_100.jpg', 'Image_110.jpg', 'Image_120.jpg', 'Image_130.jpg']
- `/mnt/data/dataset/train`: dirs=['adidas', 'nike'], files_sample=[]
- `/mnt/data/dataset/train/adidas`: dirs=[], files_sample=['Adidas (1).jpg', 'Adidas (10).jpg', 'Adidas (11).jpg', 'Adidas (12).jpg', 'Adidas (13).jpg']
- `/mnt/data/dataset/train/nike`: dirs=[], files_sample=['Image_1.jpg', 'Image_101.jpg', 'Image_102.jpg', 'Image_103.jpg', 'Image_104.JPG']
- `/mnt/data/dataset/validation`: dirs=['adidas', 'nike'], files_sample=[]
- `/mnt/data/dataset/validation/adidas`: dirs=[], files_sample=['adidas_ (149).jpg', 'adidas_ (150).jpg', 'adidas_ (151).jpg', 'adidas_ (152).jpg', 'adidas_ (153).jpg']
- `/mnt/data/dataset/validation/nike`: dirs=[], files_sample=['Image_261.jpg', 'Image_262.jpg', 'Image_263.jpg', 'Image_264.jpg', 'Image_265.jpg']
