# Cars196

In [None]:
# %load_ext autoreload
# %autoreload 2

In [None]:
import sys
sys.path.append('../')

from utils import repeat_n_times, compute_text_features
import torch
import numpy as np
from glob import glob
import pandas as pd
import os

In [None]:
# Hyperparameters
# Number of repetitions handed to `repeat_n_times` for every experiment below;
# each call returns one `means` and one `stds` result.
num_repetitions = 5

In [None]:
# Methods to evaluate in this run; forwarded to `repeat_n_times` as
# `include_models`. New results are concatenated as extra columns onto any
# results file that already exists, so this list may name only newly added
# methods.
methods = ["ae"]

# Loading embeddings

In [None]:
# load image features and labels
# `image_features` is used as a mapping (it is filtered through `.items()`
# further down); `df` supplies the "class_name", "manufacturer" and "type"
# label columns used by the experiments.
image_features = torch.load("../data/cars196/image_features.pt")
df = pd.read_csv("../data/cars196/Cars196.csv")

We only use the test images from the dataset.

In [None]:
# Restrict both the features and the label table to the test images.
# NOTE(review): the feature filter keeps keys strictly greater than `cutoff`,
# while `df[cutoff:]` keeps rows from position `cutoff` onwards. Both select
# the same images only if the feature keys are 1-based -- confirm, otherwise
# features and labels are shifted by one.
cutoff = 8054
image_features = {i: item for i, item in image_features.items() if i > cutoff}
df = df[cutoff:]

In [None]:
# Order the remaining features by ascending key and stack them along a new
# first axis, so that axis indexes the images.
# NOTE(review): the resulting rows are assumed to line up with the rows of
# `df` -- confirm.
image_features = [image_features[i] for i in sorted(image_features.keys())]
image_features = torch.tensor(np.stack(image_features, axis=0))

In [None]:
# Sanity check: display the shape of the stacked feature tensor.
image_features.shape

# Experiments

In [None]:
# Embedding sizes to sweep; each value is passed to `repeat_n_times` as
# `num_components` and gets its own results file.
num_components_list = [2, 4, 8, 16, 32, 64, 128, 256, 512]

## Car Model

In [None]:
# Scrapes all car model names from the kbb page
# car_models = pd.read_html("https://www.kbb.com/car-make-model-list/new/view-all/make/")
# all_models = car_models[0]["Make"] + " " + car_models[0][".css-1mzj64w-ContentWrapper{margin:10px 24px 10px 0px;}Model"]
# all_models = all_models.dropna().tolist()
# all_models = list(set(all_models))

In [None]:
# Cached list of car model names (per the file name, scraped from kbb on
# 2022-08-17); used to build the text prompts for the car-model experiment.
all_models = torch.load("../data/cars196/all_models_scraped_kbb_2022-08-17.pt")

In [None]:
# Car-model experiment: labels are the per-image class names, the text
# prompts are built from the scraped model list `all_models`.
labels = df["class_name"].tolist()
texts = [f"a photo of a {m}" for m in all_models]

text_features = compute_text_features(texts)

# Create the output directory up front so that an expensive run cannot fail at
# the final `torch.save` merely because the directory is missing.
os.makedirs("dimensions_results", exist_ok=True)

for num_components in num_components_list:
    print(f"Embedding size: {num_components}")

    path = f"dimensions_results/model_{num_components}.pt"

    means, stds = repeat_n_times(
        num_repetitions,
        labels,
        image_features,
        text_features=text_features,
        num_components=num_components,
        include_models=methods,
    )

    # if we already have results, just concatenate the new columns
    # (duplicated columns are removed later by the clean-up cell)
    if os.path.exists(path):
        previous = torch.load(path)
        means = pd.concat([previous["means"], means], axis="columns")
        stds = pd.concat([previous["stds"], stds], axis="columns")

    torch.save({"num_components": num_components, "means": means, "stds": stds}, path)

## Car Manufacturer

In [None]:
# # Scrapes all car manufacturer names from the kbb page
# car_models = pd.read_html("https://www.kbb.com/car-make-model-list/new/view-all/make/")
# manufacturers = car_models[0]["Make"]
# manufacturers = manufacturers.dropna().tolist()
# manufacturers = list(set(manufacturers))

In [None]:
# Cached list of manufacturer names (per the file name, scraped from kbb on
# 2022-08-17); used to build the text prompts for the manufacturer experiment.
manufacturers = torch.load("../data/cars196/manufacturers_scraped_kbb_2022-08-17.pt")

In [None]:
# Manufacturer experiment: labels are the per-image manufacturers, the text
# prompts are built from the scraped `manufacturers` list.
labels = df["manufacturer"].tolist()
texts = [f"a photo of a car produced by {m}" for m in manufacturers]

text_features = compute_text_features(texts)

# Create the output directory up front so that an expensive run cannot fail at
# the final `torch.save` merely because the directory is missing.
os.makedirs("dimensions_results", exist_ok=True)

for num_components in num_components_list:
    print(f"Embedding size: {num_components}")

    path = f"dimensions_results/manufacturer_{num_components}.pt"

    means, stds = repeat_n_times(
        num_repetitions,
        labels,
        image_features,
        text_features=text_features,
        num_components=num_components,
        include_models=methods,
    )

    # if we already have results, just concatenate the new columns
    # (duplicated columns are removed later by the clean-up cell)
    if os.path.exists(path):
        previous = torch.load(path)
        means = pd.concat([previous["means"], means], axis="columns")
        stds = pd.concat([previous["stds"], stds], axis="columns")

    torch.save({"num_components": num_components, "means": means, "stds": stds}, path)

## Car Type

In [None]:
# Car-type experiment: labels are the per-image car types.
# NOTE(review): unlike the model and manufacturer experiments, the prompts are
# built from the per-image label list itself, so every type is encoded once
# per image (duplicates included). If `compute_text_features` only needs the
# distinct types, pass the unique values instead -- confirm before changing,
# since it may alter the results.
labels = df["type"].tolist()
texts = [f"a photo of a {m}" for m in labels]

text_features = compute_text_features(texts)

# Create the output directory up front so that an expensive run cannot fail at
# the final `torch.save` merely because the directory is missing.
os.makedirs("dimensions_results", exist_ok=True)

for num_components in num_components_list:
    print(f"Embedding size: {num_components}")

    path = f"dimensions_results/type_{num_components}.pt"

    means, stds = repeat_n_times(
        num_repetitions,
        labels,
        image_features,
        text_features=text_features,
        num_components=num_components,
        include_models=methods,
    )

    # if we already have results, just concatenate the new columns
    # (duplicated columns are removed later by the clean-up cell)
    if os.path.exists(path):
        previous = torch.load(path)
        means = pd.concat([previous["means"], means], axis="columns")
        stds = pd.concat([previous["stds"], stds], axis="columns")

    torch.save({"num_components": num_components, "means": means, "stds": stds}, path)

# Cleaning up
If the same method was run multiple times, its column appears more than once in a results file; keep only the first occurrence of each column.

In [None]:
# Rewrite every stored results file without duplicated method columns.
# `columns.duplicated()` flags every repeat after the first occurrence, so the
# first run of each method is the one that is kept.
for identifier in ["model", "manufacturer", "type"]:
    for f in glob(f"dimensions_results/{identifier}_*.pt"):
        data = torch.load(f)

        # "means" and "stds" are de-duplicated independently, each by its own
        # column labels.
        for key in ("means", "stds"):
            frame = data[key]
            data[key] = frame.loc[:, ~frame.columns.duplicated()]

        torch.save(data, f)

# Visualization

In [None]:
import sys
sys.path.append("..")
from utils import model_to_name

import torch
from glob import glob
from matplotlib import pyplot as plt
import numpy as np
import matplotlib

# NOTE(review): 'science' is not a built-in matplotlib style; it is presumably
# provided by the SciencePlots package, whose newer releases need an explicit
# `import scienceplots` before the style can be used -- confirm.
plt.style.use(['science'])

In [None]:
def catch(func, handler):
    """Call ``func()`` and return its result, delegating failures to ``handler``.

    If ``func`` raises an ``Exception``, the exception instance is passed to
    ``handler`` and the handler's return value is returned instead. Exceptions
    raised by ``handler`` itself are not intercepted.
    """
    try:
        outcome = func()
    except Exception as exc:
        return handler(exc)
    else:
        return outcome

In [None]:
# One subplot per label granularity, showing MAP@R against embedding size for
# every method found in the stored results files.
plt.figure(figsize=(11, 1.7))

metric = "mean_average_precision_at_r"

# Fixed plotting order so that every method gets the same colour in each
# subplot. r"\model" is a raw string: the plain literal "\model" relies on an
# invalid escape sequence, which newer Python versions warn about; the runtime
# value (backslash + "model") is unchanged.
model_order = ["Random", "CLIP", r"\model", "Rand. transform", "PCA", "Linear Autoencoder", "AE", "Oracle"]

for i, (name, identifier) in enumerate([("Car Model", "model"), ("Manufacturer", "manufacturer"), ("Car Type", "type")]):
    ax = plt.subplot(1, 3, i+1)

    files = glob(f"dimensions_results/{identifier}_*.pt")
    data = [torch.load(f) for f in files]
    data = sorted(data, key=lambda x: x["num_components"])

    # All embedding sizes with a results file; used for the tick positions so
    # they do not depend on which method happened to be plotted last.
    sizes = np.array([d["num_components"] for d in data])

    for model in model_order:
        # Collect size, mean and std together so the three arrays always have
        # the same length, even when a file lacks this method or metric.
        points = []
        for d in data:
            mean = catch(lambda: d["means"][model][metric], lambda e: None)
            std = catch(lambda: d["stds"][model][metric], lambda e: None)
            if mean is not None and std is not None:
                points.append((d["num_components"], mean, std))

        x = np.array([p[0] for p in points])
        means = np.array([p[1] for p in points])
        stds = np.array([p[2] for p in points])

        # Methods without data are still plotted (as an empty series) so the
        # colour cycle stays aligned across subplots.
        ax.errorbar(x, means, yerr=stds, label=model_to_name.get(model, model))

    ax.set_title(name)
    # Only label the larger sizes; the small ones sit too close together.
    ax.set_xticks(sizes, [str(size) if size >= 64 else "" for size in sizes])
    # removes all minor ticks, which would else blow up the axis and does not look good
    ax.xaxis.set_minor_locator(matplotlib.ticker.NullLocator())
    ax.set_xlabel("Embedding Size")
    if i == 0:
        ax.set_ylabel("MAP@R")

    # A single shared legend, centred below the middle subplot.
    if i == 1:
        plt.legend(loc="lower center", ncol=8, bbox_to_anchor=(0.5, -0.5))

plt.savefig("dimensions_results/cars196.pdf", bbox_inches="tight")
plt.show()