In [3]:
#!pip install --upgrade transformers datasets
#!pip install --upgrade huggingface hub
#!pip install --upgrade pip
#!pip install --upgrade torch torchvision 

# Using feature embeddings to find new architectural concepts


First get an example feature output from DINO.

#### DINOv2 Model Example output:

In [5]:
from transformers import AutoImageProcessor, Dinov2Model
import torch, torchvision
from datasets import load_dataset
#from .autonotebook import tqdm as notebook_tqdm

In [6]:
print(torch.__version__)
print(torchvision.__version__)

2.1.2
0.16.2


In [7]:
device = torch.device("cuda" if torch.cuda.is_available()  
                            else "mps"  if torch.backends.mps.is_available() 
                            else "cpu")
print(f"Using device: {device}")

# If you have a model, move it to the device
# model.to(device)

Using device: mps


In [8]:
#!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/ari/.cache/huggingface/token
Login successful


In [14]:
dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
model = Dinov2Model.from_pretrained("facebook/dinov2-base")

inputs = image_processor(image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

last_hidden_states = outputs.last_hidden_state
list(last_hidden_states.shape)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
Downloading builder script: 100%|██████████| 2.56k/2.56k [00:00<00:00, 8.21MB/s]
Downloading data: 100%|██████████| 173k/173k [00:00<00:00, 804kB/s] 
Generating test split: 1 examples [00:00,  2.24 examples/s]
preprocessor_config.json: 100%|██████████| 436/436 [00:00<00:00, 2.73MB/s]
config.json: 100%|██████████| 548/548 [00:00<00:00, 1.66MB/s]
model.safetensors: 100%|██████████| 346M/346M [00:28<00:00, 11.9MB/s] 


[1, 257, 768]

#### last hidden state shape = feature embeddings per image patch

* 1 -> input batch size
* 257 -> 256 image patches the input images are split into (one additional token for a special purpose, like classification or a start/end token). 
* 768 -> This is the dimensionality of the embeddings. Each of the 257 elements (patches or tokens) is transformed into a 768-dimensional vector. 

In [15]:
last_hidden_states

tensor([[[-2.1747, -0.4729,  1.0936,  ...,  0.2041,  1.1101,  0.1363],
         [-3.2780, -0.8269, -0.9210,  ...,  1.4415, -0.5364, -0.8757],
         [-2.9129,  1.1284, -0.7306,  ...,  0.6959, -1.8791, -2.3638],
         ...,
         [-0.5463,  1.4382, -0.2563,  ...,  0.1873, -2.9950,  0.4067],
         [-3.0848,  2.0568,  1.5137,  ...,  0.9157, -2.7059,  2.2017],
         [-0.7499,  0.0903,  1.3731,  ..., -0.2961, -2.3682, -0.1329]]])

## Architectural epochs / styles dataset

used hierarchical dataset:
- https://www.kaggle.com/datasets/gustavoachavez/architectural-styles-periods-dataset

flat dataset alternative:
- https://www.kaggle.com/datasets/dumitrux/architectural-styles-dataset?resource=download


#### Preprocessing for DINO:


In [1]:
%pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Using cached opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl (35.4 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.9.0.80
Note: you may need to restart the kernel to use updated packages.


In [11]:
import numpy as np
import cv2

In [21]:
from transformers import AutoImageProcessor, Dinov2Model
import torch, torchvision

device = torch.device("cuda" if torch.cuda.is_available()  
                            #else "mps"  if torch.backends.mps.is_available() 
                            else "cpu")
print(f"Using device: {device}")

# If you have a model, move it to the device
# model.to(device)

import os
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'


Using device: cpu


#### calculating the centroid for all images in a folder:

In [181]:
import glob
from PIL import Image
from pathlib import Path
import os
import json

# Path to your folder
#folder_path = 'data/Architectural_Styles/Classical/Roman_Classical'
#image_paths = glob.glob(folder_path + '/*.jpg')  # Adjust the pattern as needed

# Specify the root folder path
root_folder = "data/Architectural_Styles/Modernism"

# Initialize the processor and model
image_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
model = Dinov2Model.from_pretrained("facebook/dinov2-base").to(device)

mean_last_hidden_states = []  # Store means here

# Iterate through all subdirectories in the root folder
for dirpath, dirnames, filenames in os.walk(root_folder):
    # dirpath: Current directory path
    # dirnames: List of subdirectory names in the current directory
    # filenames: List of file names in the current directory
    print(f"Found directory: {dirpath}"
          f"\nSubdirectories: {dirnames}")
    # Optional: You can add conditions to filter specific subfolders if needed
    # Example: if "exclude_folder" in dirpath:
    #              continue  # Skip this subfolder
    for dirname in dirnames:
        print(f"\t{dirname}")
    # Your code to run on each subfolder goes here
    # You can access the current subfolder's path with 'dirpath'
        image_paths = glob.glob(os.path.join(dirpath, dirname, '*.jpg')) 
        print(image_paths)
        for image_path in image_paths:
            # Load the image
            image = Image.open(image_path).convert("RGB")
            
            # Process the image
            inputs = image_processor(image, return_tensors="pt").to(device)
            
            # Perform inference
            with torch.no_grad():
                output = model(**inputs)
                
            mean_output = output.last_hidden_state.mean(dim=1)
            mean_last_hidden_states.append(mean_output)

        # Assuming mean_last_hidden_states is a list of tensors with shape [1, 768]
        # Stack all the mean last hidden state tensors along a new dimension
        all_means_tensor = torch.cat(mean_last_hidden_states, dim=0)

        # Calculate the centroid by computing the mean across the new batch dimension
        centroid = all_means_tensor.mean(dim=0)

        # get folder path
        path=Path(os.path.join(dirpath, dirname))

        # Extract the folder name
        folder_name = path.parts[-1]

        # save the centroid vector to a file
        # Path to the file
        file_path = 'embeddings_dict_modernism.json'

        # Check if the file exists
        if os.path.exists(file_path):
            # Load dictionary from file
            with open(file_path, 'r') as file:
                embeddings_dict_modernism = json.load(file)
            print("Dictionary loaded successfully.")
        else:
            print("File does not exist.")
            embeddings_dict_modernism = {}  # Initialize an empty dictionary or handle the absence as needed

        # Convert global_centroid_vector to a list for JSON compatibility, if necessary
        centroid_list = centroid.tolist()

        # Save to dictionary
        embeddings_dict_modernism[folder_name] = centroid_list

        # Save dictionary to file
        with open('embeddings_dict_modernism.json', 'w') as file:
            json.dump(embeddings_dict_modernism, file)

embeddings_dict_modernism.keys()




Found directory: data/Architectural_Styles/Eclecticism
Subdirectories: ['Art_Nouveau', 'American_Foursquare', 'American_craftsman_style']
	Art_Nouveau
['data/Architectural_Styles/Eclecticism/Art_Nouveau/42301.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/4217.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42467.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42473.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42315.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42329.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42498.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42103.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42117.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42275.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42513.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42507.jpg', 'data/Architectural_Styles/Eclecticism/Art_Nouveau/42261.jpg', 'data/Architectural_Styles/Ecl

dict_keys(['Art_Nouveau', 'American_Foursquare', 'American_craftsman_style'])

In [182]:
embeddings_dict_ec.keys()

dict_keys(['Art_Nouveau', 'American_Foursquare', 'American_craftsman_style'])

In [179]:
# del embeddings_dict_ec["Roman_Classical"]

In [183]:
import numpy as np

current_dict = embeddings_dict_ec  # change this to the dictionary you want to use

# save the centroid vector to a file
# Path to the file
file_path_top = 'architecture_dict.json'

# Check if the file exists
if os.path.exists(file_path_top):
    # Load dictionary from file
    with open(file_path_top, 'r') as file:
        architecture_dict = json.load(file)
    print("Dictionary loaded successfully.")
else:
    print("File does not exist.")
    architecture_dict = {}  # Initialize an empty dictionary or handle the absence as needed


# Calculate the mean vector of the existing values
mean_vector = np.mean(list(current_dict.values()), axis=0)

folder_name_top = "Eclecticism"
# Save to dictionary
architecture_dict[folder_name_top] =   {"subcategories": current_dict,
                                                                 "mean_vector": mean_vector.tolist() }


# Save dictionary to file
with open('architecture_dict.json', 'w') as file:
    json.dump(architecture_dict, file)

Dictionary loaded successfully.


In [184]:
architecture_dict.keys()

dict_keys(['21st_Century', 'Baroque', 'Classical', 'Early_Christian_Medieval', 'Eclecticism'])

In [185]:
def print_keys_with_indent(dictionary, indent=0):
    for key, value in dictionary.items():
        if isinstance(value, dict):
            print(" " * indent + key + ":")
            print_keys_with_indent(value, indent + 4)  # Increase indentation for nested dictionaries
        else:
            print(" " * indent + key)

# Call the function to print keys with indentation
print_keys_with_indent(architecture_dict)


21st_Century:
    subcategories:
        Blobitecture
        Deconstructivism
        Eco-architecture
        Neo-Futurism
    mean_vector
Baroque:
    subcategories:
        Andean_Baroque
        Baroque
        Earthquake_Baroque
        Rococo
        Russian_Baroque
        Sicilian_Baroque
    mean_vector
Classical:
    subcategories:
        Achaemenid
        Ancient_Egyptian
        Herodian
        Roman_Classical
    mean_vector
Early_Christian_Medieval:
    subcategories:
        Romanesque
        Norman
        Medieval
        Venetian
        Byzantine
        Gothic
    mean_vector
Eclecticism:
    subcategories:
        Art_Nouveau
        American_Foursquare
        American_craftsman_style
    mean_vector


In [130]:
# Delete a subkey from the nested dictionary
del architecture_dict["Baroque"]["subcategories"]["Achaemenid"]

#### visualize image embeddings with thumbnails of images

In [2]:
#%pip install plotly

you have:

- projected_embeddings: A NumPy array of shape (n_images, 2) with your 2D projected embeddings.
- image_paths: A list of file paths corresponding to the images.

In [None]:
thumbnail_paths = []
thumbnail_size = (100, 100)

for path in image_paths:
    img = Image.open(path)
    img.thumbnail(thumbnail_size)
    thumbnail_path = f'thumbnail_{path}'
    img.save(thumbnail_path)
    thumbnail_paths.append(thumbnail_path)


This example uses customdata to store the path to the thumbnail images and displays them using the hover template. Plotly's hover templates allow HTML content, which is how the images are displayed.
Click events in Plotly typically require JavaScript or Dash (a Python framework for building web applications) for more complex interactions. The example above uses hover actions for simplicity, as true click events to display images would necessitate a more complex setup, likely involving a web server or Dash app.
Ensure that the paths to the thumbnails are accessible from where the Plotly figure is being viewed. If you're viewing the figure in a Jupyter notebook, the image paths may need to be relative to the notebook or served through a web server.

In [None]:
import plotly.graph_objects as go

# Create a scatter plot
fig = go.Figure(data=[go.Scatter(
    x=projected_embeddings[:, 0], 
    y=projected_embeddings[:, 1],
    mode='markers',
    marker=dict(size=5),
    customdata=thumbnail_paths,
    hoverinfo='none'
)])

# Update layout for hover functionality
fig.update_layout(
    hovermode='closest',
    title="Image Embeddings Visualization"
)

# Use JavaScript for custom hover functionality to display images
fig.update_traces(
    hovertemplate='<img src="%{customdata}"></img>',
)

fig.show()


alternatively:


In [None]:
import base64

def image_path_to_data_uri(path):
    with open(path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()
    return f"data:image/png;base64,{encoded_string}"

# Convert all thumbnail paths to data URIs
data_uris = [image_path_to_data_uri(path) for path in thumbnail_paths]

fig = go.Figure(data=[go.Scatter(
    x=projected_embeddings[:, 0], 
    y=projected_embeddings[:, 1],
    mode='markers',
    marker=dict(size=5),
    customdata=data_uris,
    hoverinfo='none'
)])

fig.update_layout(
    hovermode='closest',
    title="Image Embeddings Visualization"
)

# Update hover template to use data URIs
fig.update_traces(
    hovertemplate='<img src="%{customdata}"></img>',
)

fig.show()
