In [1]:
!pip uninstall -y ip-adapter diffusers
!pip install --no-cache-dir git+https://github.com/Ahmed-Sherif-ASA/IP-Adapter@main
# !pip install --upgrade diffusers
!pip install diffusers

# Cell 2: Import after fresh install
import ip_adapter
from ip_adapter import IPAdapterXL
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline

Found existing installation: ip-adapter 0.1.0
Uninstalling ip-adapter-0.1.0:
  Successfully uninstalled ip-adapter-0.1.0
Found existing installation: diffusers 0.35.1
Uninstalling diffusers-0.35.1:
  Successfully uninstalled diffusers-0.35.1
[0mCollecting git+https://github.com/Ahmed-Sherif-ASA/IP-Adapter@main
  Cloning https://github.com/Ahmed-Sherif-ASA/IP-Adapter (to revision main) to /tmp/pip-req-build-wlfovp4l
  Running command git clone --filter=blob:none --quiet https://github.com/Ahmed-Sherif-ASA/IP-Adapter /tmp/pip-req-build-wlfovp4l
  Resolved https://github.com/Ahmed-Sherif-ASA/IP-Adapter to commit ea6ea88a0b292dffc3fc81546cf6b83defa7c4d2
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: ip-adapter
  Building wheel for ip-adapter (pyproject.toml) ... [?25ldone
[?25h  Created wheel for ip-adapter: filename=ip_adapt

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from PIL import Image

import config  # ✅ will work if notebook is in same folder as config.py
from data.dataset import UnifiedImageDataset

In [3]:
import os
device = 'cuda'
HF_CACHE = "/data/hf-cache"
os.makedirs(HF_CACHE, exist_ok=True)

os.environ["HF_HOME"] = HF_CACHE
os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(HF_CACHE, "hub")
os.environ["TRANSFORMERS_CACHE"]     = os.path.join(HF_CACHE, "transformers")
os.environ["DIFFUSERS_CACHE"]        = os.path.join(HF_CACHE, "diffusers")
os.environ["TORCH_HOME"]             = os.path.join(HF_CACHE, "torch")

In [4]:
# from IPython.display import display
# from torchvision import transforms
# from ip_adapter import IPAdapter
# from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

# # Create pipeline with ControlNet built-in
# controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")

# pipe = StableDiffusionControlNetPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5", 
#     controlnet=controlnet,
#     torch_dtype=torch.float16,
#     safety_checker=None,
#     feature_extractor=None,
#     cache_dir=HF_CACHE,
# ).to(device)

# pipe.controlnet = pipe.controlnet.to(dtype=torch.float16)

# # Now create IP-Adapter (it will automatically detect the ControlNet)
# ip_adapter = IPAdapter(
#     sd_pipe=pipe,  # This pipe already has ControlNet integrated
#     image_encoder_path="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
#     ip_ckpt="/data/thesis/models/ip-adapter_sd15.bin",
#     device=device,
#     embedding_type='clip'
# )

from IPython.display import display
from torchvision import transforms
import ip_adapter
from ip_adapter import IPAdapterXL
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
# from ip_adapter import IPAdapterXL  # Changed to XL version
# from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

# Create pipeline with SDXL-compatible ControlNet
controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")  # SDXL ControlNet

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",  # SDXL base model
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
    feature_extractor=None,
    cache_dir=HF_CACHE,
    device=device,
).to(device)

pipe.controlnet = pipe.controlnet.to(dtype=torch.float16)

# Now create IP-Adapter XL
ip_adapter = IPAdapterXL(  # Changed to XL version
    sd_pipe=pipe,
    image_encoder_path="laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",  # ViT-bigG-14
    ip_ckpt="/data/thesis/models/ip-adapter_sdxl.bin",  # SDXL checkpoint
    device=device,
    embedding_type='clip'
)

Keyword arguments {'safety_checker': None, 'device': 'cuda'} are not expected by StableDiffusionXLControlNetPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
from features.clip_embeddings_xl_hf import check_ip_adapter_compatibility, update_model_id

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/169M [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

Model projection_dim: 1280


In [6]:
check_ip_adapter_compatibility(ip_adapter)

IP-Adapter expected projection_dim: 1280
This script produces projection_dim: 1280
Compatible: True


(True, 1280, 1280)

In [None]:
import torch
from features.clip_embeddings_xl_hf import compute_clip_embedding_xl as hf_embedding
# Import your original OpenCLIP version
from features.clip_embeddings_xl import compute_clip_embedding_xl as openclip_embedding

# Test with the same image
test_image = torch.rand(3, 224, 224)  # Your test image

# Generate embeddings with both methods
hf_emb = hf_embedding(test_image)
openclip_emb = openclip_embedding(test_image)

print(f"HF embedding shape: {hf_emb.shape}")
print(f"OpenCLIP embedding shape: {openclip_emb.shape}")
print(f"Are they close? {torch.allclose(hf_emb, openclip_emb, atol=1e-3)}")
print(f"Max difference: {torch.max(torch.abs(hf_emb - openclip_emb))}")

  with torch.cuda.amp.autocast(dtype=torch.float16):


HF embedding shape: torch.Size([1280])
OpenCLIP embedding shape: torch.Size([1280])
Are they close? False
Max difference: 0.09958648681640625


  with torch.cuda.amp.autocast(dtype=torch.float16):


: 

In [None]:
import pandas as pd
from torch.utils.data import DataLoader

from data.dataset import UnifiedImageDataset
from features.clip_embeddings import generate_embeddings_fp16
from features.color_histograms import generate_color_histograms
from config import CSV_PATH, BATCH_SIZE, EMBEDDINGS_TARGET_PATH, XL_EMBEDDINGS_TARGET_PATH
from multiprocessing import cpu_count


df = pd.read_csv(CSV_PATH)
print('xxxx')
print(df.head())
print(df.shape)
# assert "local_path" in df.columns, "CSV must have a local_path column!"

def print_system_profile():
    import os, shutil, platform, psutil, torch
    print("=== SYSTEM PROFILE ===")
    print("Python:", platform.python_version())
    print("PyTorch:", torch.__version__)
    print("CPU cores:", psutil.cpu_count(logical=True))
    vm = psutil.virtual_memory()
    print(f"RAM: {vm.total/1e9:.1f} GB, free {vm.available/1e9:.1f} GB")
    du = shutil.disk_usage("/data")
    print(f"/data disk: total {du.total/1e9:.1f} GB, free {du.free/1e9:.1f} GB")
    print("CUDA available:", torch.cuda.is_available())
    if torch.cuda.is_available():
        i = torch.cuda.current_device()
        print("GPU:", torch.cuda.get_device_name(i))
        print(f"VRAM total: {torch.cuda.get_device_properties(i).total_memory/1e9:.1f} GB")
    print("======================")

print_system_profile()

dataset = UnifiedImageDataset(
    df, 
    mode="file_df"
)

num_cpu = cpu_count()

loader = DataLoader(
    dataset,
    batch_size=224,                    # then try 160/192/224
    shuffle=False,
    num_workers=min(32, max(8, num_cpu // 8)),  # 16–32 is a good sweet spot
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=6,                 # 4–8
    pin_memory_device="cuda",
)


embeddings = generate_embeddings_fp16(
    loader,
    EMBEDDINGS_TARGET_PATH,
    force_recompute=True
)