# Implementation: CLIP Zero-Shot Classification

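**Goal**: Classify an image without training a task-specific classifier, by comparing its embedding against the text embeddings of candidate labels.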

In [None]:
import torch
import torch.nn.functional as F

# 1. Mock CLIP Model
# No real encoder is run here: the embeddings below are random stand-ins for
# what a pre-trained model would produce. A dedicated, seeded generator keeps
# the printed numbers identical from run to run without touching the global
# torch RNG state.
EMBED_DIM = 512      # width of the mock embedding space
NOISE_STD = 0.1      # per-dimension noise added to the matching text embedding
LOGIT_SCALE = 100.0  # multiplier applied to the similarities before softmax
rng = torch.Generator().manual_seed(0)

# Image: A photo of a Golden Retriever
image_emb = torch.randn(1, EMBED_DIM, generator=rng)
image_emb = F.normalize(image_emb, dim=1)

# Text Labels
labels = ["a cat", "a dog", "a car"]
text_embs = torch.randn(len(labels), EMBED_DIM, generator=rng)
# Hack: make the "a dog" text embedding a noisy copy of the image embedding,
# so that it is the closest label. Both sides of the assignment are 1-D rows
# of length EMBED_DIM, so the shapes match exactly.
match_idx = labels.index("a dog")
text_embs[match_idx] = image_emb[0] + NOISE_STD * torch.randn(EMBED_DIM, generator=rng)
text_embs = F.normalize(text_embs, dim=1)

# 2. Calculate Similarity
# Both operands are L2-normalised, so the dot product is the cosine
# similarity. Scaling before the softmax sharpens the resulting distribution.
logits = torch.matmul(image_emb, text_embs.T) * LOGIT_SCALE
probs = F.softmax(logits, dim=1)

for label, prob in zip(labels, probs[0]):
    print(f"{label}: {prob.item()*100:.2f}%")

# 3. Zero-shot prediction: the label with the highest probability.
predicted_label = labels[probs.argmax(dim=1).item()]
print(f"Prediction: {predicted_label}")

## Conclusion
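We classified the image as 'a dog' — the label whose text embedding has the highest cosine similarity with the image embedding — without ever training a specific 'dog vs cat' classifier. Note that the embeddings in this demo are random stand-ins; with a real CLIP model they would come from its pre-trained image and text encoders.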