In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, models
import numpy as np
import matplotlib.pyplot as plt

In [2]:

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)


print(trainset.classes)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [02:11<00:00, 1.30MB/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [3]:
train_images = []
train_labels = []
test_images = []
test_labels = []


classes = ['airplanes', 'cars', 'birds', 'cats', 'deer', 'dogs', 'frogs', 'horses', 'ships', 'trucks']


for image, label in trainset:
    if label != 9:  
        train_images.append(image)
        train_labels.append(label)

for image, label in testset:
    if label != 9: 
        test_images.append(image)
        test_labels.append(label)

In [4]:
train_loader = DataLoader(list(zip(train_images, train_labels)), batch_size=64, shuffle=True)
test_loader = DataLoader(list(zip(test_images, test_labels)), batch_size=64, shuffle=False)

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  

In [6]:
#BERT: Text tokenizer, Text Encoder, Text to vector converter!

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)  

  return torch.load(checkpoint_file, map_location="cpu")
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
label_embeddings = []

for label in range(10): 
    if label != 9:  
        label_text = str(trainset.classes[label])
        print(label_text)
        inputs = tokenizer(label_text, return_tensors='pt', padding=True, truncation=True)
        outputs = bert_model(**inputs.to(device))  
        embeddings = outputs.last_hidden_state.mean(dim=1) 
        label_embeddings.append(embeddings.cpu().detach().numpy()) 

label_embeddings = np.array(label_embeddings)

airplane
automobile
bird
cat
deer
dog
frog
horse
ship


In [8]:
#desired_label = 'airplane'
#label_text = str(desired_label)
#inputs = tokenizer(label_text, return_tensors='pt', padding=True, truncation=True)
#outputs = bert_model(**inputs.to(device))  
#embeddings = outputs.last_hidden_state.mean(dim=1) 
#print('text embedding output:', embeddings.cpu().detach().numpy())

In [9]:
resnet18 = models.resnet18(pretrained=True)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 768) 
resnet18 = resnet18.to(device)
criterion = nn.CosineEmbeddingLoss()
optimizer = optim.Adam(resnet18.parameters(), lr=0.0001)

#for param in resnet18.parameters():
    #print(param)

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics.pairwise import cosine_similarity



# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    resnet18.train()  
    running_loss = 0.0
    
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)  
        
        # Assuming label_embeddings is precomputed and available for the targets
        target_embeddings = torch.tensor(label_embeddings[targets.cpu().numpy()]).to(device) 
        target_embeddings = target_embeddings.squeeze(1)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = resnet18(inputs)
        outputs = outputs.to(device)

        # Define the similarity target
        similarity_target = torch.ones(outputs.size(0), device=device)
        
        # Calculate the loss
        loss = criterion(outputs, target_embeddings, similarity_target)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    # Print the loss for each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/5], Loss: 0.1037
Epoch [2/5], Loss: 0.0431
Epoch [3/5], Loss: 0.0270
Epoch [4/5], Loss: 0.0198
Epoch [5/5], Loss: 0.0152


In [11]:
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
resnet18.eval()

predicted_embeddings = []
true_embeddings = []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = resnet18(inputs)
        outputs = outputs.to(device)

        target_embeddings = torch.tensor(label_embeddings[targets.cpu().numpy()]).to(device)
        target_embeddings = target_embeddings.squeeze(1)

      
        predicted_embeddings.append(outputs.cpu().numpy())
        true_embeddings.append(target_embeddings.cpu().numpy())


predicted_embeddings = np.concatenate(predicted_embeddings, axis=0)
true_embeddings = np.concatenate(true_embeddings, axis=0)
cos_similarities = cosine_similarity(predicted_embeddings, true_embeddings)
mean_cos_sim = np.mean(np.diag(cos_similarities))  
print("Mean Cosine Similarity:", mean_cos_sim)


Mean Cosine Similarity: 0.97888196


In [12]:
label = 'trucks'
label_text = str(label)
inputs = tokenizer(label_text, return_tensors='pt', padding=True, truncation=True)
outputs = bert_model(**inputs.to(device))  
unseen_embedding = outputs.last_hidden_state.mean(dim=1) 

In [13]:
truck_images = []
truck_labels = []

resnet18.eval()

for image, label in testset:
    if label == 9:  
        truck_images.append(image)
        truck_labels.append(label)


unseen_loader = DataLoader(list(zip(truck_images, truck_labels)), batch_size=64, shuffle=True)


In [14]:
predicted_embeddings = []
true_embeddings = []


with torch.no_grad():
    for inputs, targets in unseen_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = resnet18(inputs)
        outputs = outputs.to(device)

        target_embeddings = unseen_embedding
        target_embeddings = target_embeddings.squeeze(1)

        predicted_embeddings.append(outputs.cpu().numpy())
        true_embeddings.append(target_embeddings.cpu().numpy())


predicted_embeddings = np.concatenate(predicted_embeddings, axis=0)
true_embeddings = np.concatenate(true_embeddings, axis=0)


cos_similarities = cosine_similarity(predicted_embeddings, true_embeddings)
#print(cos_similarities)

mean_cos_sim = np.mean(np.diag(cos_similarities))  # Use diagonal
print("Mean Cosine Similarity:", mean_cos_sim)

Mean Cosine Similarity: 0.8512598


In [15]:
single_truck_image = truck_images[0].unsqueeze(0)
single_truck_image_transformed = single_truck_image.to(device)
resnet18.eval() 
with torch.no_grad():
    predicted_embedding = resnet18(single_truck_image_transformed).cpu().numpy()


cosine_sim_with_labels = cosine_similarity(predicted_embedding, unseen_embedding.cpu().detach().numpy())
print(cosine_sim_with_labels[0][0])


0.85693085


In [16]:
import torch
from transformers import BertModel, BertTokenizer, GPT2LMHeadModel, GPT2Tokenizer


gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')


def generate_text_from_bert_embeddings(bert_embeddings):

    start_token = gpt2_tokenizer.encode("<|endoftext|>", return_tensors='pt')
    generated_output = gpt2_model.generate(
        input_ids=start_token,
        max_length=50,  
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        temperature=0.7, 
    )
    
   
    generated_text = gpt2_tokenizer.decode(generated_output[0], skip_special_tokens=True)
    return generated_text

bert_embedding = torch.tensor(predicted_embedding)

generated_text = generate_text_from_bert_embeddings(bert_embedding)
print(f"Generated Text: {generated_text}")


  return torch.load(checkpoint_file, map_location="cpu")
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
2025-03-10 19:54:28.847940: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-10 19:54:29.088782: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-10 19:54:29.135422: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unabl

Generated Text: 
The first time I saw the new version of the game, I was so excited. I had never played a game like this before, and I couldn't wait to get started.

I was excited to see how the world of The


In [18]:
candidate_labels = ['airplanes', 'cars', 'birds', 'cats', 'owl']

candidate_labels_embeddings = []

for label in candidate_labels: 
    label_text = str(label)
    print(label_text)
    inputs = tokenizer(label_text, return_tensors='pt', padding=True, truncation=True)
    outputs = bert_model(**inputs.to(device))  
    embeddings = outputs.last_hidden_state.mean(dim=1) 
    candidate_labels_embeddings.append(embeddings.cpu().detach().numpy()) 

candidate_labels_embeddings = np.array(candidate_labels_embeddings)

airplanes
cars
birds
cats
owl


In [20]:
from PIL import Image
image_path = 'owl.jpg'  
image = Image.open(image_path).convert('RGB')
transformed_image = transform(image).unsqueeze(0)  
image.show()

/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/amin/snap/code/common/.cache/gio-modules/libgiolibproxy.so
eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE


In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transformed_image = transformed_image.to(device)

In [None]:
resnet18.eval()
sim_list = []

with torch.no_grad():
    predicted_embedding = resnet18(transformed_image).cpu().numpy()

for cosin_sim in candidate_labels_embeddings:
    cosine_sim_with_labels = cosine_similarity(predicted_embedding, cosin_sim)
    print(cosine_sim_with_labels[0][0])
    sim_list.append(cosine_sim_with_labels[0][0])

result_idx = np.argmax(sim_list)
print(candidate_labels[result_idx])

0.3480415
0.35064304
0.29478022
0.38192862
0.3114499
cats
