In [1]:
pip install transformers torch


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

Task 1: Sentence Transformer Implementation

In [14]:
import torch
from transformers import AutoTokenizer, AutoModel

model_name = "sentence-transformers/paraphrase-mpnet-base-v2" # Loading the pre-trained model
tokenizer = AutoTokenizer.from_pretrained(model_name)         #Loading the tokenizer
model = AutoModel.from_pretrained(model_name)

# Sample sentences
sentences = [
    "This is a test sentence.",
    "Sentence transformers are great for encoding text.",
    "How do we generate embeddings for sentences?",
    "i feel pretty pathetic most of the time",
    "i have the feeling she was amused and delighted",
    "i started feeling sentimental about dolls i had as a child and so began a collection of vintage barbie dolls from the sixties",
    "i found myself feeling a little discouraged that morning",
    "i feel so worthless during those times i was struggling finding work"
]

# Tokenize sentences
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

# Encode sentences
with torch.no_grad():
    outputs = model(**inputs)

# Obtain embeddings (mean pooling)
embeddings = outputs.last_hidden_state.mean(dim=1)

# Convert embeddings to numpy for easier handling
embeddings = embeddings.cpu().numpy()

# Display the embeddings
for i, sentence in enumerate(sentences):
    print(f"Sentence: {sentence}")
    print(f"Embedding: {embeddings[i]}\n")


Sentence: This is a test sentence.
Embedding: [ 6.06191158e-02 -6.51130080e-02 -2.15716846e-02 -5.68220131e-02
 -8.60867500e-02  3.70154306e-02  4.20839302e-02  7.67771304e-02
  2.68999726e-01  6.93117455e-02  2.38866419e-01  5.85427172e-02
 -1.88073620e-01 -3.61676127e-01  5.99301383e-02  3.71237211e-02
  3.65239531e-02 -5.06813638e-02 -1.68494359e-02  4.47940268e-02
 -6.98005557e-02  1.34221325e-02 -4.06300724e-02 -4.47222292e-02
 -1.19212426e-01 -4.43259329e-02  5.25720678e-02  3.96465808e-02
  6.28655404e-02 -1.85307290e-03  6.34944513e-02  2.45078392e-02
 -1.87325224e-01 -9.07272920e-02  5.13952635e-02 -6.85666502e-02
 -2.11607339e-03 -7.31154904e-02 -3.24255288e-01  5.05014956e-02
 -3.02251846e-01  2.17586786e-01  1.54066319e-03 -9.99964681e-03
 -6.22047856e-02  8.63398239e-02  1.59829527e-01 -2.56585091e-01
 -1.15372986e-01  1.44172147e-01  1.03009799e-02  2.67084628e-01
 -2.34714314e-01  5.81825115e-02  3.19298595e-01 -1.35602534e-01
 -6.32382855e-02 -9.38445777e-02  2.00218961

Task 2: Multi-Task Learning Expansion
Task A: Sentence Classification
Task B: Sentiment Analysis

In [16]:
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel

class MultiTaskModel(nn.Module):
    def __init__(self, model_name, num_classes_task_a, num_classes_task_b):
        super(MultiTaskModel, self).__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        self.classifier_task_a = nn.Linear(self.encoder.config.hidden_size, num_classes_task_a)
        self.classifier_task_b = nn.Linear(self.encoder.config.hidden_size, num_classes_task_b)

    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state.mean(dim=1)  # Mean pooling
        logits_task_a = self.classifier_task_a(pooled_output)
        logits_task_b = self.classifier_task_b(pooled_output)
        return logits_task_a, logits_task_b

# Define the model parameters
model_name = "sentence-transformers/paraphrase-mpnet-base-v2"
num_classes_task_a = 3  # e.g., Positive, Negative, Neutral
num_classes_task_b = 2  # e.g., Positive, Negative

# Initialize the model
model = MultiTaskModel(model_name, num_classes_task_a, num_classes_task_b)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define label mappings
label_mapping_task_a = {0: "Positive", 1: "Negative", 2: "Neutral"}
label_mapping_task_b = {0: "Negative", 1: "Positive"}

# Sample sentences and labels for demonstration
sentences = [
    "This is a test sentence.",
    "Sentence transformers are great for encoding text.",
    "How do we generate embeddings for sentences?",
    "i feel pretty pathetic most of the time",
    "i have the feeling she was amused and delighted",
    "i started feeling sentimental about dolls i had as a child and so began a collection of vintage barbie dolls from the sixties",
    "i found myself feeling a little discouraged that morning",
    "i feel so worthless during those times i was struggling finding work"
]
labels_task_a = [2, 2, 2, 1, 0, 0, 1, 1]  # Dummy labels for sentence classification
labels_task_b = [1, 1, 1, 0, 1, 1, 0, 0]  # Dummy labels for sentiment analysis

# Tokenize sentences
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Convert labels to tensors and ensure they match the batch size
labels_task_a = torch.tensor(labels_task_a)
labels_task_b = torch.tensor(labels_task_b)

# Define a dummy training step
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion_task_a = nn.CrossEntropyLoss()
criterion_task_b = nn.CrossEntropyLoss()

# Forward pass
logits_task_a, logits_task_b = model(input_ids, attention_mask)

# Compute losses
loss_task_a = criterion_task_a(logits_task_a, labels_task_a)
loss_task_b = criterion_task_b(logits_task_b, labels_task_b)

# Total loss
total_loss = loss_task_a + loss_task_b

# Backward pass and optimization
total_loss.backward()
optimizer.step()

print("Training step completed with total loss:", total_loss.item())

# Display logits and mapped labels for each task
print("\nLogits for Task A (Sentence Classification):")
print(logits_task_a)
predictions_task_a = torch.argmax(logits_task_a, dim=1)
for i, sentence in enumerate(sentences):
    print(f"Sentence: {sentence}")
    print(f"Prediction Task A (Classification): {label_mapping_task_a[predictions_task_a[i].item()]}\n")

print("\nLogits for Task B (Sentiment Analysis):")
print(logits_task_b)
predictions_task_b = torch.argmax(logits_task_b, dim=1)
for i, sentence in enumerate(sentences):
    print(f"Sentence: {sentence}")
    print(f"Prediction Task B (Sentiment Analysis): {label_mapping_task_b[predictions_task_b[i].item()]}\n")


Training step completed with total loss: 1.7667675018310547

Logits for Task A (Sentence Classification):
tensor([[-0.0066,  0.0147,  0.0684],
        [-0.0479, -0.0252,  0.1425],
        [-0.0551, -0.0288,  0.0831],
        [-0.0227,  0.0536,  0.0518],
        [-0.0628, -0.1398, -0.0142],
        [-0.0192,  0.0360, -0.0282],
        [ 0.0046, -0.0722,  0.0225],
        [-0.0164, -0.0131,  0.0531]], grad_fn=<AddmmBackward0>)
Sentence: This is a test sentence.
Prediction Task A (Classification): Neutral

Sentence: Sentence transformers are great for encoding text.
Prediction Task A (Classification): Neutral

Sentence: How do we generate embeddings for sentences?
Prediction Task A (Classification): Neutral

Sentence: i feel pretty pathetic most of the time
Prediction Task A (Classification): Negative

Sentence: i have the feeling she was amused and delighted
Prediction Task A (Classification): Neutral

Sentence: i started feeling sentimental about dolls i had as a child and so began a co

Task 3 Transfer Learning

In [None]:
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel

class MultiTaskModel(nn.Module):
    def __init__(self, model_name, num_classes_task_a, num_classes_task_b):
        super(MultiTaskModel, self).__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        self.classifier_task_a = nn.Linear(self.encoder.config.hidden_size, num_classes_task_a)
        self.classifier_task_b = nn.Linear(self.encoder.config.hidden_size, num_classes_task_b)

    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state.mean(dim=1)  # Mean pooling
        logits_task_a = self.classifier_task_a(pooled_output)
        logits_task_b = self.classifier_task_b(pooled_output)
        return logits_task_a, logits_task_b

# Define model parameters
model_name = "sentence-transformers/paraphrase-mpnet-base-v2"
num_classes_task_a = 3  # e.g., Positive, Negative, Neutral
num_classes_task_b = 2  # e.g., Positive, Negative

# Initialize the model
model = MultiTaskModel(model_name, num_classes_task_a, num_classes_task_b)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Freeze the transformer backbone initially
for param in model.encoder.parameters():
    param.requires_grad = False

# Dummy optimizer and loss function
optimizer = torch.optim.Adam([
    {'params': model.classifier_task_a.parameters(), 'lr': 1e-3},
    {'params': model.classifier_task_b.parameters(), 'lr': 1e-3}
])
criterion_task_a = nn.CrossEntropyLoss()
criterion_task_b = nn.CrossEntropyLoss()


labels_task_a = [0, 1, 2]  # Dummy labels for sentence classification
labels_task_b = [1, 0, 1]  # Dummy labels for sentiment analysis

# Tokenize sentences
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Training loop (dummy example)
for epoch in range(10):  # Example epoch count
    optimizer.zero_grad()
    logits_task_a, logits_task_b = model(input_ids, attention_mask)
    loss_task_a = criterion_task_a(logits_task_a, torch.tensor(labels_task_a))
    loss_task_b = criterion_task_b(logits_task_b, torch.tensor(labels_task_b))
    total_loss = loss_task_a + loss_task_b
    total_loss.backward()
    optimizer.step()

    if epoch == 5:  # Example of gradually unfreezing layers after a few epochs
        for param in model.encoder.parameters():
            param.requires_grad = True
        optimizer.add_param_group({'params': model.encoder.parameters(), 'lr': 1e-5})

    print(f"Epoch {epoch + 1}, Loss: {total_loss.item()}")

print("Training completed.")


TASK 4 Implementing Layer wise learning

In [24]:
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel

class MultiTaskModel(nn.Module):
    def __init__(self, model_name, num_classes_task_a, num_classes_task_b):
        super(MultiTaskModel, self).__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        self.classifier_task_a = nn.Linear(self.encoder.config.hidden_size, num_classes_task_a)
        self.classifier_task_b = nn.Linear(self.encoder.config.hidden_size, num_classes_task_b)

    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state.mean(dim=1)  # Mean pooling
        logits_task_a = self.classifier_task_a(pooled_output)
        logits_task_b = self.classifier_task_b(pooled_output)
        return logits_task_a, logits_task_b

# Define model parameters
model_name = "sentence-transformers/paraphrase-mpnet-base-v2"
num_classes_task_a = 3  # e.g., Positive, Negative, Neutral
num_classes_task_b = 2  # e.g., Positive, Negative

# Initialize the model
model = MultiTaskModel(model_name, num_classes_task_a, num_classes_task_b)
tokenizer = AutoTokenizer.from_pretrained(model_name)


# Define layer-wise learning rates
def get_optimizer_grouped_parameters(model, base_lr, lr_decay):
    # List of parameters and their associated learning rates
    grouped_parameters = [
        {
            'params': [p for n, p in model.named_parameters() if 'encoder.layer.' not in n],
            'lr': base_lr
        }
    ]

    # Assign decaying learning rates to each transformer layer
    for i, layer in enumerate(model.encoder.encoder.layer):
        lr = base_lr * (lr_decay ** i)
        grouped_parameters.append({
            'params': layer.parameters(),
            'lr': lr
        })

    return grouped_parameters

base_lr = 1e-4
lr_decay = 0.95  # Decay factor for learning rates of successive layers

# Get grouped parameters with layer-wise learning rates
optimizer_grouped_parameters = get_optimizer_grouped_parameters(model, base_lr, lr_decay)

# Define the optimizer
optimizer = torch.optim.Adam(optimizer_grouped_parameters)

# Sample sentences and labels for demonstration


labels_task_a = [2, 2, 2, 1, 0, 0, 1, 1]  # Dummy labels for sentence classification
labels_task_b = [1, 1, 1, 0, 1, 1, 0, 0]  # Dummy labels for sentiment analysis

# Tokenize sentences
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Convert labels to tensors and ensure they match the batch size
labels_task_a = torch.tensor(labels_task_a)
labels_task_b = torch.tensor(labels_task_b)

# Define loss functions
criterion_task_a = nn.CrossEntropyLoss()
criterion_task_b = nn.CrossEntropyLoss()

# Training loop (dummy example)
for epoch in range(10):  # Example epoch count
    optimizer.zero_grad()
    logits_task_a, logits_task_b = model(input_ids, attention_mask)
    loss_task_a = criterion_task_a(logits_task_a, labels_task_a)
    loss_task_b = criterion_task_b(logits_task_b, labels_task_b)
    total_loss = loss_task_a + loss_task_b
    total_loss.backward()
    optimizer.step()
    print(f"Epoch {epoch + 1}, Loss: {total_loss.item()}")

print("Training completed.")

# Function to test the model with new sentences
def test_model(sentences):
    inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
    input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask']
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        logits_task_a, logits_task_b = model(input_ids, attention_mask)
    predictions_task_a = torch.argmax(logits_task_a, dim=1)
    predictions_task_b = torch.argmax(logits_task_b, dim=1)
    label_mapping_task_a = {0: "Positive", 1: "Negative", 2: "Neutral"}
    label_mapping_task_b = {0: "Negative", 1: "Positive"}
    for i, sentence in enumerate(sentences):
        print(f"Sentence: {sentence}")
        print(f"Prediction Task A (Classification): {label_mapping_task_a[predictions_task_a[i].item()]}")
        print(f"Prediction Task B (Sentiment Analysis): {label_mapping_task_b[predictions_task_b[i].item()]}\n")



Epoch 1, Loss: 1.8328710794448853
Epoch 2, Loss: 1.5159473419189453
Epoch 3, Loss: 1.2859177589416504
Epoch 4, Loss: 1.1105759143829346
Epoch 5, Loss: 0.9664291739463806
Epoch 6, Loss: 0.8539605736732483
Epoch 7, Loss: 0.7622511386871338
Epoch 8, Loss: 0.6880239248275757
Epoch 9, Loss: 0.6273497343063354
Epoch 10, Loss: 0.5778219103813171
Training completed.


In [None]:
# TEST WITH MORE SENTENCES HERE
test_sentences = ["This movie was great!", "I am unhappy with the service.", "The book was boring."]
test_model(test_sentences)