<a href="https://colab.research.google.com/github/SathishKumarAI/Fetch/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers matplotlib scikit-learn




In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from typing import List
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np


In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from typing import List, Union
import torch.nn.functional as F


class PoolingStrategy(nn.Module):
    """Collapse per-token hidden states into a single vector per sequence.

    Supported strategies:
        * ``"cls"``  - take the hidden state of the first token.
        * ``"mean"`` - average the hidden states of the non-padded tokens.
        * ``"max"``  - element-wise maximum over the non-padded tokens.

    Args:
        strategy: One of ``"cls"``, ``"mean"`` or ``"max"``.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """

    def __init__(self, strategy: str):
        super(PoolingStrategy, self).__init__()
        if strategy not in ["cls", "mean", "max"]:
            raise ValueError("Invalid pooling type.")
        self.strategy = strategy

    def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Pool ``hidden_states`` along the token dimension (dim 1).

        Args:
            hidden_states: Token representations, indexed as
                ``[batch, token, feature]``.
            attention_mask: ``[batch, token]`` mask; non-zero marks a real
                token, zero marks padding.

        Returns:
            A ``[batch, feature]`` tensor of pooled representations. Rows whose
            mask is entirely zero yield zeros for ``"mean"`` and ``"max"``.
        """
        if self.strategy == "cls":
            return hidden_states[:, 0]

        if self.strategy == "mean":
            weights = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
            summed = (hidden_states * weights).sum(1)
            # Clamp the token count to 1 so an all-padding row divides 0 by 1
            # instead of 0 by 0 (NaN); real rows always have count >= 1.
            counts = weights.sum(1).clamp(min=1)
            return summed / counts

        if self.strategy == "max":
            keep = attention_mask.unsqueeze(-1).bool()
            # Padded positions must never win the max. Multiplying by the mask
            # would turn them into 0, which beats genuine negative activations,
            # so they are filled with -inf instead.
            filled = hidden_states.masked_fill(~keep, float("-inf"))
            pooled = filled.max(dim=1).values
            # A row with no real tokens would be all -inf; report zeros instead.
            return torch.where(keep.any(dim=1), pooled, torch.zeros_like(pooled))

        # Only reachable if `strategy` was mutated after construction.
        raise ValueError("Invalid pooling type.")


class SentenceEncoder(nn.Module):
    """Encode sentences into fixed-size vectors with a pretrained transformer.

    The module loads a Hugging Face model and its tokenizer, runs the model,
    pools the token-level hidden states with ``PoolingStrategy`` and optionally
    L2-normalizes the result.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained`` and
            ``AutoTokenizer.from_pretrained``.
        pooling: Pooling strategy name forwarded to ``PoolingStrategy``.
        normalize: If True, L2-normalize the pooled embeddings.
        freeze_encoder: If True, disable gradients for all encoder parameters.
        use_amp: If True, run the encoder under CUDA autocast (only takes
            effect when the inputs live on a CUDA device).
    """

    def __init__(
        self,
        model_name: str = "bert-base-uncased",
        pooling: str = "mean",
        normalize: bool = True,
        freeze_encoder: bool = False,
        use_amp: bool = False,
    ):
        super(SentenceEncoder, self).__init__()
        # Built first so an invalid pooling name fails before any model download.
        self.pooling_layer = PoolingStrategy(pooling)
        self.encoder = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.normalize = normalize
        self.use_amp = use_amp

        if freeze_encoder:
            for param in self.encoder.parameters():
                param.requires_grad = False

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Compute pooled (and optionally normalized) sentence embeddings.

        Args:
            input_ids: Token ids produced by the tokenizer.
            attention_mask: Mask produced by the tokenizer (0 marks padding).

        Returns:
            One embedding row per input sequence.
        """
        # torch.cuda.amp.autocast is deprecated; torch.autocast("cuda", ...) is
        # the supported spelling. Autocast is only enabled for CUDA inputs, so
        # CPU runs neither change precision nor emit a "CUDA unavailable" warning.
        amp_enabled = bool(self.use_amp) and input_ids.is_cuda
        with torch.autocast(device_type="cuda", enabled=amp_enabled):
            outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
            embeddings = self.pooling_layer(outputs.last_hidden_state, attention_mask)

        if self.normalize:
            embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings

    def encode(
        self,
        sentences: Union[str, List[str]],
        device: Union[str, torch.device, None] = None,
        batch_size: int = 16,
    ) -> torch.Tensor:
        """Tokenize and embed raw sentences without tracking gradients.

        Side effects: moves the module to ``device`` and switches it to eval
        mode; neither is restored afterwards.

        Args:
            sentences: A single sentence or a sequence of sentences.
            device: Target device; defaults to ``"cuda"`` when available,
                otherwise ``"cpu"``.
            batch_size: Number of sentences tokenized and encoded per step.

        Returns:
            A CPU tensor with one embedding row per sentence. An empty input
            yields a tensor with zero rows.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        if isinstance(sentences, str):
            sentences = [sentences]
        else:
            sentences = list(sentences)

        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"

        self.to(device)
        self.eval()

        if not sentences:
            # torch.cat rejects an empty list, so return an explicit 0-row
            # tensor; the width comes from the encoder config when it exposes one.
            hidden_size = getattr(getattr(self.encoder, "config", None), "hidden_size", 0)
            return torch.empty((0, hidden_size))

        all_embeddings = []

        with torch.no_grad():
            for start in range(0, len(sentences), batch_size):
                batch = sentences[start:start + batch_size]
                encoded = self.tokenizer(batch, return_tensors='pt', padding=True, truncation=True, max_length=128)
                input_ids = encoded['input_ids'].to(device)
                attention_mask = encoded['attention_mask'].to(device)

                # Call the module (not .forward) so registered hooks still run.
                emb = self(input_ids, attention_mask)
                all_embeddings.append(emb.cpu())

        return torch.cat(all_embeddings, dim=0)


In [None]:

# ✅ Run example in Colab
# Example sentences shared by the demo cells that call encoder.encode(...) and model.predict(...).
sentences = [
    "Machine learning is fascinating.",
    "Transformers are very powerful for NLP.",
    "Sentence embeddings are useful."
]


In [None]:
# Pooling strategy handed to SentenceEncoder (validated by PoolingStrategy).
pooling_strategy = "mean"  # can be 'mean', 'cls', or 'max'
encoder = SentenceEncoder(pooling=pooling_strategy)
# encode() tokenizes in batches and returns one embedding row per sentence as a CPU tensor.
embeddings = encoder.encode(sentences)
print(embeddings.shape)
# torch.Size([3, 768]) ← 3 sentences, each encoded into a 768-dim vector

# t-SNE visualization
# Only 3 samples are embedded here, hence the very small perplexity
# (NOTE(review): scikit-learn expects perplexity < n_samples — confirm for your version).
tsne = TSNE(n_components=2, random_state=42, perplexity=2)  # Changed perplexity to 2
# .numpy() works directly because encode() already moved the embeddings to the CPU.
reduced = tsne.fit_transform(embeddings.numpy())

# plt.figure(figsize=(8, 6))
# for i, sentence in enumerate(sentences):
#     plt.scatter(reduced[i, 0], reduced[i, 1])
#     plt.annotate(sentence, (reduced[i, 0], reduced[i, 1]))
# plt.title(f"t-SNE Visualization of Sentence Embeddings ({pooling_strategy} pooling)")
# plt.show()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  with torch.cuda.amp.autocast(enabled=self.use_amp):


torch.Size([3, 768])


In [None]:
# model/multi_task_model.py

import torch
import torch.nn as nn


class MultiTaskModel(nn.Module):
    """Shared sentence encoder feeding two independent classification heads.

    Args:
        model_name: Pretrained model identifier forwarded to ``SentenceEncoder``.
        pooling: Pooling strategy forwarded to ``SentenceEncoder``.
        num_classes_task_a: Number of output logits of the Task A head.
        num_classes_task_b: Number of output logits of the Task B head.
        hidden_dim: Input and hidden width of both heads; it has to match the
            width of the embeddings produced by the encoder.
    """

    def __init__(self,
                 model_name="bert-base-uncased",
                 pooling="mean",
                 num_classes_task_a=5,
                 num_classes_task_b=3,
                 hidden_dim=768):
        super(MultiTaskModel, self).__init__()

        # Shared encoder from Task 1
        self.encoder = SentenceEncoder(model_name=model_name, pooling=pooling)

        # Task A head: Sentence Classification
        self.task_a_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, num_classes_task_a)
        )

        # Task B head: Sentiment Analysis (or other)
        self.task_b_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, num_classes_task_b)
        )

    def forward(self, input_ids, attention_mask):
        """Return ``(logits_a, logits_b)`` for a batch of tokenized inputs.

        Args:
            input_ids: Token ids produced by the encoder's tokenizer.
            attention_mask: Matching attention mask (0 marks padding).

        Returns:
            Tuple of the Task A logits and the Task B logits, both computed
            from the same sentence embeddings.
        """
        # Call the sub-module itself rather than .forward so hooks are honoured.
        embeddings = self.encoder(input_ids, attention_mask)
        out_a = self.task_a_head(embeddings)
        out_b = self.task_b_head(embeddings)
        return out_a, out_b

    def predict(self, sentences, device='cpu'):
        """Tokenize raw sentences and return both heads' logits without gradients.

        Side effects: moves the model to ``device`` and switches it to eval
        mode (same convention as ``SentenceEncoder.encode``).

        Args:
            sentences: A sentence or list of sentences accepted by the tokenizer.
            device: Device on which inference runs.

        Returns:
            Tuple ``(logits_a, logits_b)`` located on ``device``.
        """
        # The inputs are moved to `device` below; the model has to live there
        # too, otherwise predicting after GPU training raises a device mismatch.
        self.to(device)
        self.eval()
        with torch.no_grad():
            encoded_input = self.encoder.tokenizer(
                sentences, return_tensors='pt', padding=True, truncation=True
            )
            input_ids = encoded_input['input_ids'].to(device)
            attention_mask = encoded_input['attention_mask'].to(device)
            out_a, out_b = self(input_ids, attention_mask)
        return out_a, out_b


In [None]:
# Build a multi-task model and run the example sentences through both heads.
# NOTE: the task heads are freshly constructed layers and no training happens in
# this cell, so the printed class indices are not meaningful predictions.
model = MultiTaskModel(pooling="mean", num_classes_task_a=4, num_classes_task_b=3)
logits_a, logits_b = model.predict(sentences)

# argmax over the last dimension selects the highest-scoring class per sentence.
print("Task A (Classification) Predictions:", logits_a.argmax(dim=-1))
print("Task B (Sentiment) Predictions:", logits_b.argmax(dim=-1))


  with torch.cuda.amp.autocast(enabled=self.use_amp):


Task A (Classification) Predictions: tensor([3, 1, 3])
Task B (Sentiment) Predictions: tensor([2, 1, 1])


In [None]:
# train/training_loop.py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
# from model.multi_task_model import MultiTaskModel
import random

# ---- Hypothetical Dataset ----
class DummyMultiTaskDataset(Dataset):
    """Synthetic dataset of numbered sentences with random labels for two tasks.

    Args:
        tokenizer: Callable used to tokenize a sentence; it is called with
            ``padding='max_length'``, ``truncation=True``, ``max_length`` and
            ``return_tensors='pt'`` and must return a mapping holding
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        num_samples: Number of synthetic sentences to generate.
        num_classes_a: Number of Task A classes; labels fall in
            ``[0, num_classes_a - 1]``.
        num_classes_b: Number of Task B classes; labels fall in
            ``[0, num_classes_b - 1]``.
        max_length: Length every sentence is padded / truncated to.
        seed: Optional seed for reproducible labels. When None, the global
            ``random`` module state is used.
    """

    def __init__(self, tokenizer, num_samples=100, num_classes_a=5, num_classes_b=3,
                 max_length=32, seed=None):
        # A private generator keeps seeded runs reproducible without touching
        # the global random state; unseeded runs keep using the global stream.
        rng = random if seed is None else random.Random(seed)

        self.sentences = [
            f"Sample sentence number {i}" for i in range(num_samples)
        ]
        self.labels_a = [rng.randint(0, num_classes_a - 1) for _ in range(num_samples)]
        self.labels_b = [rng.randint(0, num_classes_b - 1) for _ in range(num_samples)]
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its tensors together with both labels."""
        text = self.sentences[idx]
        # Named `encoded` rather than `input` to avoid shadowing the builtin.
        encoded = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # The tokenizer returns a batch of one; drop that leading dimension
            # so the DataLoader can stack samples itself.
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'label_a': torch.tensor(self.labels_a[idx]),
            'label_b': torch.tensor(self.labels_b[idx]),
        }

# ---- Training Loop ----
def train_loop(num_epochs=200, batch_size=8, lr=2e-5, alpha=0.5):
    """Train a ``MultiTaskModel`` on ``DummyMultiTaskDataset`` and return it.

    Both heads are trained jointly with cross-entropy; the optimized loss is
    ``alpha * loss_a + (1 - alpha) * loss_b``. After every epoch the summed
    batch loss and the training accuracy of each task are printed.

    Args:
        num_epochs: Number of passes over the dataset.
        batch_size: Mini-batch size of the data loader.
        lr: Learning rate of the Adam optimizer.
        alpha: Weight of the Task A loss; Task B receives ``1 - alpha``.

    Returns:
        The trained model (located on the device used for training).

    Raises:
        ValueError: If ``alpha`` lies outside ``[0, 1]``.
    """
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must be in [0, 1], got {alpha}")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MultiTaskModel(pooling="mean", num_classes_task_a=5, num_classes_task_b=3).to(device)

    # Reuse the encoder's tokenizer so the dataset matches the model vocabulary.
    tokenizer = model.encoder.tokenizer
    dataset = DummyMultiTaskDataset(tokenizer)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    criterion_a = nn.CrossEntropyLoss()
    criterion_b = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        correct_a = correct_b = total = 0

        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            label_a = batch['label_a'].to(device)
            label_b = batch['label_b'].to(device)

            optimizer.zero_grad()
            out_a, out_b = model(input_ids, attention_mask)

            loss_a = criterion_a(out_a, label_a)
            loss_b = criterion_b(out_b, label_b)
            loss = alpha * loss_a + (1 - alpha) * loss_b

            loss.backward()
            optimizer.step()

            # Sum of per-batch mean losses over the epoch (not a per-sample average).
            total_loss += loss.item()

            # Compute accuracy (simplified): measured on the training batches,
            # with the model still in train mode (dropout active).
            correct_a += (out_a.argmax(dim=1) == label_a).sum().item()
            correct_b += (out_b.argmax(dim=1) == label_b).sum().item()
            total += label_a.size(0)

        # Guard against an empty loader so the report never divides by zero.
        seen = max(total, 1)
        acc_a = correct_a / seen
        acc_b = correct_b / seen
        print(f"Epoch {epoch+1}: Loss={total_loss:.4f} | TaskA_Acc={acc_a:.3f} | TaskB_Acc={acc_b:.3f}")

    # Hand the trained model back instead of discarding it with the local scope.
    return model

# Run training when this code executes as the main module, which includes
# running the cell in a notebook kernel; importing it as a module skips this.
if __name__ == "__main__":
    train_loop()


  with torch.cuda.amp.autocast(enabled=self.use_amp):


Epoch 1: Loss=17.6359 | TaskA_Acc=0.180 | TaskB_Acc=0.340
Epoch 2: Loss=17.6008 | TaskA_Acc=0.180 | TaskB_Acc=0.330
Epoch 3: Loss=17.5843 | TaskA_Acc=0.230 | TaskB_Acc=0.340
Epoch 4: Loss=17.5738 | TaskA_Acc=0.280 | TaskB_Acc=0.340
Epoch 5: Loss=17.5659 | TaskA_Acc=0.280 | TaskB_Acc=0.340
Epoch 6: Loss=17.5285 | TaskA_Acc=0.280 | TaskB_Acc=0.340
Epoch 7: Loss=17.5111 | TaskA_Acc=0.280 | TaskB_Acc=0.370
Epoch 8: Loss=17.4914 | TaskA_Acc=0.280 | TaskB_Acc=0.440
Epoch 9: Loss=17.4381 | TaskA_Acc=0.290 | TaskB_Acc=0.540
Epoch 10: Loss=17.3711 | TaskA_Acc=0.300 | TaskB_Acc=0.510
Epoch 11: Loss=17.2439 | TaskA_Acc=0.330 | TaskB_Acc=0.610
Epoch 12: Loss=17.0917 | TaskA_Acc=0.470 | TaskB_Acc=0.710
Epoch 13: Loss=16.8812 | TaskA_Acc=0.520 | TaskB_Acc=0.760
Epoch 14: Loss=16.6467 | TaskA_Acc=0.540 | TaskB_Acc=0.800
Epoch 15: Loss=16.4832 | TaskA_Acc=0.570 | TaskB_Acc=0.780
Epoch 16: Loss=16.3243 | TaskA_Acc=0.550 | TaskB_Acc=0.850
Epoch 17: Loss=16.0102 | TaskA_Acc=0.580 | TaskB_Acc=0.880
Epoch 