<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Pseudo_Labeling_Technique.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, ConcatDataset
from torchvision import datasets, transforms

# Placeholder for your actual model
class SimpleModel(nn.Module):
    """Single-layer linear classifier applied to flattened inputs.

    Args:
        in_features: Number of values per sample once every dimension after
            the batch dimension is flattened. Defaults to ``28 * 28``.
        num_classes: Number of logits produced per sample. Defaults to 10.
    """

    def __init__(self, in_features=28 * 28, num_classes=10):
        super().__init__()
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return raw (un-normalized) logits of shape ``(batch, num_classes)``.

        ``flatten`` is used instead of ``view`` so that non-contiguous
        tensors and empty batches are accepted as well.
        """
        return self.fc(x.flatten(start_dim=1))

# Custom dataset class for pseudo-labeled data
class PseudoLabelDataset(Dataset):
    """Map-style dataset pairing stored images with their pseudo-labels."""

    def __init__(self, images, pseudo_labels):
        # Both containers are kept exactly as given and indexed in parallel.
        self.images, self.pseudo_labels = images, pseudo_labels

    def __len__(self):
        """Return the number of samples, taken from the pseudo-label container."""
        n_samples = len(self.pseudo_labels)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(image, pseudo_label)`` pair stored at ``idx``."""
        image = self.images[idx]
        label = self.pseudo_labels[idx]
        return image, label

# Placeholder functions for data loaders
def get_labeled_loader(batch_size=32, root='./data'):
    """Build a shuffled ``DataLoader`` over the MNIST training split.

    Args:
        batch_size: Number of samples per batch. Defaults to 32.
        root: Directory the dataset is read from; it is downloaded there
            when missing. Defaults to ``'./data'``.

    Returns:
        A ``DataLoader`` over the transformed MNIST training set with
        shuffling enabled.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Mean 0.5 / std 0.5 on the single channel rescales ToTensor's
        # [0, 1] output to [-1, 1].
        transforms.Normalize((0.5,), (0.5,)),
    ])
    dataset = datasets.MNIST(root=root, train=True, download=True, transform=transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

def get_unlabeled_loader(batch_size=32, root='./data'):
    """Build an unshuffled ``DataLoader`` over the MNIST test split.

    The test split stands in for unlabeled data: the loader still yields
    ``(images, labels)`` batches, so callers that want unlabeled samples
    must discard the labels themselves.

    Args:
        batch_size: Number of samples per batch. Defaults to 32.
        root: Directory the dataset is read from; it is downloaded there
            when missing. Defaults to ``'./data'``.

    Returns:
        A ``DataLoader`` over the transformed MNIST test set with shuffling
        disabled.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Mean 0.5 / std 0.5 on the single channel rescales ToTensor's
        # [0, 1] output to [-1, 1].
        transforms.Normalize((0.5,), (0.5,)),
    ])
    dataset = datasets.MNIST(root=root, train=False, download=True, transform=transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=False)

# Confidence threshold function
def confidence_threshold(predictions, threshold=0.9):
    """Flag predictions whose top softmax probability exceeds ``threshold``.

    Args:
        predictions: Raw logits; softmax is taken over dimension 1.
        threshold: Exclusive lower bound on the top-class probability.
            Defaults to 0.9.

    Returns:
        Boolean tensor with dimension 1 reduced away; an entry is ``True``
        where the largest class probability is strictly above ``threshold``.
    """
    class_probs = predictions.softmax(dim=1)
    top_prob = class_probs.max(dim=1).values
    return top_prob > threshold

# Train function
def train(model, data_loader, optimizer, criterion, device=None):
    """Run one optimization pass over ``data_loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to. When ``None`` (default) the
            device of the model's first parameter is used, falling back to
            CPU for a model without parameters.
    """
    if device is None:
        # Follow the model rather than depend on a module-level ``device``
        # global, which may not be defined when this function is called.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

# Initialize model, optimizer, and loss function
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Data loaders
labeled_loader = get_labeled_loader()
unlabeled_loader = get_unlabeled_loader()

# Define the number of epochs
num_epochs = 10

# Pseudo-labeling: each epoch first labels the confident unlabeled samples
# with the current model, then trains for one pass.
for epoch in range(num_epochs):
    model.eval()
    pseudo_labeled_images = []
    pseudo_labels = []

    # Inference only: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for images, _ in unlabeled_loader:
            images = images.to(device)
            preds = model(images)
            confident_mask = confidence_threshold(preds)

            if confident_mask.any():
                pseudo_labeled_images.append(images[confident_mask].cpu())
                pseudo_labels.append(preds[confident_mask].argmax(dim=1).cpu())

    if pseudo_labeled_images:
        pseudo_labeled_images = torch.cat(pseudo_labeled_images)
        # Store labels as plain Python ints, the label type torchvision's
        # MNIST yields. Mixing 0-dim tensors and ints in one batch makes the
        # default collate function fail when a tensor label comes first.
        pseudo_labels = torch.cat(pseudo_labels).tolist()
        pseudo_dataset = PseudoLabelDataset(pseudo_labeled_images, pseudo_labels)
        combined_dataset = ConcatDataset([labeled_loader.dataset, pseudo_dataset])
        combined_loader = DataLoader(combined_dataset, batch_size=32, shuffle=True)
        epoch_loader = combined_loader
    else:
        # No prediction cleared the confidence threshold (expected while the
        # model is still untrained): train on the labeled data alone so the
        # model can improve instead of skipping the epoch.
        epoch_loader = labeled_loader

    # Retrain model on the labeled data plus any pseudo-labeled samples
    train(model, epoch_loader, optimizer, criterion)

    # Optionally, validate model performance here

print("Pseudo-labeling and training completed!")