<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Multi_modal_Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import OneCycleLR

class UnifiedAGISystem(nn.Module):
    """Late-fusion classifier over text, image and sensor inputs.

    Each modality is projected to a ``hidden_dim``-sized vector, the three
    vectors are concatenated, and a single linear layer produces the logits.

    Args:
        text_dim: Number of rows in the text embedding table (vocabulary size).
        image_dim: Image shape tuple; only ``image_dim[0]`` (the channel
            count) is used.
        sensor_dim: Number of features in each sensor vector.
        hidden_dim: Width of every per-modality embedding.
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, text_dim, image_dim, sensor_dim, hidden_dim, num_classes=10):
        super().__init__()
        self.text_embed = nn.EmbeddingBag(text_dim, hidden_dim)
        self.image_embed = nn.Sequential(
            nn.Conv2d(image_dim[0], hidden_dim, kernel_size=3, stride=1, padding=1),
            # Collapse the spatial grid so the image branch yields exactly
            # hidden_dim features regardless of the input resolution.
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
        )
        self.sensor_embed = nn.Linear(sensor_dim, hidden_dim)
        # Three modality embeddings are concatenated before classification.
        self.fc = nn.Linear(hidden_dim * 3, num_classes)

    def forward(self, text, image, sensor_data):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            text: Integer token indices, one row per sample.
            image: Image batch whose channel count matches ``image_dim[0]``.
            sensor_data: Float batch with ``sensor_dim`` features per sample.
        """
        text_features = self.text_embed(text)
        image_features = self.image_embed(image)
        sensor_features = self.sensor_embed(sensor_data)
        combined_features = torch.cat(
            (text_features, image_features, sensor_features), dim=1
        )
        return self.fc(combined_features)

class CustomDataset(Dataset):
    """Randomly generated multi-modal dataset for demos and smoke tests.

    Every sample is a ``(text, image, sensor, label)`` tuple. With the
    default arguments the generated tensors have the same shapes and value
    ranges as the previously hard-coded ones.

    Args:
        num_samples: Number of samples to generate.
        vocab_size: Exclusive upper bound for the random token ids.
        seq_len: Number of token ids per sample.
        image_shape: Per-sample image shape, e.g. ``(channels, height, width)``.
        sensor_dim: Number of sensor features per sample.
        num_classes: Exclusive upper bound for the random labels.
    """

    def __init__(self, num_samples=100, vocab_size=512, seq_len=10,
                 image_shape=(3, 224, 224), sensor_dim=10, num_classes=10):
        # The generation order is kept fixed so a seeded run with the default
        # arguments still draws the same values as before.
        self.text_data = torch.randint(0, vocab_size, (num_samples, seq_len))
        self.image_data = torch.randn(num_samples, *image_shape)
        self.sensor_data = torch.randn(num_samples, sensor_dim)
        self.labels = torch.randint(0, num_classes, (num_samples,))

    def __len__(self):
        """Return the number of samples."""
        return len(self.text_data)

    def __getitem__(self, idx):
        """Return the ``(text, image, sensor, label)`` tuple at ``idx``."""
        return (
            self.text_data[idx],
            self.image_data[idx],
            self.sensor_data[idx],
            self.labels[idx],
        )

def train(model, train_loader, optimizer, scheduler, criterion, epochs, device,
          step_per_batch=None):
    """Run a supervised training loop and print the mean loss of each epoch.

    Args:
        model: Module called as ``model(text, image, sensor_data)``.
        train_loader: Iterable of ``(text, image, sensor_data, labels)``
            batches that also supports ``len()``.
        optimizer: Optimizer bound to ``model``'s parameters.
        scheduler: Learning-rate scheduler exposing ``step()``.
        criterion: Loss callable taking ``(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.
        device: Device the model and every batch are moved to.
        step_per_batch: When True the scheduler is stepped after every
            optimizer step; when False, once per epoch. ``None`` (default)
            selects per-batch stepping for ``OneCycleLR`` and per-epoch
            stepping for any other scheduler.
    """
    if step_per_batch is None:
        # OneCycleLR sizes its schedule as steps_per_epoch * epochs, so it has
        # to advance once per batch; stepping it once per epoch would leave
        # the learning rate stuck near the start of the warm-up.
        step_per_batch = isinstance(scheduler, OneCycleLR)

    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for text, image, sensor_data, labels in train_loader:
            text = text.to(device)
            image = image.to(device)
            sensor_data = sensor_data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(text, image, sensor_data)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            if step_per_batch:
                scheduler.step()

            running_loss += loss.item()

        if not step_per_batch:
            scheduler.step()

        # An empty loader would otherwise raise ZeroDivisionError here.
        num_batches = len(train_loader)
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {mean_loss}")

def main():
    """Build the demo model, data and optimizer, then run single-device training.

    Any exception raised during setup or training is caught and reported on
    stdout rather than propagated.
    """
    try:
        num_epochs = 10

        # Model first, then data, so random initialisation order is stable.
        net = UnifiedAGISystem(
            text_dim=512,
            image_dim=(3, 224, 224),
            sensor_dim=10,
            hidden_dim=128,
        )
        loader = DataLoader(CustomDataset(), batch_size=8, shuffle=True)

        # AdamW driven by a one-cycle schedule sized to the whole run.
        adamw = optim.AdamW(net.parameters(), lr=5e-5)
        one_cycle = OneCycleLR(
            adamw,
            max_lr=5e-5,
            steps_per_epoch=len(loader),
            epochs=num_epochs,
        )
        loss_fn = nn.CrossEntropyLoss()

        # Prefer the GPU when one is visible.
        if torch.cuda.is_available():
            target_device = torch.device('cuda')
        else:
            target_device = torch.device('cpu')

        train(net, loader, adamw, one_cycle, loss_fn, num_epochs, target_device)

    except Exception as err:
        print(f"Error in main: {err}")

# Run the single-device training demo only when this code is executed as a
# script; importing it must not start training.
if __name__ == "__main__":
    main()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.distributed as dist
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.distributed import DistributedSampler
from transformers import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from torch.multiprocessing import spawn

# Define the model architecture
class UnifiedAGISystem(nn.Module):
    """Late-fusion classifier over text, image and sensor inputs.

    Each modality is projected to a ``hidden_dim``-sized vector, the three
    vectors are concatenated, and a single linear layer produces the logits.

    Args:
        text_dim: Number of rows in the text embedding table (vocabulary size).
        image_dim: Image shape tuple; only ``image_dim[0]`` (the channel
            count) is used.
        sensor_dim: Number of features in each sensor vector.
        hidden_dim: Width of every per-modality embedding.
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, text_dim, image_dim, sensor_dim, hidden_dim, num_classes=10):
        super().__init__()
        self.text_embed = nn.EmbeddingBag(text_dim, hidden_dim)
        # Kept as a bare Conv2d so existing state_dict keys stay valid; the
        # spatial pooling happens in forward().
        self.image_embed = nn.Conv2d(image_dim[0], hidden_dim, kernel_size=3, stride=1, padding=1)
        self.sensor_embed = nn.Linear(sensor_dim, hidden_dim)
        # Three modality embeddings are concatenated before classification.
        self.fc = nn.Linear(hidden_dim * 3, num_classes)

    def forward(self, text, image, sensor_data):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            text: Integer token indices, one row per sample.
            image: Image batch whose channel count matches ``image_dim[0]``.
            sensor_data: Float batch with ``sensor_dim`` features per sample.
        """
        text_features = self.text_embed(text)
        # Average over height and width. Flattening the raw conv output gave
        # hidden_dim * H * W features, which cannot be fed to ``fc`` (it
        # expects hidden_dim * 3 inputs in total).
        image_features = self.image_embed(image).mean(dim=(2, 3))
        sensor_features = self.sensor_embed(sensor_data)
        combined_features = torch.cat(
            (text_features, image_features, sensor_features), dim=1
        )
        return self.fc(combined_features)

# Custom Dataset class
class CustomDataset(Dataset):
    """Randomly generated multi-modal dataset for demos and smoke tests.

    Every sample is a ``(text, image, sensor, label)`` tuple. With the
    default arguments the generated tensors have the same shapes and value
    ranges as the previously hard-coded ones.

    Args:
        num_samples: Number of samples to generate.
        vocab_size: Exclusive upper bound for the random token ids.
        seq_len: Number of token ids per sample.
        image_shape: Per-sample image shape, e.g. ``(channels, height, width)``.
        sensor_dim: Number of sensor features per sample.
        num_classes: Exclusive upper bound for the random labels.
    """

    def __init__(self, num_samples=100, vocab_size=512, seq_len=10,
                 image_shape=(3, 224, 224), sensor_dim=10, num_classes=10):
        # Create synthetic data for demo purposes. The generation order is
        # kept fixed so a seeded run with the default arguments still draws
        # the same values as before.
        self.text_data = torch.randint(0, vocab_size, (num_samples, seq_len))
        self.image_data = torch.randn(num_samples, *image_shape)
        self.sensor_data = torch.randn(num_samples, sensor_dim)
        self.labels = torch.randint(0, num_classes, (num_samples,))

    def __len__(self):
        """Return the number of samples."""
        return len(self.text_data)

    def __getitem__(self, idx):
        """Return the ``(text, image, sensor, label)`` tuple at ``idx``."""
        return (
            self.text_data[idx],
            self.image_data[idx],
            self.sensor_data[idx],
            self.labels[idx],
        )

# Function to set environment variables for distributed training
def set_environment_variables(rank, world_size):
    """Export the rendezvous settings used for distributed initialisation.

    Overwrites ``MASTER_ADDR``, ``MASTER_PORT``, ``RANK`` and ``WORLD_SIZE``
    in the current process environment.

    Args:
        rank: Rank of the calling process; stored as a string.
        world_size: Total number of processes; stored as a string.
    """
    import os

    rendezvous = {
        'MASTER_ADDR': 'localhost',  # or the IP address of the master node
        'MASTER_PORT': '29500',      # must be a free port on the master
        'RANK': str(rank),
        'WORLD_SIZE': str(world_size),
    }
    os.environ.update(rendezvous)

# Training function for DDP
def train_ddp(rank, world_size, model, train_loader, optimizer, scheduler, criterion, epochs):
    """Per-process entry point for DistributedDataParallel training.

    Joins the NCCL process group, wraps ``model`` in DDP on GPU ``rank``,
    shards ``train_loader``'s dataset with a ``DistributedSampler`` and runs
    the training loop. Rank 0 prints the mean loss of each epoch.

    Args:
        rank: Index of this process; also used as its CUDA device index.
        world_size: Total number of participating processes.
        model: Module called as ``model(text, image, sensor_data)``.
        train_loader: DataLoader whose dataset and batching options are
            reused to build the per-rank loader.
        optimizer: Optimizer bound to ``model``'s parameters.
        scheduler: Learning-rate scheduler exposing ``step()``.
        criterion: Loss callable taking ``(outputs, labels)``.
        epochs: Number of passes over this rank's shard.

    Raises:
        Exception: Any error raised during setup or training is printed with
            the rank and then re-raised so the launcher can see the failure.
    """
    try:
        # init_method='env://' reads MASTER_ADDR / MASTER_PORT from the
        # environment, so they have to be set before joining the group.
        set_environment_variables(rank, world_size)

        # Initialize the distributed process group
        dist.init_process_group(
            backend='nccl',
            world_size=world_size,
            rank=rank,
            init_method='env://'
        )

        # Set device
        torch.cuda.set_device(rank)
        model.to(rank)

        # Convert to DistributedDataParallel
        model = nn.parallel.DistributedDataParallel(model, device_ids=[rank])

        # DataLoader rejects assignment to `.sampler` once it is constructed,
        # so build a new loader around the distributed sampler instead.
        train_sampler = DistributedSampler(train_loader.dataset, num_replicas=world_size, rank=rank)
        sharded_loader = DataLoader(
            train_loader.dataset,
            batch_size=train_loader.batch_size,
            sampler=train_sampler,
            num_workers=train_loader.num_workers,
            collate_fn=train_loader.collate_fn,
            pin_memory=train_loader.pin_memory,
            drop_last=train_loader.drop_last,
        )

        # OneCycleLR sizes its schedule in optimizer steps, so it must advance
        # once per batch; other schedulers keep the per-epoch cadence.
        # NOTE(review): the scheduler's total step count should match
        # len(sharded_loader) * epochs - confirm against the caller.
        step_per_batch = isinstance(scheduler, OneCycleLR)

        print(f"Rank {rank}/{world_size} initialized.")

        # Training loop
        for epoch in range(epochs):
            model.train()
            # Reseed the sampler so every epoch uses a different shuffle.
            train_sampler.set_epoch(epoch)
            running_loss = 0.0
            for text, image, sensor_data, labels in sharded_loader:
                text = text.to(rank)
                image = image.to(rank)
                sensor_data = sensor_data.to(rank)
                labels = labels.to(rank)

                optimizer.zero_grad()
                outputs = model(text, image, sensor_data)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                if step_per_batch:
                    scheduler.step()

                running_loss += loss.item()

            if not step_per_batch:
                scheduler.step()

            if rank == 0:  # Print loss only from rank 0
                # Guard against an empty shard to avoid ZeroDivisionError.
                num_batches = len(sharded_loader)
                mean_loss = running_loss / num_batches if num_batches else float("nan")
                print(f"Epoch [{epoch+1}/{epochs}], Loss: {mean_loss}")

    except Exception as e:
        print(f"Error in rank {rank}: {e}")
        # Re-raise so the launcher learns this rank failed instead of the
        # other ranks waiting on a peer that silently stopped.
        raise

    finally:
        # Only tear down a group that was actually created; destroying an
        # uninitialised group raises and would mask the original error.
        if dist.is_initialized():
            dist.destroy_process_group()
        print(f"Rank {rank} finished.")

# Main function to setup distributed training
def main():
    """Set up the demo model, data and optimizer and launch one DDP worker per GPU.

    Any exception raised during setup or by the spawned workers is caught
    and reported on stdout rather than propagated.
    """
    try:
        world_size = 2  # Number of GPUs available
        epochs = 10
        batch_size = 8

        # Check the hardware first so a machine without enough GPUs fails
        # before the model and the synthetic dataset are allocated.
        available_gpus = torch.cuda.device_count()
        if available_gpus < world_size:
            raise ValueError(f"Not enough GPUs available. Expected {world_size} but found {available_gpus}.")

        # Initialize model
        model = UnifiedAGISystem(text_dim=512, image_dim=(3, 224, 224), sensor_dim=10, hidden_dim=128)

        # Create Dataset and DataLoader
        dataset = CustomDataset()
        train_loader = DataLoader(dataset, batch_size=batch_size)

        # Use PyTorch's own AdamW: the transformers implementation is
        # deprecated. weight_decay=0.0 keeps that implementation's default.
        # NOTE(review): the file-level `from transformers import AdamW`
        # import is no longer used by this function and can be dropped.
        optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0)

        # Each rank only iterates its own shard, so size the one-cycle
        # schedule by the per-rank batch count (ceil divisions) rather than
        # by the length of the un-sharded loader.
        samples_per_rank = -(-len(dataset) // world_size)
        steps_per_epoch = max(1, -(-samples_per_rank // batch_size))
        scheduler = OneCycleLR(optimizer, max_lr=5e-5, steps_per_epoch=steps_per_epoch, epochs=epochs)
        criterion = nn.CrossEntropyLoss()

        # Spawn processes for distributed training; spawn() passes the
        # process index as the first argument of train_ddp.
        print(f"Starting distributed training with {world_size} processes.")
        spawn(train_ddp, nprocs=world_size, args=(world_size, model, train_loader, optimizer, scheduler, criterion, epochs))

    except Exception as e:
        print(f"Error in main: {e}")

# Launch the distributed training demo only when this code is executed as a
# script; importing it (as spawned worker processes do) must not re-run main().
if __name__ == "__main__":
    main()