In [2]:
# Install Hugging Face libraries
!pip install diffusers transformers accelerate datasets
!pip install google-cloud-storage

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [3]:
# Authenticate to Google Cloud Storage
# Runs the Colab user-authentication flow; the GCS clients created later in this
# notebook presumably pick up these credentials (confirm against the Colab docs).
from google.colab import auth
auth.authenticate_user()

In [None]:
# Set up GCS credentials
# NOTE(review): this cell has no execution count, so it appears not to have been run;
# it may be redundant with the Colab authentication cell above - confirm before keeping it.
!gcloud auth application-default login

# Access Data from Bucket

In [4]:
from google.cloud import storage
import os

# Set up GCS client and bucket information
bucket_name = 'flicker30k_dataset'
local_dataset_dir = '/content/dataset/'

# Create local directory to store dataset
os.makedirs(local_dataset_dir, exist_ok=True)

# Initialize GCS client
client = storage.Client()
bucket = client.bucket(bucket_name)

# Download only the captions file. It sits at the root of the bucket, so it can be
# addressed directly by name instead of listing every object in the bucket just to
# find this one entry.
blob = bucket.blob("results.csv")
if not blob.exists():
    # Fail here with a clear message rather than later when the CSV is read.
    raise FileNotFoundError(f"{blob.name} not found in bucket {bucket_name}")

local_path = os.path.join(local_dataset_dir, blob.name)
blob.download_to_filename(local_path)
print(f"Downloaded: {blob.name}")

Downloaded: results.csv


In [1]:
from diffusers import StableDiffusionPipeline
import torch

# Fetch the pre-trained Stable Diffusion checkpoint in half precision and
# place the resulting pipeline on the GPU in a single expression.
model_name = "CompVis/stable-diffusion-v1-4"  # Swap in another checkpoint id here if desired
pipeline = StableDiffusionPipeline.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
).to("cuda")


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

safety_checker/config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

(…)kpoints/scheduler_config-checkpoint.json:   0%|          | 0.00/209 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [11]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from google.cloud import storage
from PIL import Image
import pandas as pd
from io import BytesIO
from diffusers import DDPMScheduler, UNet2DConditionModel, AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer
from accelerate import Accelerator
import requests

# Custom Dataset Class for GCS and Prompts
class GCSDataset(Dataset):
    """Image/caption dataset whose images live in a Google Cloud Storage bucket.

    The captions come from a local ``|``-delimited CSV that must provide an
    ``image_name`` column (object name inside the bucket) and a ``comment``
    column (the text prompt). Images are downloaded on demand in ``__getitem__``.

    Args:
        bucket_name: Name of the GCS bucket holding the images.
        csv_path: Path to the ``|``-delimited captions CSV.
        transform: Optional callable applied to the decoded RGB PIL image.

    Raises:
        KeyError: If the CSV lacks one of the required columns.
    """

    REQUIRED_COLUMNS = ("image_name", "comment")

    def __init__(self, bucket_name, csv_path, transform=None):
        self.client = storage.Client()
        self.bucket = self.client.bucket(bucket_name)

        data = pd.read_csv(csv_path, delimiter="|")
        # A "|"-delimited file may pad fields with spaces (e.g. "| comment"), which
        # would otherwise leave the column named " comment" and break the lookups.
        data.columns = data.columns.str.strip()
        missing = [col for col in self.REQUIRED_COLUMNS if col not in data.columns]
        if missing:
            raise KeyError(f"Columns {missing} not found in {csv_path}; got {list(data.columns)}")

        # Rows without an image name or caption cannot be used for training.
        data = data.dropna(subset=list(self.REQUIRED_COLUMNS)).reset_index(drop=True)
        for col in self.REQUIRED_COLUMNS:
            data[col] = data[col].astype(str).str.strip()

        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of usable (image, caption) rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"image": ..., "text": ...}`` for row ``idx``.

        The image is downloaded from the bucket, decoded as RGB and passed
        through ``transform`` when one was supplied.
        """
        row = self.data.iloc[idx]
        image_name = row["image_name"]
        prompt = row["comment"]

        # Download through the authenticated client instead of an anonymous HTTP GET
        # on the public URL: this also works for private buckets, and a failed
        # download raises instead of handing an error page to the image decoder.
        blob = self.bucket.blob(image_name)
        image_bytes = blob.download_as_bytes()
        image = Image.open(BytesIO(image_bytes)).convert("RGB")

        # Apply transformations if needed
        if self.transform:
            image = self.transform(image)

        return {"image": image, "text": prompt}


# Define the Dataset
bucket_name = "flicker30k_dataset"
csv_path = "dataset/results.csv"
transform = transforms.Compose([
    transforms.Resize((256, 256)),       # Resize images to a fixed size
    transforms.ToTensor(),               # PIL image -> float tensor in [0, 1]
    transforms.Normalize([0.5], [0.5]),  # Rescale to [-1, 1], the input range used by the diffusers training examples
])

dataset = GCSDataset(bucket_name=bucket_name, csv_path=csv_path, transform=transform)

# DataLoader for training
train_dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Define training components
model_name = "CompVis/stable-diffusion-v1-4"  # Example model name, adjust accordingly
noise_scheduler = DDPMScheduler.from_pretrained(model_name, subfolder="scheduler")
text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
vae = AutoencoderKL.from_pretrained(model_name, subfolder="vae")
unet = UNet2DConditionModel.from_pretrained(model_name, subfolder="unet")

# Only the UNet is fine-tuned; the VAE and the text encoder act as fixed feature extractors.
vae.requires_grad_(False)
text_encoder.requires_grad_(False)

# Trade compute for memory: recompute activations during backward instead of storing them.
# NOTE(review): full fp32 AdamW fine-tuning of the UNet may still not fit on a ~15 GB GPU,
# especially while another copy of the model is resident on the device - confirm on the target hardware.
unet.enable_gradient_checkpointing()
unet.train()

# Define the optimizer
learning_rate = 1e-4  # Adjust learning rate as necessary
optimizer = torch.optim.AdamW(unet.parameters(), lr=learning_rate)

# Initialize accelerator for distributed training
accelerator = Accelerator(mixed_precision="fp16")

# Prepare for training
unet, optimizer, train_dataloader = accelerator.prepare(unet, optimizer, train_dataloader)

# The frozen models are not passed through `prepare`, so place them on the training
# device explicitly; half precision is enough because they are inference-only.
device = accelerator.device
frozen_dtype = torch.float16
vae.to(device, dtype=frozen_dtype)
text_encoder.to(device, dtype=frozen_dtype)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0

    for batch in train_dataloader:
        # The dataset transform already produced tensors and the default collate
        # stacked them, so the batch only needs to be moved/cast.
        pixel_values = batch["image"].to(device, dtype=frozen_dtype)

        # Pad/truncate every caption to the tokenizer's fixed context length.
        tokens = tokenizer(
            list(batch["text"]),
            padding="max_length",
            max_length=tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        input_ids = tokens.input_ids.to(device)

        with torch.no_grad():
            # The UNet operates on VAE latents, not on RGB pixels.
            latents = vae.encode(pixel_values).latent_dist.sample()
            latents = latents * vae.config.scaling_factor
            # The UNet is conditioned on text-encoder hidden states, not raw token ids.
            encoder_hidden_states = text_encoder(input_ids)[0]

        # Forward diffusion: add noise at an independently sampled timestep per example.
        noise = torch.randn_like(latents)
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (latents.shape[0],),
            device=latents.device,
        ).long()
        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        # The regression target depends on how the scheduler parameterizes the prediction.
        if noise_scheduler.config.prediction_type == "v_prediction":
            target = noise_scheduler.get_velocity(latents, noise, timesteps)
        else:
            target = noise

        # The UNet returns an output object; the prediction tensor is in `.sample`.
        model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample

        # Compute loss and update weights (loss in fp32 for numerical stability)
        loss = torch.nn.functional.mse_loss(model_pred.float(), target.float())
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch; guard against an empty dataloader.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch + 1}/{num_epochs} complete. Loss: {mean_loss}")


OutOfMemoryError: CUDA out of memory. Tried to allocate 114.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 99.06 MiB is free. Process 19035 has 14.65 GiB memory in use. Of the allocated memory 14.37 GiB is allocated by PyTorch, and 185.27 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Persist the fine-tuned model.
# `pipeline` was built from the pre-trained checkpoint and the training loop updates the
# separately loaded `unet`, so saving `pipeline` unchanged would export the original
# weights. Install the trained UNet (unwrapped from its Accelerate wrapper) first.
output_dir = "/content/fine_tuned_model"
pipeline.unet = accelerator.unwrap_model(unet)
pipeline.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")


In [12]:
# Print the current GPU status (device, memory usage, utilization) via the NVIDIA CLI.
!nvidia-smi

Tue Nov 26 20:51:20 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   65C    P0              30W /  70W |  15003MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    