# Make sure to be using a Kuda GPU. Here The GPU output will depend on what you are using(in collab the Tesla T4 is free)

In [1]:
import torch
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")

GPU Name: NVIDIA GeForce RTX 3070


# Install Libraries


In [None]:
!pip install diffusers transformers accelerate torch torchvision
!pip install transformers scipy ftfy accelerate


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

# Create Pipe and Model

In [2]:
from diffusers import StableDiffusionPipeline
import torch

# get v-1.5 of stable dissufusion(more recent than collab tutorial)
model_id = "runwayml/stable-diffusion-v1-5"

# remove "torch_dtype=torch.float16" arg to increase result performace but decrease speed/opitimization
pipe = StableDiffusionPipeline.from_pretrained(model_id,torch_dtype=torch.float16)

# Use this if you have to use cpu/mps
# device = "mps" if torch.backends.mps.is_available() else "cpu"
# pipe.to(device)

# Use this if you have Cuda
pipe.to("cuda")

  from .autonotebook import tqdm as notebook_tqdm
Loading pipeline components...: 100%|██████████| 7/7 [00:09<00:00,  1.35s/it]


StableDiffusionPipeline {
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.32.2",
  "_name_or_path": "runwayml/stable-diffusion-v1-5",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "image_encoder": [
    null,
    null
  ],
  "requires_safety_checker": true,
  "safety_checker": [
    "stable_diffusion",
    "StableDiffusionSafetyChecker"
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

In [3]:
def disable_nsfw_filter(pipe):
    """Overrides the NSFW filter to prevent black images and errors."""
    def dummy_checker(images, **kwargs):
        # Force output to always return a list of `False`
        return images, [False] * len(images)  # Ensures it's always iterable

    pipe.safety_checker = dummy_checker  # Override the default safety checker

# Apply the fix
disable_nsfw_filter(pipe)

# Tokenize and truncate text to fit CLIP

In [4]:
import spacy
from nltk.corpus import stopwords
import nltk

# Make sure stopwords are available
nltk.download('stopwords')
stop_words = set(stopwords.words("english"))

# Load SpaCy model
import spacy.cli
spacy.cli.download("en_core_web_sm")
nlp = spacy.load("en_core_web_sm")

def process_prompt(text, max_tokens=77):
    """Filters prompt using SpaCy POS tagging and NLTK stopwords (no NLTK tokenization)."""

    # Quick check using simple split
    if len(text.split()) <= max_tokens:
        return text

    # SpaCy tokenization and filtering
    doc = nlp(text)
    keywords = [
        token.text for token in doc
        if token.pos_ in {"ADJ", "NOUN", "PROPN", "ADV"}
        and token.text.lower() not in stop_words
    ]

    # Return trimmed result
    optimized_prompt = " ".join(keywords[:max_tokens])
    return optimized_prompt

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Austin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import pandas as pd
import os
from PIL import Image

# File paths
csv_path = "homework_2.csv"
output_folder = r"C:\Users\Austin\Desktop\DSCI 550\Homework 2\Pics"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_path)

# Keep only the first 10 samples
# df = df.head(10)

# Add a new column for image paths
image_paths = []

# Loop through each caption and generate/save image
for idx, row in df.iterrows():
    raw_prompt = row['description']
    clean_prompt = process_prompt(raw_prompt)

    # Generate the image
    image = pipe(clean_prompt, num_inference_steps=70).images[0]

    # Define image save path
    filename = f"image_{idx + 1}.png"
    save_path = os.path.join(output_folder, filename)

    # Save the image
    image.save(save_path)

    # Record path in new column
    image_paths.append(save_path)

# Add image paths to the dataframe
df['Image Path'] = image_paths

# Optionally save the updated CSV (can update the path if needed)
df.to_csv(os.path.join(output_folder, "image_captions_with_paths_desc.csv"), index=False)

print("✅ All images generated and saved. Updated CSV written with 'Image Path' column.")

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['honeycreek road witnesses nearby seidman']
100%|██████████| 70/70 [00:07<00:00,  9.41it/s]
100%|██████████| 70/70 [00:06<00:00, 11.40it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['however family']
100%|██████████| 70/70 [00:07<00:00,  9.91it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['morning ceiling bed finally']
100%|██████████| 70/70 [00:06<00:00, 10.42it/s]
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['reply .']
100%|██████████| 70/70 [00:05<00:00, 11.85it/s]


✅ All images generated and saved. Updated CSV written with 'Image Path' column.
