In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# project files stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from google.colab import drive
import shutil
import os
import ast

## Getting the required files from the Drive

In [None]:
# Define source and destination paths
drive_folder = "/content/drive/MyDrive/Projects/Stable-Diffusion-From-Scratch/sd"  # Replace with your folder name
destination_folder = "/content/"  # Current directory in Colab

# Fail early with an actionable message instead of a bare listdir error.
if not os.path.isdir(drive_folder):
    raise FileNotFoundError(
        f"Drive folder not found: {drive_folder!r}. "
        "Check that Google Drive is mounted and the path is correct."
    )

# Ensure destination folder exists
os.makedirs(destination_folder, exist_ok=True)

# Copy every entry of the Drive folder into the destination.
# shutil.copy only handles regular files and raises on a directory, so
# sub-folders are copied recursively with copytree instead.
for filename in os.listdir(drive_folder):
    src = os.path.join(drive_folder, filename)
    dst = os.path.join(destination_folder, filename)
    if os.path.isdir(src):
        shutil.copytree(src, dst, dirs_exist_ok=True)
    else:
        shutil.copy(src, dst)

print("Files copied successfully!")

Files copied successfully!


## Downloading required models and libraries

In [None]:
# Run the setup script copied from Drive. The -i flag executes it in this
# notebook's namespace, so names it defines stay available to later cells.
# NOTE(review): presumably this downloads the model weights and installs
# dependencies, as the section title says; confirm against init.py.
%run -i '/content/init.py'

In [None]:
# Obtain the Gemini API key through the project's `auth` helper module rather
# than hardcoding it in the notebook. API_KEY is used later to build the client.
from auth import get_gemini_api_key
API_KEY = get_gemini_api_key()

## Loading the Stable Diffusion model

In [None]:
import model_loader # personal python script
import pipeline # personal python script
from PIL import Image
from pathlib import Path
from transformers import CLIPTokenizer
import torch
import matplotlib.pyplot as plt

# Opt-in switches for the hardware accelerators this notebook may use.
ALLOW_CUDA = True
ALLOW_MPS = False

# Start from the CPU so that DEVICE is always a backend that actually exists;
# it is only upgraded when an allowed accelerator is reported as available.
DEVICE = "cpu"
if ALLOW_CUDA and torch.cuda.is_available():
    DEVICE = "cuda"
# The ALLOW_MPS flag is tested first so the MPS backend is not even probed
# when it is disabled; the deprecated `torch.has_mps` attribute is not used.
elif ALLOW_MPS and torch.backends.mps.is_available():
    DEVICE = "mps"
print(f"Using device: {DEVICE}")

# Checkpoint file holding the Stable Diffusion v1.5 weights.
model_file = "/content/data/v1-5-pruned-emaonly.ckpt"

# CLIP tokenizer built from the local vocabulary and merges files.
tokenizer = CLIPTokenizer(
    vocab_file="/content/data/vocab.json",
    merges_file="/content/data/merges.txt",
)

# Load the model components from the checkpoint onto the selected device.
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)

Using device: cuda


## Loading the PDF

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read the source book and split it into document chunks; each element of
# `pages` exposes its text through `.page_content`.
loader = PyPDFLoader(file_path="/content/Frankenstein_Project_Gutenberg_Small.pdf")
pages = loader.load_and_split()

In [None]:
# Only the first chunk of the PDF is used as story context for the prompt.
# Guard against an empty load so the failure names the real cause instead of
# surfacing as a bare IndexError.
if not pages:
    raise ValueError("The PDF loader returned no pages; check the PDF path and contents.")
pg = pages[0].page_content

## Prompting and fetching the response from Gemini

In [None]:
# Prompt sent to Gemini: asks for a five-point, third-person summary of the
# story returned as a Python list. The text of the first PDF chunk (`pg`) is
# embedded verbatim between single quotes as the context.
content = f"""The narrator is Frankenstein FYI
I want to make a story essay with the given context story. Only give me 5 points that summarizes it from third person pronouns and nouns (Use character names), make sure the story has a good flow and give the output in the form of a python list.
context: '{pg}' """

In [None]:
from google import genai

# Build an authenticated Gemini client and submit the summary prompt.
client = genai.Client(api_key=API_KEY)
response = client.models.generate_content(
    contents=content,
    model="gemini-2.0-flash",
)

# Keep just the text of the reply for the parsing cells that follow.
response_text = response.text

In [None]:
# Isolate the Python list literal inside the model's reply.
# Slicing from the first "[" to the last "]" drops Markdown code fences, the
# "python" language tag and any prose around the list, without touching the
# word "python" (or backticks) if they occur inside the summary text itself.
if not response_text:
    raise ValueError("Gemini returned an empty response; cannot extract the summary list.")

list_start = response_text.find("[")
list_end = response_text.rfind("]")
if list_start == -1 or list_end < list_start:
    raise ValueError(f"No list literal found in the Gemini response: {response_text!r}")

# Newlines become spaces so a string that was wrapped across lines still
# parses and its words are not glued together.
clean_text = response_text[list_start:list_end + 1].replace("\n", " ").strip()

In [None]:
# Convert the cleaned response text into a Python list of summary points.
# ast.literal_eval only evaluates literals, so the model output is never
# executed as code.
try:
    story_summary = ast.literal_eval(clean_text)
except (ValueError, SyntaxError) as err:
    raise ValueError(f"Could not parse the Gemini response as a Python list: {clean_text!r}") from err

# Every entry is later used as an image prompt, so reject anything that is not
# a non-empty list of strings before the expensive generation step starts.
if (
    not isinstance(story_summary, list)
    or not story_summary
    or not all(isinstance(point, str) for point in story_summary)
):
    raise ValueError(f"Expected a non-empty list of strings, got: {story_summary!r}")

## Setting the hyperparameters of the Stable Diffusion model

In [None]:
# --- Text conditioning ---
prompts = story_summary  # one image is generated per summary point
uncond_prompt = "blurry, low quality, distorted"  # Also known as negative prompt

# --- Classifier-free guidance ---
do_cfg = True
cfg_scale = 10  # min: 1, max: 14. Controls the strength of guidance toward the prompt

# --- Optional starting image ---
input_image = None
strength = 0.9

# --- Sampling ---
sampler = "ddpm"
num_inference_steps = 50
seed = None  # NOTE(review): presumably None means a random seed; confirm in pipeline.generate

## Running the inference and saving the images

In [None]:
# savefig does not create missing directories, so make sure the output folder
# exists before the first image is written.
os.makedirs("images", exist_ok=True)

# Generate one image per summary point and save it as images/img<i>.jpg,
# with the prompt rendered as the figure title.
for i, prompt in enumerate(prompts, start=1):
    output_image = pipeline.generate(
        prompt=prompt,
        uncond_prompt=uncond_prompt,
        input_image=input_image,
        strength=strength,
        do_cfg=do_cfg,
        cfg_scale=cfg_scale,
        sampler_name=sampler,
        n_inference_steps=num_inference_steps,
        seed=seed,
        models=models,
        device=DEVICE,
        idle_device="cpu",
        tokenizer=tokenizer,
    )

    # Draw on an explicit figure/axes pair so exactly this figure is saved
    # and closed, independent of pyplot's implicit "current figure" state.
    fig, ax = plt.subplots(figsize=(8, 8))  # Adjust figure size if needed
    ax.imshow(output_image)
    ax.set_title(prompt, fontsize=12, wrap=True)
    ax.axis("off")

    # Save the image inside the 'images' folder
    image_path = os.path.join("images", f"img{i}.jpg")
    fig.savefig(image_path, bbox_inches="tight", pad_inches=0.2)  # Save with padding
    plt.close(fig)  # Close figure to free memory


100%|██████████| 50/50 [00:30<00:00,  1.65it/s]
100%|██████████| 50/50 [00:31<00:00,  1.60it/s]
100%|██████████| 50/50 [00:30<00:00,  1.66it/s]
100%|██████████| 50/50 [00:30<00:00,  1.62it/s]
100%|██████████| 50/50 [00:30<00:00,  1.65it/s]
