<a href="https://colab.research.google.com/github/Sheryar-bit/AI-Integrations/blob/main/FYP_MODEL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Log in to the Hugging Face Hub from inside the notebook. The login is done
# interactively, so no access token is written into the notebook source.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
!pip install git+https://github.com/huggingface/diffusers.git
!pip install -U -q transformers accelerate bitsandbytes peft datasets
!pip install -q opencv-python-headless matplotlib

!rm -rf diffusers
!git clone https://github.com/huggingface/diffusers.git

In [None]:
from google.colab import drive
import os

# Mount Google Drive; both the logo dataset and the training output live there.
drive.mount('/content/drive')

# Folder of training images (read by the captioning and training cells).
DATASET_PATH = "/content/drive/MyDrive/Logos"
# Folder that receives checkpoints and the final LoRA weights.
OUTPUT_DIR = "/content/drive/MyDrive/logo_model_output"

# Fail here with a clear message instead of later inside os.listdir().
if not os.path.isdir(DATASET_PATH):
    raise FileNotFoundError(f"Dataset folder not found: {DATASET_PATH}")

# exist_ok=True replaces the separate exists()-then-create check.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import json

# Caption every image in DATASET_PATH with BLIP and write the result as
# metadata.jsonl (one {"file_name", "text"} record per line) next to the images.

# Use the GPU when one is attached; otherwise fall back to (slow) CPU captioning.
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# Sorted so metadata.jsonl has a stable order between runs
# (os.listdir returns entries in arbitrary order).
image_files = sorted(
    f for f in os.listdir(DATASET_PATH)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# An empty dataset would only surface later as an obscure training failure.
if not image_files:
    raise RuntimeError(f"No .png/.jpg/.jpeg images found in {DATASET_PATH}")

print(f"Captioning {len(image_files)} images... this may take a few minutes.")

metadata = []
for filename in image_files:
    img_path = os.path.join(DATASET_PATH, filename)

    # The context manager closes the file handle; convert() returns an
    # independent in-memory copy that stays valid after the file is closed.
    with Image.open(img_path) as img:
        raw_image = img.convert('RGB')

    inputs = processor(raw_image, return_tensors="pt").to(device)
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Every caption gets the same style suffix appended.
    full_caption = f"{caption}, professional logo style"

    metadata.append({"file_name": filename, "text": full_caption})

with open(os.path.join(DATASET_PATH, 'metadata.jsonl'), 'w', encoding='utf-8') as f:
    for entry in metadata:
        f.write(json.dumps(entry) + "\n")

# Captioning is finished: move BLIP off the GPU and release the cached memory
# so the training run (a separate process) has the whole GPU available.
# `model` stays defined; re-running this cell puts it back on `device`.
model.to("cpu")
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("Metadata generated successfully!")

In [None]:
!accelerate config default

# Start Training
!python /content/diffusers/examples/text_to_image/train_text_to_image_lora.py \
  --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \
  --train_data_dir="$DATASET_PATH" \
  --dataloader_num_workers=2 \
  --resolution=512 \
  --center_crop \
  --random_flip \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --max_train_steps=1500 \
  --learning_rate=1e-04 \
  --max_grad_norm=1 \
  --lr_scheduler="cosine" \
  --lr_warmup_steps=0 \
  --output_dir="$OUTPUT_DIR" \
  --checkpointing_steps=500 \
  --validation_prompt="a minimalist logo for a tech company, clean lines" \
  --seed=42 \
  --mixed_precision="fp16"

In [None]:
from diffusers import StableDiffusionPipeline
import torch

# Base Stable Diffusion 1.5 pipeline in half precision.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
)
pipe.to("cuda")

# Apply the LoRA weights that the training run wrote to OUTPUT_DIR.
pipe.load_lora_weights(OUTPUT_DIR)

prompt = "a modern logo for a Elite Spots Brand. "
image = pipe(
    prompt,
    num_inference_steps=30,
    guidance_scale=7.5,
).images[0]

# Keep a copy on disk and show the picture as the cell output.
image.save("generated_logo.png")
image

In [None]:
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image

# Side-by-side comparison of the base model and the LoRA-tuned model on the
# same prompt, starting from the same noise.

model_id = "runwayml/stable-diffusion-v1-5"
prompt = "a modern logo for a Elite Spots Brand"
seed = 42 # Using the exact same seed is the "secret" to a fair test
generator = torch.Generator("cuda").manual_seed(seed)

# UNTRAINED (base) model
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
print("Generating image with UNTRAINED model...")
image_untrained = pipe(prompt, generator=generator, num_inference_steps=30).images[0]

# TRAINED model: same pipeline with the LoRA weights from OUTPUT_DIR applied
print("Loading your LoRA weights...")
pipe.load_lora_weights(OUTPUT_DIR)
# Reset the generator so the noise starts exactly the same
generator = torch.Generator("cuda").manual_seed(seed)

print("Generating image with TRAINED model...")
image_trained = pipe(prompt, generator=generator, num_inference_steps=30).images[0]

# Size the canvas from the generated images instead of hard-coding 1024x512,
# so the comparison stays correct if the output resolution is changed.
canvas_width = image_untrained.width + image_trained.width
canvas_height = max(image_untrained.height, image_trained.height)

comparison = Image.new('RGB', (canvas_width, canvas_height))
comparison.paste(image_untrained, (0, 0))
comparison.paste(image_trained, (image_untrained.width, 0))

print("Left: Untrained (Base SD 1.5) | Right: Your Trained Logo Model")
comparison.save("comparison_result.png")
display(comparison)

In [None]:
# Re-weight the loaded LoRA to 0.6 and generate another sample.
#
# load_lora_weights() was called without `adapter_name`, so diffusers registered
# the adapter under an auto-generated name rather than "default". Look the
# name(s) up on the pipeline instead of guessing.
adapter_names = pipe.get_active_adapters()
if not adapter_names:
    raise RuntimeError(
        "No LoRA adapter is loaded on `pipe`; run a cell that calls "
        "pipe.load_lora_weights(OUTPUT_DIR) first."
    )
pipe.set_adapters(adapter_names, adapter_weights=[0.6] * len(adapter_names))

prompt = "professional logo style, minimalist mountain, vector art"
image = pipe(prompt, num_inference_steps=30, guidance_scale=7.0).images[0]
display(image)