In [None]:
import os

from google.colab import drive

# Make Google Drive available inside the Colab VM.
drive.mount('/content/drive')  # Mount Google Drive

# Work from the model folder so relative paths used later resolve inside it.
os.chdir("/content/drive/My Drive/character")


In [None]:
import torch

# Query the runtime once and report whether a GPU is attached.
cuda_available = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if cuda_available else "No GPU found"

print("CUDA Available:", cuda_available)
print("CUDA Device Name:", device_name)


In [None]:
import json
import re

def extract_characters(text):
    """Return the character names found in a Hindi story.

    A name is any word directly followed by one of the speech verbs in the
    pattern below (e.g. "कहता है", "बोली"), plus any of the hard-coded names
    for this specific story that occur anywhere in ``text``.

    Args:
        text: Hindi story text.

    Returns:
        list[str]: Unique names, ordered by where each first occurs in ``text``
        so the result is identical on every run.
    """
    # Python's ``\w`` does not match Devanagari vowel signs / combining marks
    # (they are not alphanumeric), so a bare ``\w+`` can never capture a name
    # that ends in a matra such as "कालू". The Devanagari block is therefore
    # added explicitly, leaving out the danda punctuation (U+0964, U+0965).
    name_pattern = (
        r"([\w\u0900-\u0963\u0966-\u097F]+)\s+"
        r"(कहता है|कहती है|बोला|बोली|बोलता है|बोलती है)"
    )
    found = set()

    for match in re.finditer(name_pattern, text):
        found.add(match.group(1))

    # Additional manual patterns for this specific story
    for name in ["राम", "कालू", "सोनी"]:
        if name in text:
            found.add(name)

    # Every collected name is a substring of ``text``, so ``find`` is never -1.
    # The name itself breaks ties between names that start at the same index.
    return sorted(found, key=lambda name: (text.find(name), name))

def split_scenes(text):
    """Split a Hindi story into one scene per sentence.

    Sentences are delimited by the danda ("।"). The location is carried over
    from the previous sentence until a sentence mentions "जंगल" or "झील".

    Args:
        text: Hindi story text.

    Returns:
        list[dict]: One dict per sentence with the keys ``scene_number``
        (1-based), ``location``, ``scene_text``, ``characters_in_scene``,
        ``emotion``, ``position`` and ``dialogues``.
    """
    sentences = [s.strip() + "।" for s in text.split("।") if s.strip()]
    scenes = []
    all_characters = extract_characters(text)
    current_location = "अज्ञात"

    for i, sentence in enumerate(sentences, 1):
        if "जंगल" in sentence:
            current_location = "जंगल"
        elif "झील" in sentence:
            current_location = "झील"

        if "तीनों" in sentence or "सभी" in sentence:
            # Copy rather than alias: otherwise every "everyone" scene would share
            # one list object, and editing one scene's cast would edit them all.
            chars_in_scene = list(all_characters)
        else:
            chars_in_scene = [c for c in all_characters if c in sentence]

        # Detect dialogue text inside single quotes
        dialogues = [d.strip("।") for d in re.findall(r"'(.*?)'", sentence)]

        scenes.append({
            "scene_number": i,
            "location": current_location,
            "scene_text": sentence,
            "characters_in_scene": chars_in_scene,
            "emotion": detect_scene_emotion(sentence),
            "position": detect_scene_position(sentence),
            "dialogues": dialogues
        })

    return scenes

def detect_emotion(text):
    """Map Hindi text to an emotion label using keyword stems.

    Groups are checked in order (happy, scared, sad); the first group with a
    stem present in the text wins. Returns "smile" when nothing matches.
    """
    lowered = text.lower()
    keyword_groups = (
        ("happy", ("खुश", "हँस", "मुस्कुर")),
        ("scared", ("डर", "भय")),
        ("sad", ("दुख", "उदास")),
    )
    for emotion, stems in keyword_groups:
        for stem in stems:
            if stem in lowered:
                return emotion
    return "smile"

def detect_position(sentence, character):
    """Return a posture label for a Hindi sentence based on verb stems.

    Stems are checked in order: sitting, lying, walking. "standing" is the
    fallback. ``character`` is accepted but not used by the lookup.
    """
    lowered = sentence.lower()
    stem_to_posture = (
        ("बैठ", "sitting"),
        ("लेट", "lying"),
        ("चल", "walking"),
    )
    for stem, posture in stem_to_posture:
        if stem in lowered:
            return posture
    return "standing"

def detect_scene_emotion(sentence):
    """Return the emotion label for a whole scene sentence (delegates to detect_emotion)."""
    scene_emotion = detect_emotion(sentence)
    return scene_emotion

def detect_scene_position(sentence):
    """Return "sitting" when the sentence contains the stem "बैठ", otherwise "standing"."""
    return "sitting" if "बैठ" in sentence else "standing"

# Hindi story
hindi_story = """
राम, एक सुंदर लड़का, जंगल में अपने बात करने वाले कुत्ते कालू के साथ चल रहा है।
वहाँ उसकी मुलाकात सोनी नाम की एक लड़की से होती है। कालू कहता है 'नमस्ते'। सोनी कहती है 'हाय'।
बाद में तीनों एक झील के पास बैठते हैं और बातें करते हैं।
"""

# Run the two extraction passes over the raw story text.
characters = extract_characters(hindi_story)
scenes = split_scenes(hindi_story)

# Bundle everything the later cells read back from disk.
story_data = dict(
    original_hindi=hindi_story.strip(),
    characters=characters,
    scenes=scenes,
)

# ensure_ascii=False keeps the Devanagari text readable in the JSON file.
with open("hindi_story.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(story_data, ensure_ascii=False, indent=4))

print("✅ Hindi story processed successfully!")


In [None]:
!pip install diffusers transformers accelerate bitsandbytes xformers safetensors
!pip install peft


In [None]:
import os
import torch
import random
import numpy as np
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.nn import functional as F
from diffusers import StableDiffusionPipeline, DDPMScheduler
from peft import LoraConfig, get_peft_model
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset and output directories
# configured for training below are paths under this mount point.
drive.mount('/content/drive')

# ✅ Set Random Seeds for Reproducibility
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


set_seed(42)

# ✅ Configuration
class Config:
    # Base checkpoint and data / output locations on the mounted Drive.
    model_id = "stabilityai/stable-diffusion-2-1-base"
    dataset_dir = "/content/drive/MyDrive/character/train"
    output_dir = "/content/drive/MyDrive/lora_trained_model"

    # Optimisation settings.
    batch_size = 1
    gradient_accumulation_steps = 4
    learning_rate = 1e-5
    num_epochs = 10
    mixed_precision = "fp16"

    # LoRA adapter settings.
    lora_r = 16
    lora_alpha = 32
    lora_dropout = 0.1
    target_modules = ["to_q", "to_k", "to_v", "to_out.0"]

    validation_steps = 100

    # One character per sub-directory of dataset_dir, sorted for a consistent
    # order. Plain files (e.g. a stray .DS_Store) are skipped because
    # ImageDataset lists each character path as a directory and would crash.
    # Only the outermost iterable of a generator may read class-level names,
    # which is why the filter uses the DirEntry itself.
    characters = sorted(entry.name for entry in os.scandir(dataset_dir) if entry.is_dir())

config = Config()

# ✅ Load Stable Diffusion Model
pipe = StableDiffusionPipeline.from_pretrained(
    config.model_id, torch_dtype=torch.float16 if config.mixed_precision == "fp16" else torch.float32
)
# Training adds noise with a DDPM schedule built from the pipeline's scheduler config.
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

# Only the LoRA adapter in the UNet is trained. Freeze the VAE and text encoder
# so backward passes do not compute (and store) gradients for their weights.
pipe.vae.requires_grad_(False)
pipe.text_encoder.requires_grad_(False)

# ✅ Apply LoRA
lora_config = LoraConfig(
    r=config.lora_r,
    lora_alpha=config.lora_alpha,
    target_modules=config.target_modules,
    lora_dropout=config.lora_dropout
)
pipe.unet = get_peft_model(pipe.unet, lora_config)
pipe.unet.train()

# ✅ Define Data Augmentation & Custom Image Dataset
class ImageDataset(Dataset):
    """Images of one character, laid out as ``<root_dir>/<character>/<emotion>/<image>``.

    Each item is a dict with ``pixel_values`` (the image after ``transform``, or
    the RGB PIL image when no transform is given) and ``prompt`` (a caption of
    the form "A <emotion> <character> in comic style").
    """

    # Compared against the lower-cased file name, so ".PNG" / ".JPG" also match.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

    def __init__(self, root_dir, character, transform=None):
        """Index every image file under each emotion sub-directory.

        Args:
            root_dir: Directory that contains one folder per character.
            character: Name of the character folder to load.
            transform: Optional callable applied to each RGB image.
        """
        self.root_dir = os.path.join(root_dir, character)
        self.character = character
        self.transform = transform
        self.image_files = []
        self.labels = []
        # Emotion per image, kept separately so prompts never have to re-split
        # the "<character>_<emotion>" label (which breaks on names containing "_").
        self.emotions = []

        for emotion in sorted(os.listdir(self.root_dir)):
            emotion_path = os.path.join(self.root_dir, emotion)
            if not os.path.isdir(emotion_path):  # ✅ Skip files
                continue

            # Sorted so the index -> file mapping is the same on every run.
            for file in sorted(os.listdir(emotion_path)):
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_files.append(os.path.join(emotion_path, file))
                    self.labels.append(f"{character}_{emotion}")
                    self.emotions.append(emotion)

    def __len__(self):
        return len(self.image_files)

    def _prompt_for(self, idx):
        """Build the training caption for the image at ``idx``."""
        return f"A {self.emotions[idx]} {self.character} in comic style"

    def __getitem__(self, idx):
        # The context manager closes the file handle; convert() returns an
        # independent, fully loaded RGB image.
        with Image.open(self.image_files[idx]) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return {"pixel_values": image, "prompt": self._prompt_for(idx)}

# Resize to the training resolution, apply light augmentation, then convert to
# a tensor normalised with mean 0.5 and std 0.5.
_resize = transforms.Resize((512, 512))
_augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.RandomAffine(degrees=10, translate=(0.05, 0.05)),
]
_to_normalised_tensor = [
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
]

transform = transforms.Compose([_resize, *_augmentations, *_to_normalised_tensor])

# ✅ Training Loop for Each Character
# NOTE(review): the same LoRA adapter object is trained for every character in
# turn, so each saved checkpoint also contains the training of the characters
# before it. Re-create the adapter per character if independent adapters are intended.
for character in config.characters:
    if not os.path.isdir(os.path.join(config.dataset_dir, character)):
        print(f"⚠️ Skipping {character}: not a directory.")
        continue

    print(f"\n🚀 Training for character: {character}")
    char_dataset = ImageDataset(config.dataset_dir, character, transform=transform)
    if len(char_dataset) == 0:
        # A shuffling DataLoader cannot be built over an empty dataset.
        print(f"⚠️ No training images found for {character}, skipping.")
        continue
    char_dataloader = DataLoader(char_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2, pin_memory=True)

    # Only parameters that require gradients (the LoRA weights) are optimised.
    trainable_params = [p for p in pipe.unet.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(trainable_params, lr=config.learning_rate, weight_decay=0.01)

    # One optimiser update per full accumulation window, plus one for the
    # partial window at the end of an epoch (ceil division).
    num_batches = len(char_dataloader)
    updates_per_epoch = -(-num_batches // config.gradient_accumulation_steps)
    scheduler = CosineAnnealingLR(optimizer, T_max=updates_per_epoch * config.num_epochs)

    for epoch in range(config.num_epochs):
        progress_bar = tqdm(total=num_batches, desc=f"{character} - Epoch {epoch+1}")
        optimizer.zero_grad()

        for step, batch in enumerate(char_dataloader):
            # The VAE and text encoder are not trained: run them without autograd
            # so no graph is kept for them.
            with torch.no_grad():
                pixel_values = batch["pixel_values"].to("cuda").half()
                latents = pipe.vae.encode(pixel_values).latent_dist.sample() * pipe.vae.config.scaling_factor
                text_input = pipe.tokenizer(
                    batch["prompt"],
                    padding="max_length",
                    max_length=pipe.tokenizer.model_max_length,
                    truncation=True,
                    return_tensors="pt",
                ).to("cuda")
                encoder_hidden_states = pipe.text_encoder(**text_input).last_hidden_state

            timesteps = torch.randint(0, pipe.scheduler.config.num_train_timesteps, (latents.shape[0],), device="cuda").long()
            noise = torch.randn_like(latents)
            noisy_latents = pipe.scheduler.add_noise(latents, noise, timesteps)
            model_pred = pipe.unet(noisy_latents, timesteps, encoder_hidden_states=encoder_hidden_states).sample

            # Compute the loss in float32 for numerical stability.
            loss = F.mse_loss(model_pred.float(), noise.float()) / config.gradient_accumulation_steps
            loss.backward()

            # Also step on the last batch of the epoch: otherwise the leftover
            # gradients are discarded by the zero_grad() at the next epoch start,
            # and small datasets (fewer batches than accumulation steps) never train.
            is_last_batch = (step + 1) == num_batches
            if (step + 1) % config.gradient_accumulation_steps == 0 or is_last_batch:
                torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()

            progress_bar.update(1)
            progress_bar.set_postfix(loss=loss.item() * config.gradient_accumulation_steps)
        progress_bar.close()

    # ✅ Save Checkpoint for Each Character
    char_save_dir = os.path.join(config.output_dir, f"{character}_lora")
    os.makedirs(char_save_dir, exist_ok=True)
    lora_save_dir = os.path.join(char_save_dir, "lora_adapter")
    pipe.unet.save_pretrained(lora_save_dir)
    print(f"✅ LoRA model saved for {character} at {char_save_dir}")

print("🎉 Training Completed for All Characters!")


In [None]:
!pip install googletrans==4.0.0-rc1


In [None]:
import json
from googletrans import Translator

def translate_story(input_file, output_file):
    """Translate a processed Hindi story JSON into English and save it.

    The full story text, each scene's location and text, and every dialogue
    line are translated from Hindi to English. Character names, emotion and
    position are copied through unchanged.

    Args:
        input_file: Path of the Hindi story JSON to read.
        output_file: Path of the English JSON to write.
    """
    translator = Translator()

    def to_english(hindi_text):
        # Single place for the Hindi -> English call.
        return translator.translate(hindi_text, src='hi', dest='en').text

    with open(input_file, 'r', encoding='utf-8') as source:
        hindi_data = json.load(source)

    # Whole-story translation first, then scene by scene.
    english_data = {
        "original_english": to_english(hindi_data['original_hindi']),
        # Names stay in Hindi here; they are kept as-is for LoRA mapping.
        "characters": hindi_data['characters'],
        "scenes": []
    }

    for scene in hindi_data['scenes']:
        english_data['scenes'].append({
            "scene_number": scene["scene_number"],
            "location": to_english(scene["location"]),
            "scene_text": to_english(scene["scene_text"]),
            "characters_in_scene": scene["characters_in_scene"],  # Keep original for mapping
            "emotion": scene["emotion"],  # Already in English
            "position": scene["position"],  # Already in English
            "dialogues": [to_english(line) for line in scene["dialogues"]]
        })

    with open(output_file, 'w', encoding='utf-8') as target:
        json.dump(english_data, target, ensure_ascii=False, indent=4)

    print(f"✅ Successfully translated {input_file} to {output_file}")

# Usage: read the Hindi story JSON and write its English translation
# to translated_story.json.
translate_story("hindi_story.json", "translated_story.json")

In [None]:
import json

# Hindi name -> English name used in the generated comic
character_map = {"कालू": "Bruno", "सोनी": "Tina", "राम": "Ram"}

# Optional: LoRA adapter mapping (if needed later)
lora_model_map = {"Kalu": "Bruno", "Soni": "Tina", "Ram": "Ram"}


def _to_english_names(names):
    """Replace every mapped Hindi name; names without a mapping pass through unchanged."""
    return [character_map.get(name, name) for name in names]


# Read the translated story
with open("translated_story.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Rename the cast of every scene, then the story-level character list
for scene in data["scenes"]:
    scene["characters_in_scene"] = _to_english_names(scene["characters_in_scene"])

data["characters"] = _to_english_names(data["characters"])

# Write the result to a new file
with open("english.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4, ensure_ascii=False)

print("✅ Character mapping done and saved to 'english.json'")


In [None]:
import torch
import os
import json
from diffusers import StableDiffusionPipeline
from peft import PeftModel

# ✅ Configuration
class Config:
    # Base checkpoint the per-character LoRA adapters are applied to.
    base_model = "stabilityai/stable-diffusion-2-1-base"
    # Folder holding one "<character>_lora/lora_adapter" directory per character.
    lora_weights_path = "/content/drive/MyDrive/lora_trained_model"
    # Absolute on purpose: the first cell changes the working directory to the
    # Drive folder, so a relative "./panels/" would land there while the
    # dialogue cell reads panels from /content/panels.
    output_dir = "/content/panels"
    # Story file with English scene text and mapped character names.
    json_file = "english.json"

# ✅ Loaded pipelines, keyed by character name, so each is built only once
pipe_cache = {}

# ✅ Visual description appended to the prompt for each known character
character_prompts = {
    "Ram": (
        "a charming young man with a blonde hair and blue eyes, "
        "inspired by Flynn Rider from Tangled, Disney-style"
    ),
    "Bruno": (
        "a goofy, friendly Great Dane dog, "
        "specifically a cartoon dog like Scooby-Doo"
    ),
    "Tina": (
        "a brave island girl with long curly black hair and tropical attire, "
        "inspired by Moana, Disney-style"
    ),
}

# ✅ Make sure the panel folder exists before any image is saved
os.makedirs(Config.output_dir, exist_ok=True)

def load_model_for_inference(character):
    """Return the pipeline for ``character``, building and caching it on first use.

    The base model is loaded in float16 on the GPU. If a LoRA adapter folder
    exists for the character it is merged into the UNet; otherwise the plain
    base model is used.
    """
    try:
        return pipe_cache[character]
    except KeyError:
        pass

    print(f"🧠 Loading model for {character}")
    base_pipe = StableDiffusionPipeline.from_pretrained(
        Config.base_model, torch_dtype=torch.float16
    )
    pipe = base_pipe.to("cuda")

    lora_path = os.path.join(Config.lora_weights_path, f"{character}_lora", "lora_adapter")
    if not os.path.exists(lora_path):
        print(f"⚠️ No LoRA found for {character}, using base model.")
    else:
        print(f"🔗 Applying LoRA for {character} from {lora_path}")
        adapted_unet = PeftModel.from_pretrained(pipe.unet, lora_path)
        pipe.unet = adapted_unet.merge_and_unload()

    pipe_cache[character] = pipe
    return pipe

def generate_comic_from_json(json_file):
    """Render one comic panel per scene of the story in ``json_file``.

    For each scene a prompt is built from the scene text, its location and the
    main character's style description. The image is generated with that
    character's pipeline and saved as ``comic_panel_<scene_number>.bmp`` in
    ``Config.output_dir``. A failure in one scene is printed and the loop
    continues with the next scene.
    """
    with open(json_file, "r", encoding="utf-8") as handle:
        story = json.load(handle)

    # Fixed style instructions appended to every prompt.
    style_suffix = (
        "highly detailed comic-style illustration, vibrant colors, "
        "inked outlines, dynamic composition, expressive characters, graphic novel style."
    )
    previous_location = None

    for scene in story["scenes"]:
        number = scene["scene_number"]
        text = scene["scene_text"]

        # Fall back to the previous scene's location, then to a generic one.
        location = scene.get("location") or previous_location or "Unknown Place"
        previous_location = location

        # The first listed character drives the panel; scenes without a cast
        # use the story's first character.
        cast = scene["characters_in_scene"]
        character = cast[0] if cast else story["characters"][0]

        # Mention the location only when the text does not already contain it.
        if location.lower() not in text.lower():
            text = f"In the {location}, {text}"

        style_desc = character_prompts.get(character, "")
        prompt = f"{text}. {style_desc}, " + style_suffix

        try:
            torch.cuda.empty_cache()
            pipe = load_model_for_inference(character)

            print(f"🎨 Scene {number}: Prompt -> {prompt}")
            with torch.autocast("cuda"):
                result = pipe(prompt)
                image = result.images[0]

            output_path = os.path.join(Config.output_dir, f"comic_panel_{number}.bmp")
            image.save(output_path)
            print(f"✅ Saved panel at {output_path}")

        except Exception as e:
            print(f"❌ Error generating scene {number}: {e}")

    print("✅✅ Comic generation complete!")

# ✅ Run the script
# Generates the panels for the story file named in Config.json_file.
if __name__ == "__main__":
    generate_comic_from_json(Config.json_file)


In [None]:
# Install Noto Sans Devanagari
!apt-get install -y fonts-noto


In [None]:
import json
import os
from PIL import Image, ImageDraw, ImageFont

# ✅ Configuration
class Config:
    # Inputs: the Hindi story (source of the dialogue text) and the generated panels.
    json_path = "hindi_story.json"
    panels_dir = "/content/panels"

    # Outputs: one folder per panel variant.
    output_dir_with_dialogue = "/content/panels_with_dialogue"
    output_dir_without_dialogue = "/content/panels_without_dialogue"

    # Devanagari-capable font used to draw the dialogue text.
    font_path = "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf"
    font_size = 36

# ✅ Create output directories
os.makedirs(Config.output_dir_with_dialogue, exist_ok=True)
os.makedirs(Config.output_dir_without_dialogue, exist_ok=True)

# ✅ Load font
try:
    font = ImageFont.truetype(Config.font_path, Config.font_size)
except OSError as exc:
    # Chain the original error so the underlying cause stays visible.
    raise RuntimeError(f"Could not load font from: {Config.font_path}") from exc

# ✅ Load story JSON
with open(Config.json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# ✅ Process each scene
for scene in data["scenes"]:
    scene_number = scene["scene_number"]
    dialogues = scene.get("dialogues", [])

    image_path = os.path.join(Config.panels_dir, f"comic_panel_{scene_number}.bmp")
    if not os.path.exists(image_path):
        print(f"⚠️ Scene {scene_number}: No matching image found.")
        continue

    # ✅ Save the original image as "without dialogue"
    # copy() loads the pixels, so the file handle can be closed right away.
    with Image.open(image_path) as panel_file:
        image = panel_file.copy()
    no_dialogue_path = os.path.join(Config.output_dir_without_dialogue, f"comic_panel_{scene_number}.bmp")
    image.save(no_dialogue_path)
    print(f"📁 Scene {scene_number}: Saved without dialogue to -> {no_dialogue_path}")

    # ✅ If no dialogues, skip creating a second version
    if not dialogues:
        continue

    # ✅ Create copy for dialogue version
    image_with_dialogue = image.copy()
    draw = ImageDraw.Draw(image_with_dialogue)

    x, y = 50, 50
    padding = 15
    bubble_gap = 10  # vertical space between consecutive bubbles

    for dialogue in dialogues:
        # textbbox returns the real extent of the rendered glyphs, which is
        # usually offset from the (x, y) anchor. Building the bubble from it keeps
        # the text inside the box instead of assuming it starts exactly at y.
        left, top, right, bottom = draw.textbbox((x, y), dialogue, font=font)

        # White bubble with black outline
        box_coords = [left - padding, top - padding, right + padding, bottom + padding]
        draw.rectangle(box_coords, fill="white", outline="black", width=2)

        draw.text((x, y), dialogue, font=font, fill="black")

        # Advance by the full bubble height so the next bubble never overlaps.
        y += (bottom - top) + 2 * padding + bubble_gap

    # ✅ Save dialogue image
    dialogue_path = os.path.join(Config.output_dir_with_dialogue, f"comic_panel_{scene_number}.bmp")
    image_with_dialogue.save(dialogue_path)
    print(f"💬 Scene {scene_number}: Dialogue version saved to -> {dialogue_path}")

print("✅✅ Done! All panels saved with and without dialogues.")


In [None]:
from PIL import Image
import os
import json

# ✅ Configuration
class Config:
    # Story file; it determines panel order and which scenes have dialogue.
    json_path = "hindi_story.json"

    # Panel folders: scenes with dialogue come from the first, the rest from the second.
    dialogue_dir = "/content/panels_with_dialogue"
    no_dialogue_dir = "/content/panels_without_dialogue"

    # Output strip and its direction.
    layout = "horizontal"  # or "vertical"
    final_strip_path = "/content/final_comic_strip.bmp"

# ✅ Read the story to get the scene order
with open(Config.json_path, "r", encoding="utf-8") as story_file:
    data = json.load(story_file)

# ✅ Collect one panel per scene, in story order
scene_images = []
for scene in data["scenes"]:
    scene_number = scene["scene_number"]

    # Scenes with dialogue use the annotated panel, the others the plain one.
    source_dir = Config.dialogue_dir if scene.get("dialogues") else Config.no_dialogue_dir
    img_path = os.path.join(source_dir, f"comic_panel_{scene_number}.bmp")

    if os.path.exists(img_path):
        scene_images.append(Image.open(img_path))
    else:
        print(f"⚠️ Image for scene {scene_number} not found at {img_path}, skipping.")

# ✅ Nothing to assemble without panels
if not scene_images:
    raise RuntimeError("No images found to create comic strip.")

# ✅ Size the white canvas: panels are summed along the strip direction and the
# largest panel sets the other dimension.
is_horizontal = Config.layout == "horizontal"
panel_widths = [img.width for img in scene_images]
panel_heights = [img.height for img in scene_images]
if is_horizontal:
    canvas_size = (sum(panel_widths), max(panel_heights))
else:  # vertical layout
    canvas_size = (max(panel_widths), sum(panel_heights))

final_image = Image.new("RGB", canvas_size, color=(255, 255, 255))

# ✅ Paste panels one after another along the strip direction
x_offset, y_offset = 0, 0
for img in scene_images:
    final_image.paste(img, (x_offset, y_offset))
    if is_horizontal:
        x_offset += img.width
    else:
        y_offset += img.height

# ✅ Write the assembled strip
final_image.save(Config.final_strip_path)
print(f"🎉 Comic strip created: {Config.final_strip_path}")
