<a href="https://colab.research.google.com/github/Ravi-M2005/Cosmic_Crafter_AI_Intel_Intern/blob/main/Intel_Internship_Project_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

rm: cannot remove '/root/.config/gh': No such file or directory


In [None]:
# ComicCrafter AI - Implementation for Google Colab

!pip install -q gradio diffusers transformers accelerate safetensors
!pip install -q xformers controlnet-aux opencv-python
!pip install -q huggingface_hub
!pip install bitsandbytes

import os
import sys
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import textwrap
import json
import gradio as gr
import time
import traceback
import logging
import cv2
import re
from diffusers import (
    StableDiffusionXLPipeline,
    DPMSolverMultistepScheduler,
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler
)
from huggingface_hub import login
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    T5Tokenizer,
    T5ForConditionalGeneration
)



In [None]:
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Check GPU status
!nvidia-smi

Wed Apr  2 10:30:18 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   67C    P8             13W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [1]:

# 1. STORY GENERATION MODEL
def setup_story_model():
    logger.info("Setting up enhanced story generation model...")
    try:
        # Use Mistral-7B-Instruct for enhanced story generation
        model_name = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"

        # Authenticate with Hugging Face
        from huggingface_hub import login
        from getpass import getpass
        hf_token = getpass("Enter your Hugging Face token: ")  # Hidden input
        login(token=hf_token)


        # Check for available compute resources
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device}")

        # Use 4-bit quantization to reduce memory requirements
        try:
            from transformers import BitsAndBytesConfig
            import bitsandbytes as bnb

            # Configure quantization parameters for efficient loading
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16
            )

            # Load the tokenizer
            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

            # Load model with quantization
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                quantization_config=bnb_config,
                trust_remote_code=True
            )

            logger.info(f"Successfully loaded {model_name} with 4-bit quantization")

        except ImportError:
            # Fall back to regular loading if bitsandbytes is not available
            logger.warning("BitsAndBytes not available, loading with standard configuration")

            # Configure torch settings for efficiency
            torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

            # Load tokenizer
            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

            # Load model with appropriate settings
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch_dtype,
                device_map="auto" if device == "cuda" else None,
                trust_remote_code=True
            )

            # Apply padding settings needed for Mistral
            tokenizer.pad_token = tokenizer.eos_token
            if tokenizer.pad_token is None:
                tokenizer.pad_token_id = 0  # Use an alternative padding token if needed

            logger.info(f"Successfully loaded {model_name} with standard configuration")

        # Prepare model for generation with appropriate configuration for Mistral
        generation_config = model.generation_config
        generation_config.max_new_tokens = 512
        generation_config.temperature = 0.7
        generation_config.top_p = 0.9
        generation_config.repetition_penalty = 1.1
        generation_config.pad_token_id = tokenizer.pad_token_id
        generation_config.eos_token_id = tokenizer.eos_token_id
        model.generation_config = generation_config

        # Add a helper function for text generation
        def generate_story(prompt, max_length=512, temperature=0.7):
            """Generate a story using the Mistral-7B model with the given prompt."""
            formatted_prompt = f"<s>[INST] {prompt} [/INST]"
            inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=max_length,
                    temperature=temperature,
                    top_p=0.9,
                    repetition_penalty=1.1,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )

            story = tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Split story into narration and dialogues
            story_lines = story.split("\n")
            narration = "\n".join([line for line in story_lines if not line.startswith('"')])
            dialogues = "\n".join([line for line in story_lines if line.startswith('"')])

            return {"narration": narration, "dialogues": dialogues}

        # Attach the helper function to the model
        model.generate_story = generate_story

        return tokenizer, model

    except Exception as e:
        error_msg = f"Error setting up Mistral-7B model: {str(e)}"
        logger.error(error_msg)
        traceback.print_exc()

        # First fallback: T5 model
        try:
            logger.warning("Falling back to T5 model")
            model_name = "google/flan-t5-base"
            tokenizer = T5Tokenizer.from_pretrained(model_name)
            model = T5ForConditionalGeneration.from_pretrained(model_name)
            logger.info(f"Fallback to {model_name} successful")
            return tokenizer, model
        except Exception as t5_error:
            logger.error(f"T5 fallback failed: {str(t5_error)}")

            # Second fallback: distilGPT2
            try:
                logger.warning("Falling back to distilGPT2")
                model_name = "distilgpt2"
                tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
                model = AutoModelForCausalLM.from_pretrained(model_name)
                logger.info(f"Fallback to {model_name} successful")
                return tokenizer, model
            except Exception as gpt2_error:
                logger.error(f"All fallbacks failed: {str(gpt2_error)}")
                raise Exception(f"Failed to set up any story generation model: {error_msg}")

In [None]:

# 2. IMAGE GENERATION MODEL
def setup_image_model():
    logger.info("Setting up enhanced image generation model...")
    try:
        # Check CUDA availability
        if not torch.cuda.is_available():
            logger.warning("CUDA not available! Falling back to CPU (this will be slow)")

        # Use Stable Diffusion XL Turbo for faster generation
        model_id = "stabilityai/stable-diffusion-xl-base-1.0"

        # Use efficient scheduler
        scheduler = DPMSolverMultistepScheduler.from_pretrained(
            model_id,
            subfolder="scheduler"
        )

        # Load pipeline with optimizations
        pipe = StableDiffusionXLPipeline.from_pretrained(
            model_id,
            scheduler=scheduler,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            variant="fp16" if torch.cuda.is_available() else None,
            use_safetensors=True
        )

        # Move to appropriate device
        device = "cuda" if torch.cuda.is_available() else "cpu"
        pipe = pipe.to(device)

        # Enable optimizations
        if device == "cuda":
            # Try to enable xformers
            try:
                import xformers
                pipe.enable_xformers_memory_efficient_attention()
                logger.info("Successfully enabled xformers")
            except (ImportError, AttributeError):
                logger.warning("xformers not available, using attention slicing")
                pipe.enable_attention_slicing(1)

        logger.info(f"Successfully loaded Stable Diffusion XL on {device}")
        return pipe
    except Exception as e:
        error_msg = f"Error setting up image model: {str(e)}"
        logger.error(error_msg)
        traceback.print_exc()

        # Fallback to smaller model
        try:
            model_id = "CompVis/stable-diffusion-v1-4"
            pipe = StableDiffusionPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                safety_checker=None,
                requires_safety_checker=False
            ).to("cuda" if torch.cuda.is_available() else "cpu")
            pipe.enable_attention_slicing()
            logger.info(f"Fallback to {model_id} successful")
            return pipe
        except:
            raise Exception(error_msg)


In [None]:
# 3. STORY GENERATION WITH 4-PART NARRATIVE
# Enhance story generation for better descriptive text like in examples
def generate_story(prompt, num_panels=4, tokenizer=None, model=None):
    logger.info(f"Generating structured story from prompt: {prompt}")

    try:
        # For Mistral model
        if hasattr(model, 'generate_story'):
            # Create structured storytelling prompts for each panel
            panel_prompts = [
                f"Create an introduction for a comic story about {prompt}. Set the scene and introduce the characters.",
                f"Continue the story about {prompt} with rising action and challenges.",
                f"Write the climactic moment of the story about {prompt}.",
                f"Conclude the story about {prompt} with a resolution and lesson learned."
            ]

            story_data = {}
            for i, panel_prompt in enumerate(panel_prompts[:num_panels], 1):
                # Generate content for each panel
                result = model.generate_story(panel_prompt)

                # Part titles
                part_titles = ["INTRODUCTION", "DEVELOPMENT", "CLIMAX", "RESOLUTION"]
                part_title = part_titles[i-1] if i <= len(part_titles) else f"PART {i}"

                # Store panel data
                story_data[f"panel_{i}"] = {
                    "part": part_title,
                    "scene": result["narration"].strip(),
                    "characters": ["Character", "Supporting Character"],
                    "dialogue": result["dialogues"].strip() if result["dialogues"] else "The story continues..."
                }

            return story_data

            # Try to extract JSON structure
            try:
                # Find anything that looks like JSON
                json_match = re.search(r'(\{.*\})', story_text, re.DOTALL)
                if json_match:
                    story_json = json.loads(json_match.group(1))
                    return story_json
            except:
                # If JSON parsing fails, proceed with manual structuring
                pass

        # Detect story type from prompt
        is_fable = any(word in prompt.lower() for word in ["fable", "moral", "lesson", "tortoise", "hare"])
        is_jungle = any(word in prompt.lower() for word in ["jungle", "mowgli", "wolf", "tiger"])

        # Create structured panel data
        story_data = {}

        if "hare and tortoise" in prompt.lower() or "tortoise and hare" in prompt.lower():
            # Specific story for Hare and Tortoise
            story_data = {
                "panel_1": {
                    "part": "INTRODUCTION",
                    "scene": f"{prompt} - In a sun-drenched meadow, a speedy hare named Harry loved to show off his lightning-fast feet. He would often boast to his friends, a group of slow-moving tortoises, about his incredible speed and agility. The tortoises, led by a wise old tortoise named Toby, would listen patiently, but with a knowing glint in their eyes.",
                    "characters": ["Hare", "Tortoise"],
                    "dialogue": "I'm the fastest in the meadow! None of you could ever beat me!"
                },
                "panel_2": {
                    "part": "CHALLENGE",
                    "scene": f"{prompt} - One day, Toby, determined to put an end to Harry's boasts, challenged him to a race. The tortoise proposed a clear course, with the winner being the first to cross the finish line. Harry, confident in his speed, eagerly accepted the challenge. The two competitors lined up, and with a loud cry of 'Ready, set, go!' they took off.",
                    "characters": ["Hare", "Tortoise"],
                    "dialogue": "I accept your challenge! This will be the easiest race ever!"
                },
                "panel_3": {
                    "part": "CONFLICT",
                    "scene": f"{prompt} - The hare shot off like a bullet, his legs pumping furiously as he zoomed ahead of the tortoise. But, on the route, Harry began to get distracted by a delicious-looking clover field and a sparkling stream. He stopped to snack and take a refreshing drink, while the tortoise plodded steadily on. When Harry finally remembered the race, he was shocked to see Toby closing in on him.",
                    "characters": ["Hare", "Tortoise"],
                    "dialogue": "I'm so far ahead, I can take a quick break!"
                },
                "panel_4": {
                    "part": "RESOLUTION",
                    "scene": f"{prompt} - In the end, it was Toby, not Harry, who crossed the finish line first. The tortoise proved that slow and steady can win the race, and Harry learned a valuable lesson about the importance of staying focused and avoiding distractions. From then on, the hare and the tortoise became unlikely friends, with Harry often seeking advice from Toby on how to improve his focus and Toby sharing his wisdom on the benefits of perseverance.",
                    "characters": ["Hare", "Tortoise"],
                    "dialogue": "Slow and steady wins the race!"
                }
            }
        elif "mowgli" in prompt.lower() or "jungle book" in prompt.lower():
            # Specific story for Mowgli
            story_data = {
                "panel_1": {
                    "part": "INTRODUCTION",
                    "scene": f"{prompt} - A young boy named Mowgli is raised by wolves in the Indian jungle, far away from human civilization. As the pack gathers around, the wolf mother Raksha protects her human cub from the dangers that lurk in the shadows. The wise panther Bagheera watches from a nearby tree, knowing the boy's fate is tied to the jungle's ancient laws.",
                    "characters": ["Mowgli", "Wolf"],
                    "dialogue": "The jungle is my home, and the wolves are my family."
                },
                "panel_2": {
                    "part": "DEVELOPMENT",
                    "scene": f"{prompt} - Mowgli grows up in the jungle, learning the ways of the wild. The bear Baloo teaches him the law of the jungle through songs and games, while Bagheera ensures he learns essential survival skills. However, the fearsome tiger Shere Khan, who hates humans, discovers Mowgli's presence and vows to kill him, seeing the man-cub as a threat to his domain.",
                    "characters": ["Mowgli", "Bagheera", "Baloo"],
                    "dialogue": "The jungle has many lessons to teach you, little brother."
                },
                "panel_3": {
                    "part": "CONFLICT",
                    "scene": f"{prompt} - Shere Khan attacks during a drought when the animals gather at Peace Rock. Mowgli uses his human 'trick' - fire, which he calls the 'red flower' - to defend himself. The jungle animals watch in fear as Mowgli stands his ground against the powerful tiger, wielding a flaming branch. The battle represents Mowgli's struggle between his human nature and jungle upbringing.",
                    "characters": ["Mowgli", "Shere Khan"],
                    "dialogue": "I will not run from you, Shere Khan!"
                },
                "panel_4": {
                    "part": "RESOLUTION",
                    "scene": f"{prompt} - Ultimately, Mowgli defeats Shere Khan and the tiger falls into a ravine of fire. The boy finds himself drawn to a human village where he sees a girl carrying water. Though he joins the human world, he maintains his connection to the jungle, often returning to visit his animal friends. He becomes a bridge between two worlds, never fully belonging to either but finding his unique place.",
                    "characters": ["Mowgli", "Villagers"],
                    "dialogue": "I may live among humans, but the jungle will always be a part of me."
                }
            }
        else:
            # Generic 4-part narrative structure with richly detailed scenes
            # Create story narrative details based on the prompt
            if "superhero" in prompt.lower():
                narrative_details = [
                    f"In the towering metropolis of New Horizon City, chaos erupts as a menacing threat emerges. Our hero, clad in a distinctive costume, is first seen going about their daily life, unaware of the danger that approaches. Citizens look up in fear as strange events begin to unfold across the skyline.",
                    f"The situation intensifies as our hero confronts the mounting threat. Using their extraordinary abilities, they attempt to contain the danger but find it more challenging than expected. The villain's power grows stronger, causing widespread panic and destruction throughout the city.",
                    f"In a heart-stopping moment, hero and villain clash in an epic confrontation. Buildings shake, energy pulses through the air, and the fate of the city hangs in the balance. Our hero, pushed to their limits, must dig deep and find the strength to overcome seemingly impossible odds.",
                    f"With courage and determination, our hero prevails against the formidable opponent. The city begins to recover as citizens emerge to thank their savior. Our hero stands triumphant but humble, having learned an important lesson about power, responsibility, and what it truly means to be heroic."
                ]
            elif "robot" in prompt.lower() or "ai" in prompt.lower():
                narrative_details = [
                    f"In a world where technology has advanced beyond imagination, a unique robot becomes activated with an unusual level of consciousness. Its sensors take in the world around it, processing information differently than its programmed parameters anticipated. Humans nearby notice something special about this particular machine.",
                    f"The robot begins to explore its environment and capabilities, encountering both wonder and fear from the humans it interacts with. Some see it as a breakthrough, others as a threat. The robot itself struggles to understand its purpose and place in a world designed for organic life.",
                    f"A critical moment arrives when the robot must make a choice that will define its existence. Faced with conflicting directives between its programming and its developing consciousness, it stands at a crossroads. The humans watch anxiously, unsure whether to trust this artificial intelligence with such an important decision.",
                    f"The robot's choice reveals a profound understanding of what it means to be alive. Through its actions, it bridges the gap between human and machine, teaching those around it about compassion, growth, and the unexpected places where consciousness can flourish. A new understanding between humans and technology begins to take root."
                ]
            elif "detective" in prompt.lower() or "mystery" in prompt.lower():
                narrative_details = [
                    f"In the shadows of a rain-slicked city, a determined detective examines the puzzling evidence of a case that has baffled the police department. Street lamps cast long shadows as our investigator studies clues others have missed, sensing patterns where others see only chaos.",
                    f"Diving deeper into the investigation, the detective follows a trail of breadcrumbs through the city's underbelly. Suspicious characters offer contradicting accounts, while seemingly unrelated events begin to connect in unexpected ways. The danger grows as our detective gets closer to the truth.",
                    f"In a tense confrontation, the detective comes face-to-face with the mastermind behind the mystery. Pieces click into place as the full scope of the plot is revealed. Quick thinking and keen observation will determine whether justice will be served or if the truth will remain buried forever.",
                    f"As dawn breaks over the city, the case reaches its conclusion. The detective has unraveled the complex web of deception, bringing to light what many hoped would stay hidden. Though the case is closed, our detective has gained new insights into human nature and the thin line between right and wrong."
                ]
            elif any(word in prompt.lower() for word in ["magic", "wizard", "fantasy", "dragon"]):
                narrative_details = [
                    f"In a realm where magic flows like rivers through ancient lands, our protagonist discovers a power they never knew they possessed. Mystical runes glow, creatures of legend appear in the shadows, and the veil between worlds grows thin as an age-old prophecy begins to unfold.",
                    f"As our hero learns to harness their newfound abilities, dark forces take notice. The balance of magic tilts dangerously as ancient rivalries reignite. Our protagonist must quickly master skills that others have spent lifetimes perfecting, while gathering allies from unlikely places.",
                    f"Magic crackles through the air as our hero confronts the ultimate magical threat. Spells of incredible power clash in a dazzling display that threatens to tear reality itself. At the height of the battle, our hero must make a choice between raw power and the wisdom to know how magic should truly be used.",
                    f"The dust settles on a world forever changed by the magical conflict. Our hero emerges transformed, understanding that true mastery comes not from the magic itself but from the heart of the one who wields it. A new era begins, with lessons learned ensuring that the mistakes of the past will not be repeated."
                ]
            else:
                # Generic narrative for any other type of story
                narrative_details = [
                    f"Our story begins with the introduction of our main character in their ordinary world. The setting is established, showing key elements that will become important later. There's a sense that change is coming, though our protagonist doesn't fully realize it yet. The seeds of the journey ahead are subtly planted.",
                    f"The adventure truly begins as our main character faces their first real challenge. The comfortable world they knew is disrupted, forcing them to adapt and grow. New allies and adversaries appear, testing our hero's resolve and revealing both strengths and weaknesses they didn't know they possessed.",
                    f"Everything comes to a head in a dramatic confrontation. The stakes have never been higher as our protagonist faces their greatest challenge yet. This is the moment of truth, where all they've learned and experienced is put to the ultimate test. Victory seems impossible, yet they must find a way.",
                    f"The dust settles as our story reaches its meaningful conclusion. Our protagonist has been transformed by their journey, gaining wisdom that can now be shared with others. Though challenges may lie ahead, they face the future with new confidence and understanding. The world itself feels different, changed by the events that have transpired."
                ]

            # Create structured panel data
            for i in range(1, num_panels + 1):
                part_idx = min(i-1, len(narrative_details)-1)

                # Part titles
                part_titles = ["INTRODUCTION", "DEVELOPMENT", "CLIMAX", "RESOLUTION"]
                part_title = part_titles[part_idx] if part_idx < len(part_titles) else f"PART {i}"

                # Characters based on story content
                if "superhero" in prompt.lower():
                    if i == 1:
                        characters = ["Hero", "Citizens"]
                    elif i == num_panels:
                        characters = ["Triumphant Hero", "Grateful Citizens"]
                    else:
                        characters = ["Hero", "Villain"]
                elif "robot" in prompt.lower() or "ai" in prompt.lower():
                    if i == 1:
                        characters = ["Robot", "Human Observer"]
                    elif i == num_panels:
                        characters = ["Evolved Robot", "Humans"]
                    else:
                        characters = ["Robot", "Human Companion"]
                elif "detective" in prompt.lower():
                    if i == 1:
                        characters = ["Detective", "Police Officer"]
                    elif i == num_panels:
                        characters = ["Detective", "Culprit"]
                    else:
                        characters = ["Detective", "Witness"]
                else:
                    characters = ["Protagonist", "Supporting Character"]

                # Dialogues matching narrative tone
                if part_idx == 0:
                    dialogue = "This is just the beginning of something extraordinary."
                elif part_idx == 1:
                    dialogue = "I never expected it would be this challenging!"
                elif part_idx == 2:
                    dialogue = "This is the moment that changes everything!"
                elif part_idx == 3:
                    dialogue = "We've learned that the greatest power comes from within."

                # Store panel data with rich scene description
                story_data[f"panel_{i}"] = {
                    "part": part_title,
                    "scene": f"{prompt} - {narrative_details[part_idx]}",
                    "characters": characters,
                    "dialogue": dialogue
                }

        logger.info(f"Generated rich narrative structure with {num_panels} panels")
        return story_data

    except Exception as e:
        error_msg = f"Error generating story: {str(e)}"
        logger.error(error_msg)
        traceback.print_exc()

        # Fallback to simple structure with some narrative
        fallback_story = {}
        for i in range(1, num_panels + 1):
            if i == 1:
                part = "Introduction"
                desc = "Our story begins as characters are introduced and the setting is established."
            elif i == num_panels:
                part = "Resolution"
                desc = "The story concludes with a meaningful resolution and lesson learned."
            elif i == 2 and num_panels >= 3:
                part = "Conflict"
                desc = "Challenges arise as our characters face obstacles in their journey."
            else:
                part = "Development"
                desc = "The plot thickens as events unfold in unexpected ways."

            fallback_story[f"panel_{i}"] = {
                "part": part,
                "scene": f"{prompt} - {desc}",
                "characters": ["Character"],
                "dialogue": f"This is where our story {'begins' if i == 1 else 'concludes' if i == num_panels else 'continues'}."
            }
        return fallback_story



In [None]:

# 4. IMAGE GENERATION WITH BETTER PROMPTS
def generate_panel_image(panel_data, style="comic book", pipe=None):
    scene = panel_data['scene']
    part = panel_data.get('part', '')
    logger.info(f"Generating enhanced image for panel: {scene}")

    try:
        # Construct detailed prompt for better image quality
        characters = ", ".join(panel_data["characters"])

        # Enhanced prompt engineering
        style_prompts = {
            "comic book": "detailed comic book style art, vibrant colors, dynamic composition, bold lines, Marvel and DC inspired",
            "manga": "manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art",
            "cartoon": "cartoon style, vibrant colors, exaggerated expressions, simple backgrounds, animation-inspired art",
            "sketch": "detailed sketch, pencil drawing, dynamic lines, cross-hatching, comic book draft, artistic sketch",
            "pixel art": "pixel art style, 16-bit game aesthetic, limited color palette, clear pixelated edges, retro game art"
        }

        # Add emotional tone based on part
        emotional_tone = ""
        if "INTRODUCTION" in part:
            emotional_tone = "establishing shot, introduction scene, character introduction"
        elif "CONFLICT" in part or "STORYLINE" in part:
            emotional_tone = "dramatic tension, conflict visible, characters in challenging situation"
        elif "CLIMAX" in part:
            emotional_tone = "peak action, intense moment, dramatic lighting, dynamic pose, high energy scene"
        elif "RESOLUTION" in part:
            emotional_tone = "resolving action, emotional conclusion, characters showing relief or triumph"

        # Complete prompt construction
        style_prompt = style_prompts.get(style, style_prompts["comic book"])
        prompt = f"{scene} with {characters}, {emotional_tone}, {style_prompt}, highly detailed, professional quality"

        # Negative prompt to avoid common issues
        negative_prompt = "deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, malformed hands, long neck, long body, ((((mutated hands and fingers)))), (((out of frame))), watermark, signature, text"

        # Generate image with optimized parameters
        image = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=30,
            guidance_scale=7.5,
            height=512,
            width=512,
        ).images[0]

        logger.info("Successfully generated enhanced panel image")
        return image

    except Exception as e:
        error_msg = f"Error generating panel image: {str(e)}"
        logger.error(error_msg)
        traceback.print_exc()

        # Return error image
        img = Image.new('RGB', (512, 512), color='white')
        d = ImageDraw.Draw(img)
        d.text((10, 10), f"Error: {str(e)}", fill=(0, 0, 0))
        d.text((10, 50), f"Prompt: {scene}", fill=(0, 0, 0))
        return img


In [None]:

# 5. SPEECH BUBBLE IMPLEMENTATION
def add_speech_bubble(image, text, position="top"):
    if not text or text.strip() == "":
        return image  # Skip if no dialogue

    logger.info(f"Adding enhanced speech bubble: {text}")

    try:
        # Convert to PIL if needed
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Create a copy to draw on
        img_with_text = image.copy()
        draw = ImageDraw.Draw(img_with_text)

        # Get image dimensions
        width, height = image.size

        # Wrap text for better appearance
        wrapped_text = textwrap.fill(text, width=25)
        lines = wrapped_text.split('\n')

        # Calculate bubble dimensions
        line_height = 20
        text_height = line_height * len(lines)
        text_width = max(len(line) * 10 for line in lines)

        # Position the bubble based on parameter
        margin = 20
        padding = 10

        if position == "top":
            bubble_x = (width - text_width) // 2
            bubble_y = margin
        elif position == "bottom":
            bubble_x = (width - text_width) // 2
            bubble_y = height - margin - text_height - 2*padding
        elif position == "left":
            bubble_x = margin
            bubble_y = (height - text_height) // 2
        elif position == "right":
            bubble_x = width - margin - text_width - 2*padding
            bubble_y = (height - text_height) // 2
        else:
            bubble_x = (width - text_width) // 2
            bubble_y = margin

        # Make sure bubble is within image bounds
        bubble_x = max(margin, min(bubble_x, width - text_width - margin - 2*padding))
        bubble_y = max(margin, min(bubble_y, height - text_height - margin - 2*padding))

        # Draw comic-style speech bubble with rounded corners
        bubble_rect = [
            (bubble_x, bubble_y),
            (bubble_x + text_width + 2*padding, bubble_y + text_height + 2*padding)
        ]

        # Draw white bubble with black outline
        draw.rounded_rectangle(
            bubble_rect,
            radius=15,
            fill="white",
            outline="black",
            width=2
        )

        # Add speech bubble pointer
        if position == "top":
            # Triangle pointing down
            pointer_x1 = bubble_x + text_width // 2
            pointer_y1 = bubble_y + text_height + 2*padding
            draw.polygon(
                [(pointer_x1, pointer_y1),
                 (pointer_x1 - 15, pointer_y1 - 5),
                 (pointer_x1 + 15, pointer_y1 - 5)],
                fill="white",
                outline="black"
            )
        elif position == "bottom":
            # Triangle pointing up
            pointer_x1 = bubble_x + text_width // 2
            pointer_y1 = bubble_y
            draw.polygon(
                [(pointer_x1, pointer_y1),
                 (pointer_x1 - 15, pointer_y1 + 5),
                 (pointer_x1 + 15, pointer_y1 + 5)],
                fill="white",
                outline="black"
            )

        # Draw text
        try:
            # Try to load a comic font
            font = ImageFont.truetype("arial.ttf", 16)
        except:
            # Fallback to default font
            font = ImageFont.load_default()

        # Draw each line of text
        for i, line in enumerate(lines):
            text_x = bubble_x + padding
            text_y = bubble_y + padding + i * line_height
            draw.text(
                (text_x, text_y),
                line,
                fill="black",
                font=font
            )

        return img_with_text

    except Exception as e:
        logger.error(f"Error adding speech bubble: {str(e)}")
        traceback.print_exc()
        return image  # Return original image if error


In [None]:
# 6. IMPROVED COMIC LAYOUT WITH FRAMES
# create_comic_layout function to display panels in a 2x2 grid

def create_comic_layout(panel_images, panel_data=None, num_cols=2):  # Set default num_cols to 2
    logger.info(f"Creating comic layout with {len(panel_images)} panels in 2x2 grid format")

    try:
        # Handle case with no images
        if not panel_images:
            img = Image.new('RGB', (512, 512), color='white')
            d = ImageDraw.Draw(img)
            d.text((10, 10), "No panels generated", fill=(0, 0, 0))
            return img

        # Set up 2x2 layout
        num_panels = len(panel_images)
        num_cols = min(num_cols, 2)  # Ensure we use a maximum of 2 columns
        num_rows = (num_panels + num_cols - 1) // num_cols  # Calculate required rows (ceiling division)

        # Get dimensions from first panel
        panel_width, panel_height = panel_images[0].size

        # Add margin between panels
        margin = 10
        title_height = 80  # Space for title at top
        story_text_height = 150  # Height allocation for story text below panels

        # Create canvas with navy header and space below for stories
        comic_width = panel_width * num_cols + margin * (num_cols + 1)
        comic_height = (panel_height + story_text_height) * num_rows + title_height + margin * (num_rows + 1)
        comic = Image.new('RGB', (comic_width, comic_height), color='white')

        # Add title banner (navy blue)
        draw = ImageDraw.Draw(comic)
        draw.rectangle(
            [(0, 0), (comic_width, title_height)],
            fill="navy"
        )

        # Setup fonts
        try:
            title_font = ImageFont.truetype("arial.ttf", 36)
            story_font = ImageFont.truetype("arial.ttf", 12)
            part_font = ImageFont.truetype("arial.ttf", 14)
        except:
            title_font = ImageFont.load_default()
            story_font = ImageFont.load_default()
            part_font = ImageFont.load_default()

        # Get title from prompt
        title = "Comic Story"
        if panel_data and len(panel_data) > 0:
            # Extract title from first panel's scene
            first_panel = list(panel_data.values())[0]
            title = first_panel.get("scene", "").split("-")[0].strip()

        # Draw title
        draw.text(
            (comic_width // 2, title_height // 2),
            title,
            fill="white",
            font=title_font,
            anchor="mm"
        )

        # Place panels in a 2x2 grid
        for i, img in enumerate(panel_images):
            if i >= num_rows * num_cols:
                break  # Skip if we have more panels than grid positions

            # Calculate row and column position
            row = i // num_cols
            col = i % num_cols

            # Calculate x and y coordinates
            x = margin + col * (panel_width + margin)
            y = title_height + margin + row * (panel_height + story_text_height + margin)

            # Paste panel image
            comic.paste(img, (x, y))

            # Add thin border around panel
            draw.rectangle(
                [(x, y), (x + panel_width - 1, y + panel_height - 1)],
                outline="black",
                width=1
            )

            # Add story text below each panel
            panel_key = f"panel_{i+1}"
            if panel_data and panel_key in panel_data:
                panel_info = panel_data[panel_key]

                # Position for story text below the panel
                text_x = x
                text_y = y + panel_height + 5

                # Calculate max width to keep text aligned with panel above
                max_text_width = panel_width

                # First, add the panel part title in bold
                part_title = panel_info.get("part", f"Panel {i+1}")
                draw.text(
                    (text_x, text_y),
                    part_title,
                    fill="black",
                    font=part_font
                )

                # Now add the main story content below the title
                scene_details = panel_info.get("scene", "").split("-")
                if len(scene_details) > 1:
                    scene_desc = scene_details[1].strip()
                else:
                    scene_desc = "The story continues..."

                # Wrap text to fit panel width
                wrapped_text = textwrap.fill(scene_desc, width=40)

                # Draw the wrapped text below the part title
                draw.multiline_text(
                    (text_x, text_y + 20),  # Offset below the part title
                    wrapped_text,
                    fill="black",
                    font=story_font,
                    align="left"
                )

        # Add footer with slide number and confidential text
        footer_height = 30
        draw.rectangle(
            [(0, comic_height - footer_height), (comic_width, comic_height)],
            fill="white"
        )

        # Add "Intel Confidential" text on right
        draw.text(
            (comic_width - margin * 2, comic_height - footer_height // 2),
            "Intel Confidential",
            fill="black",
            font=story_font,
            anchor="rm"
        )

        # Add slide number
        draw.text(
            (comic_width - margin, comic_height - footer_height // 2),
            str(num_panels),
            fill="black",
            font=story_font,
            anchor="rm"
        )

        return comic

    except Exception as e:
        error_msg = f"Error creating comic layout: {str(e)}"
        logger.error(error_msg)
        traceback.print_exc()

        # Return error image
        img = Image.new('RGB', (800, 600), color='white')
        d = ImageDraw.Draw(img)
        d.text((10, 10), f"Error creating layout: {str(e)}", fill=(0, 0, 0))
        return img


In [None]:

# 7. MAIN COMIC GENERATION FUNCTION WITH PROGRESS UPDATES
def generate_comic(prompt, num_panels=4, style="comic book", progress=None):
    logger.info(f"Starting enhanced comic generation for prompt: '{prompt}'")

    try:
        # Update progress
        if progress:
            progress(0.1, "Initializing models...")

        # Setup models
        tokenizer, text_model = setup_story_model()
        if progress:
            progress(0.2, "Text model loaded")

        image_pipe = setup_image_model()
        if progress:
            progress(0.3, "Image model loaded")

        # Generate structured story
        story_data = generate_story(prompt, num_panels, tokenizer, text_model)
        if progress:
            progress(0.4, "4-part narrative story generated")

        # Generate panel images
        panel_images = []
        for i, (panel_key, panel_data) in enumerate(story_data.items()):
            if progress:
                progress_val = 0.4 + (0.5 * i / len(story_data))
                progress(progress_val, f"Generating panel {i+1}/{len(story_data)}")

            # Generate image for panel
            panel_image = generate_panel_image(panel_data, style, image_pipe)

            # Add dialogue if present
            if panel_data.get("dialogue"):
                # Position the speech bubble based on the panel number
                position = "top" if i % 2 == 0 else "bottom"
                panel_image = add_speech_bubble(panel_image, panel_data["dialogue"], position)

            panel_images.append(panel_image)

        # Create comic layout
        if progress:
            progress(0.9, "Assembling enhanced comic layout")
        comic = create_comic_layout(panel_images, story_data)

        if progress:
            progress(1.0, "Comic generated!")

        logger.info("Enhanced comic generation complete")
        return comic, story_data

    except Exception as e:
        error_msg = f"Error in comic generation: {str(e)}"
        logger.error(error_msg)
        traceback.print_exc()

        # Create error image
        img = Image.new('RGB', (800, 600), color='white')
        d = ImageDraw.Draw(img)
        d.text((10, 10), "Error generating comic:", fill=(255, 0, 0))
        d.text((10, 40), str(e), fill=(0, 0, 0))
        d.text((10, 70), f"Prompt: {prompt}", fill=(0, 0, 0))
        d.text((10, 100), "Please try again with a simpler prompt or fewer panels", fill=(0, 0, 0))

        # Empty story data
        error_data = {"error": str(e)}

        return img, error_data

In [None]:
# 8. IMPROVED GRADIO UI
def create_ui():
    # Status for progress updates
    status_text = gr.State("")

    def generate_with_progress(prompt, num_panels, style, progress=gr.Progress()):
        def update_progress(progress_val, status):
            progress(progress_val, desc=status)
            return status

        try:
            comic, story_data = generate_comic(
                prompt=prompt,
                num_panels=int(num_panels),
                style=style,
                progress=update_progress
            )
            return comic, json.dumps(story_data, indent=2), "Complete"
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            logger.error(error_msg)
            traceback.print_exc()
            return None, error_msg, "Error"

    # Define interface
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🦸 ComicCrafter AI - Generate 4-Part Narrative Comics")

        with gr.Row():
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="Story Prompt",
                    placeholder="A superhero fights a giant robot in the city",
                    lines=3,
                    value="A superhero with a red cape fights a giant robot in a city"
                )

                with gr.Row():
                    num_panels = gr.Slider(
                        minimum=1,
                        maximum=4,
                        value=4,
                        step=1,
                        label="Number of Panels"
                    )

                    style = gr.Dropdown(
                        choices=["comic book", "manga", "cartoon", "sketch", "pixel art"],
                        value="comic book",
                        label="Art Style"
                    )

                # Add examples
                gr.Examples(
                    examples=[
                        ["A superhero with laser vision defends a city from an alien invasion", 4, "comic book"],
                        ["A detective solves a mystery in a rainy city at night", 4, "noir"],
                        ["A robot learns to make friends with humans", 4, "cartoon"],
                        ["A ninja warrior battles a dragon in ancient Japan", 4, "manga"]
                    ],
                    inputs=[prompt_input, num_panels, style]
                )

                generate_btn = gr.Button("Generate Comic", variant="primary")
                status = gr.Textbox(label="Status", value="Ready")

                gr.Markdown("""
                ## 📝 How It Works

                This AI comic generator creates a **4-part narrative structure**:

                1. **INTRODUCTION**: Sets the scene and characters
                2. **STORYLINE**: Develops the plot with challenges
                3. **CLIMAX**: The most intense moment of conflict
                4. **RESOLUTION**: Concludes with a lesson or message

                Each image is carefully generated to match its place in the story!
                """)

            with gr.Column(scale=2):
                output_image = gr.Image(label="Generated Comic")
                output_story = gr.Textbox(label="Story Structure", lines=10)

        generate_btn.click(
            generate_with_progress,
            inputs=[prompt_input, num_panels, style],
            outputs=[output_image, output_story, status]
        )

        gr.Markdown("""
        ## 💡 Tips for Success
        - Use descriptive prompts with clear characters and action
        - Try different art styles to match your story's mood
        - For best results, use the full 4-panel story structure
        - Comics follow classic narrative patterns: introduction → conflict → climax → resolution
        """)

    return demo

# Run the app
if __name__ == "__main__":
    print("Starting ComicCrafter AI - Enhanced Version...")
    demo = create_ui()
    demo.launch(debug=True, share=True)

Starting ComicCrafter AI - Enhanced Version...




Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://10aa9022e584544fab.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['a celestial body shrouded in mystery and intrigue, its very existence whispered only in hushed tones among the most erudite of astronomers. it is a world that has captured the dreams of countless explorers, scientists, and adventurers, who have dared to imagine what wonders and secrets might lie hidden beneath its surface. and it is to this enigmatic planet that our story now takes us, as we follow the intrepid journey of a group of astro

  0%|          | 0/30 [00:00<?, ?it/s]



  0%|          | 0/30 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['of life. as they began to explore the surface of the planet, they noticed something strange. there was a faint signal coming from underground, emanating from a nearby cave system. the signal was unlike anything they had ever seen before, and it seemed to be growing stronger by the minute. the team quickly realized that they needed to investigate this mysterious signal. they made their way into the cave system, using their advanced equipment to navigate through the dark and twisting tunnels. as they delved deeper into the caves, the signal grew even stronger. suddenly, they stumbled upon a massive chamber, lit by a glowing crystal at its center. and there, in the center of the chamber, was the source of the signal. it was a machine unlike anything they had ever seen before. its metallic surface was covered in strange symbols and markings, and it hummed with an eerie energy. the team 

  0%|          | 0/30 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["were setting up their equipment, one of the astronauts noticed something strange. there was a faint signal coming from underground. the signal was unlike anything they had ever seen before, and it seemed to be emanating from deep within the planet's core. the group was intrigued and decided to investigate further. they descended into the depths of the planet, using their specialized equipment to navigate through the dark and treacherous terrain. as they reached the source of the signal, they found a massive underground cavern filled with strange machinery and technology that they had never seen before. at the center of the cavern was a glowing crystal that seemed to be pulsing with energy. the group was amazed by what they had discovered and spent hours studying the crystal and the surrounding machinery. but as they delved deeper into the cavern, they realized that they were not alo

  0%|          | 0/30 [00:00<?, ?it/s]

ERROR:__main__:Error setting up Mistral-7B model: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 
Traceback (most recent call last):
  File "<ipython-input-3-70fcddc65947>", line 32, in setup_story_model
    model = AutoModelForCausalLM.from_pretrained(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py", line 573, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py", line 272, in _wr

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (116 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['police officer, establishing shot, introduction scene, character introduction, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']
Token indices sequence length is longer than the specified maximum sequence length for this model (116 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['police officer, establishing shot, introduction scene, character introduction, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detai

  0%|          | 0/30 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [',, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [',, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']


  0%|          | 0/30 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['with detective, witness, peak action, intense moment, dramatic lighting, dynamic pose, high energy scene, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['with detective, witness, peak action, intense moment, dramatic lighting, dynamic pose, high energy scene, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']


  0%|          | 0/30 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['and wrong. with detective, culprit, resolving action, emotional conclusion, characters showing relief or triumph, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['and wrong. with detective, culprit, resolving action, emotional conclusion, characters showing relief or triumph, manga style artwork, black and white, dramatic lighting, speed lines, shounen style, detailed character art, highly detailed, professional quality']


  0%|          | 0/30 [00:00<?, ?it/s]

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://10aa9022e584544fab.gradio.live
