In [None]:
!pip install gradio diffusers torch torchvision torchaudio --quiet



In [None]:
import gradio as gr
import torch
import imageio
from diffusers import KandinskyV22Pipeline
import os

# Load Kandinsky model (lighter than Stable Diffusion)
device = "cpu"  # Change to "cuda" if running on GPU
pipe = KandinskyV22Pipeline.from_pretrained("kandinsky-community/kandinsky-2-2-decoder")
pipe.to(device)

# Animation function
def generate_animation(prompt, num_frames=5):
    frames = []
    for i in range(num_frames):
        print(f"Generating frame {i+1}/{num_frames}...")
        image = pipe(prompt).images[0]
        image_path = f"frame_{i}.png"
        image.save(image_path)
        frames.append(imageio.imread(image_path))

    gif_path = "animation.gif"
    imageio.mimsave(gif_path, frames, fps=3)  # Save as GIF

    return gif_path

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🎨 AI Animation Generator (Kandinsky 2.2)")

    with gr.Row():
        prompt = gr.Textbox(label="Enter Prompt", placeholder="A cat flying in space")
        num_frames = gr.Slider(label="Frames", minimum=3, maximum=10, step=1, value=5)

    run_button = gr.Button("Generate Animation")
    output_gif = gr.Image(label="Generated Animation", type="filepath")

    run_button.click(fn=generate_animation, inputs=[prompt, num_frames], outputs=output_gif)

# Launch the app (use share=True for public link)
demo.launch(share=True)


model_index.json:   0%|          | 0.00/250 [00:00<?, ?B/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/271M [00:00<?, ?B/s]

movq%2Fconfig.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

scheduler%2Fscheduler_config.json:   0%|          | 0.00/317 [00:00<?, ?B/s]

unet%2Fconfig.json:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/5.01G [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/3 [00:00<?, ?it/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c80cde3590f44992dd.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [21]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split

# Load the dataset (update with correct file path)
df = pd.read_csv("/content/Cartoon_datasets.csv")

# Display the first few rows
print(df.head())

# Function to clean text
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
    text = re.sub(r"\s+", " ", text).strip()  # Remove extra spaces
    return text

# Apply cleaning function
df["cleaned_text"] = df["Description"].apply(clean_text)

# Split into train and test
train_texts, val_texts = train_test_split(df["cleaned_text"], test_size=0.1, random_state=42)

print(f"Training Samples: {len(train_texts)}, Validation Samples: {len(val_texts)}")


                               Name         Span  \
0       Batman: The Animated Series  (1992–1995)   
1                      Samurai Jack  (2001–2017)   
2        Avatar: The Last Airbender  (2005–2008)   
3                Total Drama Island  (2007–2014)   
4  Scooby-Doo! Mystery Incorporated  (2010–2013)   

                                         Description  Rating  
0  The Dark Knight battles crime in Gotham City w...     9.0  
1  A samurai, sent through time, fights to return...     8.5  
2  In a war-torn world of elemental magic, a youn...     9.3  
3  Animated satire of survivor reality shows feat...     7.4  
4  This incarnation finds Scooby and the gang liv...     8.1  
Training Samples: 90, Validation Samples: 10


In [22]:
import torch
import torch.nn as nn

class LSTMGenerator(nn.Module):
    def __init__(self, vocab_size, embedding_dim=256, hidden_dim=512, output_dim=256):
        super(LSTMGenerator, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text_input):
        embedded = self.embedding(text_input)
        lstm_out, _ = self.lstm(embedded)
        output = self.fc(lstm_out[:, -1, :])
        return output  # Encoded representation of text

# Initialize model
vocab_size = 10000  # Adjust as per your dataset
model = LSTMGenerator(vocab_size)

# Check model structure
print(model)


LSTMGenerator(
  (embedding): Embedding(10000, 256)
  (lstm): LSTM(256, 512, batch_first=True)
  (fc): Linear(in_features=512, out_features=256, bias=True)
)


In [23]:
from transformers import GPT2Model

class TransformerFrameGenerator(nn.Module):
    def __init__(self, vocab_size, embed_dim=256, num_layers=4):
        super(TransformerFrameGenerator, self).__init__()
        self.encoder = GPT2Model.from_pretrained("gpt2")
        self.fc = nn.Linear(768, embed_dim)

    def forward(self, input_ids):
        encoded_text = self.encoder(input_ids).last_hidden_state
        frame_features = self.fc(encoded_text[:, -1, :])
        return frame_features

# Initialize model
model = TransformerFrameGenerator(vocab_size)

# Check model
print(model)


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

TransformerFrameGenerator(
  (encoder): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (fc): Linear(in_features=768, out_features=256, bias=True)
)


In [24]:
from diffusers import StableDiffusionPipeline
import torch

# Load stable diffusion model
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to("cuda")

# Generate a frame from text
prompt = "A futuristic city with flying cars at sunset"
image = pipe(prompt).images[0]

# Save image
image.save("generated_frame.png")


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

scheduler_config-checkpoint.json:   0%|          | 0.00/209 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

In [30]:
import torch
torch.save(model.state_dict(), "model.pth")  # Save model parameters
torch.save(model, "full_model.pth")  # Save entire model


In [35]:
def generate_frame(text, frame_number):
    input_tensor = text_to_tensor(text)

    with torch.no_grad():
        generated_image = model(input_tensor)  # Generate image tensor

    print("Generated Tensor Shape:", generated_image.shape)  # Debugging step

    # Reshape if needed
    if len(generated_image.shape) == 4:  # Model outputs (1, C, H, W)
        generated_image = generated_image.squeeze(0).permute(1, 2, 0)

    generated_image = generated_image.cpu().numpy()

    print("Min Pixel Value:", generated_image.min(), "Max Pixel Value:", generated_image.max())  # Debugging step

    # Normalize if pixel values are incorrect
    generated_image = (generated_image - generated_image.min()) / (generated_image.max() - generated_image.min()) * 255
    generated_image = generated_image.astype(np.uint8)

    # Convert grayscale to RGB
    if len(generated_image.shape) == 2:
        generated_image = np.stack([generated_image] * 3, axis=-1)

    # Convert to PIL Image
    image = Image.fromarray(generated_image)
    frame_path = os.path.join(output_dir, f"frame_{frame_number:04d}.png")
    image.save(frame_path)

    return frame_path


In [36]:

!pip install diffusers transformers accelerate safetensors imageio[ffmpeg] moviepy

import torch
from diffusers import StableDiffusionPipeline
from huggingface_hub import login
from IPython.display import display
import imageio
import os

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [37]:
login(token="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")

model_id = "stabilityai/stable-diffusion-2-1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)


model_index.json:   0%|          | 0.00/537 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/824 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/633 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/611 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/939 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [38]:
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)

In [40]:
print("Stable Diffusion Pipeline Loaded Successfully!")

prompt = input("Enter a description for your animation: ")

num_frames = 10
frames = []

for i in range(num_frames):
    print(f"Generating frame {i+1}/{num_frames}...")
    image = pipe(prompt).images[0]
    image_path = f"frame_{i}.png"
    image.save(image_path)
    frames.append(imageio.imread(image_path))


gif_path = "animation.gif"
imageio.mimsave(gif_path, frames, fps=3)


print("Animation saved as animation.gif")
display(imageio.mimread(gif_path))


Stable Diffusion Pipeline Loaded Successfully!
Enter a description for your animation: city
Generating frame 1/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 2/10...


  frames.append(imageio.imread(image_path))


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 3/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 4/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 5/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 6/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 7/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 8/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 9/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 10/10...


  0%|          | 0/50 [00:00<?, ?it/s]

Animation saved as animation.gif


[array([[[153, 135, 118],
         [153, 135, 118],
         [121,  60,  75],
         ...,
         [118, 123,  93],
         [ 85,  75,  67],
         [114, 111,  80]],
 
        [[153, 135, 118],
         [153, 135, 118],
         [ 52,  45,  51],
         ...,
         [153, 135, 118],
         [ 52,  26,  44],
         [ 85,  75,  67]],
 
        [[111,  87,  62],
         [111,  93, 115],
         [ 58,  67,  54],
         ...,
         [154, 111,  96],
         [ 58,  47,  33],
         [ 84,  64,  63]],
 
        ...,
 
        [[ 32,  47,  71],
         [ 44,  59,  79],
         [ 44,  59,  79],
         ...,
         [ 58,  75,  75],
         [ 12,  19,  28],
         [  9,  28,  29]],
 
        [[ 32,  47,  71],
         [ 25,  67,  81],
         [ 25,  67,  81],
         ...,
         [ 44,  59,  79],
         [ 12,  19,  28],
         [  3,  12,  27]],
 
        [[ 32,  47,  71],
         [ 18,  32,  59],
         [ 32,  46,  56],
         ...,
         [ 51,  47,  67],
  

In [39]:
!ls -lh


total 964M
-rw-r--r-- 1 root root 2.4K Mar  8 10:49 animation.mp4
-rw-r--r-- 1 root root  18K Mar  8 10:28 Cartoon_datasets.csv
drwxr-xr-x 5 root root 4.0K Mar  8 10:09 dataset
drwx------ 6 root root 4.0K Mar  8 09:58 drive
drwxr-xr-x 2 root root 4.0K Mar  8 09:59 extracted_frames
-rw-r--r-- 1 root root 488M Mar  8 10:43 full_model.pth
-rw-r--r-- 1 root root 399K Mar  8 10:31 generated_frame.png
drwxr-xr-x 2 root root 4.0K Mar  8 10:49 generated_frames
-rw-r--r-- 1 root root 476M Mar  8 10:43 model.pth
drwxr-xr-x 1 root root 4.0K Mar  6 14:29 sample_data


In [41]:
from google.colab import files
files.download("animation.gif")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>