<a href="https://colab.research.google.com/github/RahulSingh85/AOA/blob/main/Text%20to%20Video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

# Query CUDA once and reuse the answer for both report lines.
has_gpu = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if has_gpu else "No GPU"

print("GPU Available:", has_gpu)
print("GPU Name:", gpu_name)


GPU Available: True
GPU Name: Tesla T4


In [2]:
!pip install torch torchvision torchaudio diffusers transformers accelerate opencv-python ffmpeg-python


Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0


In [6]:
import os
import torch
from diffusers import StableDiffusionPipeline
import cv2
import glob
import numpy as np
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
from moviepy.video.fx.all import fadein, fadeout
from moviepy.editor import AudioFileClip, CompositeAudioClip

# Pick the compute device: CUDA when a GPU is visible, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Load the Stable Diffusion pipeline and move it to the chosen device.
try:
    pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipeline.to(device)
    print("Model loaded successfully!")
except Exception as e:
    print("Error loading model:", e)
    # In a Jupyter/Colab kernel `exit()` only schedules a kernel shutdown; it does
    # not stop the cell at this line, so the statements below would still run with
    # `pipeline` undefined. Raising SystemExit halts execution here, in notebooks
    # and plain scripts alike.
    raise SystemExit(1) from e

# Ask for the prompt that every frame will be generated from.
text_prompt = input("Enter text for AI video generation: ")
if not text_prompt.strip():
    print("Error: Text prompt cannot be empty!")
    # `exit()` does not stop a notebook cell at this line (it schedules a kernel
    # shutdown), so raise SystemExit to abort before any frame is generated.
    raise SystemExit(1)

# Create the folder that receives generated and interpolated frames.
output_dir = "output_images"
os.makedirs(output_dir, exist_ok=True)

# Generate the key frames.
num_frames = 10  # Number of key frames generated from the prompt
fps = 10  # Playback rate of the final video
image_paths = []

for i in range(num_frames):
    print(f"Generating frame {i+1}/{num_frames}...")
    try:
        # Every call samples independently from the same prompt.
        image = pipeline(text_prompt).images[0]
        image_path = os.path.join(output_dir, f"frame_{i:03d}.png")
        image.save(image_path)
        image_paths.append(image_path)
    except Exception as e:
        # Best effort: a failed frame is reported and skipped, not fatal.
        print(f"Error generating frame {i+1}:", e)
        continue

# Verify that at least one frame was produced before building a video.
if not image_paths:
    print("Error: No images were generated!")
    # `exit()` does not stop a notebook cell at this line (it schedules a kernel
    # shutdown), so raise SystemExit to make sure nothing below runs on an empty list.
    raise SystemExit(1)

print("Images generated successfully:", len(image_paths))

# Apply motion interpolation to smooth transitions
def interpolate_images(image_paths, output_dir, steps=5):
    """Cross-fade between consecutive frames and return every path in playback order.

    For each adjacent pair of key frames, ``steps`` linearly blended images are
    written to ``output_dir`` and placed between the two key frames in the result.

    Args:
        image_paths: Key-frame image paths, already in the order they should play.
        output_dir: Directory that receives the blended ``interp_*.png`` files.
        steps: Number of blended frames inserted between each pair (default 5).

    Returns:
        A list of paths: key frame, its blends towards the next key frame, next
        key frame, and so on. A pair with an unreadable image gets no blends, but
        both key-frame paths are still returned.
    """
    if not image_paths:
        return []

    # Use strictly interior weights: alpha == 0 or 1 would only re-save an exact
    # copy of a key frame and make the video stall on it.
    alphas = np.linspace(0, 1, steps + 2)[1:-1]

    # Build the list in temporal order. Sorting the paths alphabetically would put
    # every "frame_*" before every "interp_*" and break the transitions.
    ordered_paths = [image_paths[0]]
    previous = cv2.imread(image_paths[0])
    for i in range(len(image_paths) - 1):
        following = cv2.imread(image_paths[i + 1])

        if previous is None or following is None:
            print(f"Skipping interpolation for frame {i} due to missing images.")
        else:
            for k, alpha in enumerate(alphas):
                weight = float(alpha)
                blended = cv2.addWeighted(previous, 1 - weight, following, weight, 0)
                interp_path = os.path.join(output_dir, f"interp_{i:03d}_{k:02d}.png")
                # imwrite reports failure through its return value; only list
                # files that were actually written.
                if cv2.imwrite(interp_path, blended):
                    ordered_paths.append(interp_path)

        ordered_paths.append(image_paths[i + 1])
        previous = following  # reuse the decoded image as the next pair's start
    return ordered_paths

# Expand the key frames with blended in-between frames; image_paths is rebound
# to the expanded list.
image_paths = interpolate_images(image_paths=image_paths, output_dir=output_dir)
total_frames = len(image_paths)
print("Total frames after interpolation:", total_frames)

# Convert images to video with transitions
def create_video(image_paths, output_filename, fps, audio_path=None):
    """Render an image sequence to an H.264 video with fades and optional music.

    Args:
        image_paths: Frame image paths in playback order.
        output_filename: Path of the video file to write.
        fps: Frames per second of the output video.
        audio_path: Optional audio file. It is used only if the path exists; a
            file that fails to load is reported and the video is written silent.

    Returns:
        None. Prints an error and returns early when ``image_paths`` is empty.
    """
    if not image_paths:
        print("Error: No images available to create video!")
        return

    clip = ImageSequenceClip(image_paths, fps=fps)
    audio = None
    try:
        # Half-second fade from black at the start and to black at the end.
        clip = fadein(clip, 0.5)
        clip = fadeout(clip, 0.5)

        # Add background music if provided
        if audio_path and os.path.exists(audio_path):
            try:
                audio = AudioFileClip(audio_path)
                # Trim instead of stretching: declaring a duration longer than the
                # real track makes the writer read past the end of the audio file.
                track = audio.subclip(0, min(audio.duration, clip.duration))
                track = track.volumex(0.7)  # Reduce volume to 70%
                clip = clip.set_audio(track)
            except Exception as e:
                print("Error loading audio file:", e)

        clip.write_videofile(output_filename, codec="libx264", audio_codec="aac", bitrate="2000k")
    finally:
        # Release the audio reader and clip resources even if encoding fails.
        if audio is not None:
            audio.close()
        clip.close()

# Destination of the rendered clip.
video_filename = "output_video.mp4"

# Optional soundtrack (user can replace this with their own audio file).
audio_file = "background_music.mp3"  # Ensure this file exists in the working directory

create_video(
    image_paths=image_paths,
    output_filename=video_filename,
    fps=fps,
    audio_path=audio_file,
)

print("Video generation complete! Saved as:", video_filename)


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Model loaded successfully!
Enter text for AI video generation: Sun rises from mountain
Generating frame 1/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 2/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 3/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 4/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 5/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 6/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 7/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 8/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 9/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 10/20...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating frame 11/20...


  0%|          | 0/50 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [4]:
!pip install diffusers transformers accelerate safetensors
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install numpy opencv-python moviepy


Looking in indexes: https://download.pytorch.org/whl/cu118


In [3]:
# Mount Google Drive at /content/drive so paths under /content/drive/MyDrive
# (used by later cells to store the model) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
# Create the Drive folder used as model_path by the download/load cells;
# -p makes this a no-op when the folder already exists.
!mkdir -p /content/drive/MyDrive/stable-diffusion


In [7]:
# Install huggingface_hub, which provides notebook_login used in the next cell.
!pip install huggingface_hub




In [8]:
# Show the Hugging Face login widget in the notebook.
# NOTE(review): presumably the entered token is cached for later Hub downloads — confirm.
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
# Install PyTorch into the runtime.
# NOTE(review): torch is also installed by other install cells in this notebook;
# this cell looks redundant — confirm before removing.
!pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [9]:
# torch is imported here because this cell references torch.float16; without the
# import it only runs if an earlier cell already imported torch into the kernel.
import torch
from diffusers import StableDiffusionPipeline

# Download the half-precision pipeline once and store a copy on Google Drive so
# later sessions can load it from model_path.
model_path = "/content/drive/MyDrive/stable-diffusion"
pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipeline.save_pretrained(model_path)

print("Model downloaded successfully!")


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Model downloaded successfully!


In [10]:
from diffusers import StableDiffusionPipeline
import torch

# Load the pipeline previously saved to Google Drive.
model_path = "/content/drive/MyDrive/stable-diffusion"

# Match precision to the device: half precision is meant for the GPU, and many
# CPU operators do not support float16, so fall back to float32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=dtype)
pipeline.to(device)

print("Model loaded successfully!")


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Model loaded successfully!


In [5]:
from diffusers import StableDiffusionPipeline
import torch
import os

# Load pre-trained model
model_path = "/content/stable-diffusion-v1-5/"  # Change this to your local folder
pipeline = StableDiffusionPipeline.from_pretrained(model_path)
pipeline.to("cuda" if torch.cuda.is_available() else "cpu")

# Define training parameters
train_images = "/content/training_images/"
output_model = "/content/trained_model/"
generated_images = "/content/generated_samples/"  # Receives the images produced below
num_train_steps = 1000  # Increase for better fine-tuning
learning_rate = 5e-6  # NOTE(review): defined but never used by the loop below

# Load dataset manually. A missing folder is treated like an empty one so the
# user gets the same message instead of a FileNotFoundError from os.listdir.
train_data = []
if os.path.isdir(train_images):
    # Sorted so the step -> image assignment is the same on every run.
    for filename in sorted(os.listdir(train_images)):
        if filename.endswith((".jpg", ".png")):
            train_data.append(os.path.join(train_images, filename))

if len(train_data) == 0:
    print("No training images found!")
    # `exit()` does not stop a notebook cell at this line (it schedules a kernel
    # shutdown), so raise SystemExit to keep the loop below from running on an
    # empty dataset.
    raise SystemExit(1)

os.makedirs(generated_images, exist_ok=True)

# NOTE(review): this loop only runs inference. No loss is computed and no
# optimizer updates any weights, so the pipeline saved at the end is the
# unchanged base model. Real DreamBooth-style fine-tuning needs a training
# script; this cell should not be relied on to adapt the model.
for step in range(num_train_steps):
    img_path = train_data[step % len(train_data)]
    prompt = "A high-quality photo of the object in " + img_path

    # Generate image using the pre-trained model
    generated_image = pipeline(prompt).images[0]

    # Write the result next to, never over, the training data: saving to
    # img_path would destroy the user's original images.
    sample_path = os.path.join(generated_images, os.path.basename(img_path))
    generated_image.save(sample_path)

    if step % 100 == 0:
        print(f"Training step {step}/{num_train_steps} completed.")

# Save the pipeline to the output folder
pipeline.save_pretrained(output_model)
print("Training complete! Model saved at:", output_model)


ValueError: The provided pretrained_model_name_or_path "/content/stable-diffusion-v1-5/" is neither a valid local path nor a valid repo id. Please check the parameter.