In [1]:
import os
import requests
import json



### Image Generation

In [2]:
!pip install ffmpeg-python


Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting future (from ffmpeg-python)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Downloading future-1.0.0-py3-none-any.whl (491 kB)
Installing collected packages: future, ffmpeg-python
Successfully installed ffmpeg-python-0.2.0 future-1.0.0


In [136]:
# def download_image(prompt, fname):
#     url = f"https://pollinations.ai/p/{prompt}"
#     print(f'f:\\Gen_AI\\git_gen_ai\\Learning\\shorts_generator\\files\\{fname}.jpg')
#     response = requests.get(url)
#     with open(f'f:\\Gen_AI\\git_gen_ai\\Learning\\shorts_generator\\files\\{fname}.jpg', 'wb') as file:
#         file.write(response.content)
#     print('Image downloaded!')


In [14]:
from PIL import Image, ImageOps
import requests
from io import BytesIO

def download_image(prompt, fname, target_width=720, target_height=1280, quality=20,
                   output_dir=None, timeout=120):
    """
    Download a generated image for `prompt` and save it as a padded JPEG.

    The image is shrunk to fit inside the target box while keeping its aspect
    ratio, then padded with black so the saved file is exactly
    target_width x target_height.

    Parameters:
    - prompt (str): The image prompt; it is percent-encoded before being put in the URL.
    - fname (str): The filename (without extension) to save the image as.
    - target_width (int): Desired width for YouTube Shorts (default: 720).
    - target_height (int): Desired height for YouTube Shorts (default: 1280).
    - quality (int): JPEG quality (1-100, default: 20).
    - output_dir (str | None): Directory to save into. It is created if missing.
      When None, the original hard-coded project path is used unchanged.
    - timeout (float): Seconds to wait for the HTTP request (default: 120).

    Returns:
    - None. Progress and failures are reported with print().
    """
    # Local import keeps the function usable even if the cell is run on its own.
    from urllib.parse import quote

    # Encode every reserved character so '/', '?' or '#' in the prompt cannot
    # change the structure of the URL.
    encoded_prompt = quote(prompt, safe="")
    url = f"https://pollinations.ai/p/{encoded_prompt}"
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"⚠️ Failed to download image. Status code: {response.status_code}")
        return

    img = Image.open(BytesIO(response.content))

    # JPEG cannot store alpha or palette data; normalise to RGB before saving.
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Resize while maintaining aspect ratio
    img.thumbnail((target_width, target_height), Image.LANCZOS)

    # Add black padding to fit exact Shorts dimensions
    img_with_padding = ImageOps.pad(img, (target_width, target_height), color=(0, 0, 0))

    if output_dir is None:
        # Backward-compatible default location.
        output_path = f'f:\\Gen_AI\\git_gen_ai\\Learning\\shorts_generator\\files\\{fname}.jpg'
    else:
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, f"{fname}.jpg")

    # Save the image with reduced quality
    img_with_padding.save(output_path, format='JPEG', quality=quality)

    print(f"✅ Image downloaded and resized to {target_width}x{target_height} at {quality}% quality: {output_path}")


In [15]:
# Smoke test: request one image for a sample prompt; download_image saves it as "test.jpg".
download_image("conceptual_isometric_world_of_pollinations_ai_surreal_hyperrealistic_digital_garden", "test")

‚úÖ Image downloaded and resized to 720x1280 at 20% quality: f:\Gen_AI\git_gen_ai\Learning\shorts_generator\files\test.jpg


In [6]:
# Base URL that generate_audio prefixes to the prompt and query string when building its request
BASE_URL = "https://text.pollinations.ai"

from pydub import AudioSegment

def generate_audio(prompt, voice="alloy", method="GET", output_file="generated_audio.mp3", timeout=120):
    """
    Generate spoken audio for `prompt` and save it as a low-bitrate MP3.

    Args:
    - prompt (str): Text to be converted to audio; percent-encoded before use in the URL.
    - voice (str): Voice for the audio (default: "alloy").
    - method (str): HTTP method. Only "GET" (case-insensitive) is implemented.
    - output_file (str): Path for the generated audio; its directory is created if missing.
    - timeout (float): Seconds to wait for the HTTP request (default: 120).

    Returns:
    - str: `output_file` on success. On failure, the response body (non-200
      status), the exception text, or "Invalid method".
    """
    # Local import keeps the function usable even if the cell is run on its own.
    from urllib.parse import quote

    if method.upper() != "GET":
        print("❌ Unsupported method. Only 'GET' is implemented")
        return "Invalid method"

    # Encode reserved characters: script text such as "Pakistan/China" or a
    # question mark would otherwise alter the path or start the query string.
    encoded_prompt = quote(prompt, safe="")
    encoded_voice = quote(voice, safe="")
    url = f"{BASE_URL}/{encoded_prompt}?model=openai-audio&voice={encoded_voice}"

    try:
        response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            print(f"❌ Error: {response.status_code}")
            return response.text

        target_dir = os.path.dirname(output_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(output_file, "wb") as f:
            f.write(response.content)

        # Re-encode in place at a very low bitrate to keep the file small.
        # export() hands back an open file object, so close it explicitly.
        exported = AudioSegment.from_file(output_file).export(output_file, format="mp3", bitrate="8k")
        exported.close()
        print(f"✅ Audio saved as '{output_file}'")

        return output_file

    except Exception as e:
        # Deliberately best-effort: report the problem and hand the message back to the caller.
        print(f"❌ Exception: {e}")
        return str(e)

ModuleNotFoundError: No module named 'pyaudioop'

In [None]:
# Smoke test: narrate one sample sentence and save it under files/.
generate_audio(
    "Get ready for breathtaking views! Let's explore the top highest places on Earth",
    voice="alloy",
    method="GET",
    output_file="files/test.mp3",
)

‚úÖ Audio saved as 'files/test.mp3'


'files/test.mp3'

### Local Ollama - Script Generation

In [None]:
import ollama

# Model tag passed to ollama.chat below; replace it with the name of your local model
model_name = 'llama3.2:latest'  

# Define the prompt
prompt = """
You are a **YouTube Shorts content creator**.  
Your task is to **write a simple 30-second video script** based on the provided details.  

### üé• **Script Instructions:**  
- **Divide the script into sections ( At max 10 sections only if needed, average around 5 sections)** with clear timestamps for each section.  
- Each section should have a **text description** that will appear on the screen.  
- Indicate the **time duration** for how long each section's text will remain on screen.  

### üé® **Image Generation Instructions:**  
- For each section, **generate an image prompt** that captures the essence of the content.  
- The image prompts will be used to create visuals for the video.  
- Ensure the prompts are **detailed and descriptive**, specifying the scenery, mood, and style.  
- **Do not include more than 5 sections.**

---

### üõë **Output Format in JSON (Strictly Follow, keep SCRIPT and IMAGE_PROMPTS sections separate not mixed):**

{
    "start_time-end_time": {
        "text" : content_of_the_script,
        "image_prompt" : img_prompt
    }
}

Example:
{
    '0s-3s': {
        'text': "Get ready for breathtaking views! Let's explore the top highest places on Earth",
        'image_prompt': 'Aerial view of a majestic mountain range with snow-capped peaks, sun rising behind it'
    },
    '4s-7s': {
        'text': "From Mount Everest to Kilimanjaro, we'll take you on a journey to the roof of the world",
        'image_prompt': "Close-up shot of Mount Everest's summit with a flag waving in the wind"
    }
}

---
**TOPIC:** Top highest places in the world ranked as top 5 and each sentence explaning one.  

Make sure the **script is engaging, concise, and easy to visualize**.  ALSO IT HAS TO BE IN JSON FORMAT 
"""

# Ask the local model for the script using a single user turn
chat_messages = [{"role": "user", "content": prompt}]
response = ollama.chat(model=model_name, messages=chat_messages)

# Show only the text of the returned message
print("Response:", response['message']['content'])


Response: Here's a script for a 30-second YouTube Shorts video on the topic "Top Highest Places in the World Ranked as Top 5":

```
{
  "0s-3s": {
    "text": "Get ready for breathtaking views! Let's explore the top highest places on Earth",
    "image_prompt": "Aerial view of a majestic mountain range with snow-capped peaks, sun rising behind it"
  },
  "4s-7s": {
    "text": "From Mount Everest to Kilimanjaro, we'll take you on a journey to the roof of the world",
    "image_prompt": "Close-up shot of Mount Everest's summit with a flag waving in the wind"
  },
  "8s-11s": {
    "text": "Number 5: Mount Aconcagua, Argentina - The highest peak outside Asia at 22,841ft",
    "image_prompt": "Panoramic view of Mount Aconcagua, with a snow-capped summit and surrounding mountains"
  },
  "12s-15s": {
    "text": "Number 4: K2, Pakistan/China - The second-highest peak in the world at 28,251ft",
    "image_prompt": "Dramatic shot of K2's rugged terrain, with snow and ice-covered peaks"
  },


In [109]:
def _extract_script_json(raw_reply):
    """
    Pull the JSON object out of a chat reply and parse it.

    Uses the text between the first pair of ``` fences when one exists
    (a language tag such as ```json is tolerated); otherwise falls back to the
    whole reply. In either case only the span from the first '{' to the last
    '}' is parsed, so surrounding chatter is ignored.

    Raises:
    - ValueError: if no JSON object can be located (json.JSONDecodeError, a
      ValueError subclass, is raised when the located span is not valid JSON).
    """
    import re

    fenced = re.search(r"```(.*?)```", raw_reply, flags=re.DOTALL)
    candidate = fenced.group(1) if fenced else raw_reply

    # Newlines are dropped before parsing, as the original one-liner did.
    candidate = candidate.replace("\n", "")

    first_brace = candidate.find("{")
    last_brace = candidate.rfind("}")
    if first_brace == -1 or last_brace < first_brace:
        raise ValueError("No JSON object found in the model reply")

    return json.loads(candidate[first_brace:last_brace + 1])


script_data = _extract_script_json(response['message']['content'])
script_data


{'0s-3s': {'text': "Get ready for breathtaking views! Let's explore the top highest places on Earth",
  'image_prompt': 'Aerial view of a majestic mountain range with snow-capped peaks, sun rising behind it'},
 '4s-7s': {'text': "From Mount Everest to Kilimanjaro, we'll take you on a journey to the roof of the world",
  'image_prompt': "Close-up shot of Mount Everest's summit with a flag waving in the wind"},
 '8s-11s': {'text': 'Number 5: Mount Aconcagua, Argentina - The highest peak outside Asia at 22,841ft',
  'image_prompt': 'Panoramic view of Mount Aconcagua, with a snow-capped summit and surrounding mountains'},
 '12s-15s': {'text': 'Number 4: K2, Pakistan/China - The second-highest peak in the world at 28,251ft',
  'image_prompt': "Dramatic shot of K2's rugged terrain, with snow and ice-covered peaks"},
 '16s-20s': {'text': 'Number 3: Kangchenjunga, Nepal/India - The third-highest peak in the world at 28,169ft',
  'image_prompt': 'Lush green forests and towering mountains of Kan

In [142]:
# Produce one image and one narration file per script section, both named after the section key.
for section_key in script_data:
    section = script_data[section_key]
    narration = section["text"]
    image_prompt = section["image_prompt"]
    print(section_key, narration, image_prompt)

    download_image(prompt=image_prompt, fname=section_key)

    audio_target = f"files/{section_key}.mp3"
    generate_audio(narration, voice="alloy", method="GET", output_file=audio_target)

    


0s-3s Get ready for breathtaking views! Let's explore the top highest places on Earth Aerial view of a majestic mountain range with snow-capped peaks, sun rising behind it
‚úÖ Image downloaded and resized to 720x1280 at 20% quality: f:\Gen_AI\git_gen_ai\Learning\shorts_generator\files\0s-3s.jpg
‚úÖ Audio saved as 'files/0s-3s.mp3'
4s-7s From Mount Everest to Kilimanjaro, we'll take you on a journey to the roof of the world Close-up shot of Mount Everest's summit with a flag waving in the wind
‚úÖ Image downloaded and resized to 720x1280 at 20% quality: f:\Gen_AI\git_gen_ai\Learning\shorts_generator\files\4s-7s.jpg
‚úÖ Audio saved as 'files/4s-7s.mp3'
8s-11s Number 5: Mount Aconcagua, Argentina - The highest peak outside Asia at 22,841ft Panoramic view of Mount Aconcagua, with a snow-capped summit and surrounding mountains
‚úÖ Image downloaded and resized to 720x1280 at 20% quality: f:\Gen_AI\git_gen_ai\Learning\shorts_generator\files\8s-11s.jpg
‚úÖ Audio saved as 'files/8s-11s.mp3'
12s

### Final Video Generation

In [8]:
from moviepy import *
from PIL import Image
from moviepy import *
from PIL import ImageFont


import os

In [None]:

# Script sections keyed by "<start>s-<end>s". Each entry holds the caption text shown on screen
# and the image prompt for that section; the key is also used below to build the
# files/<key>.jpg and files/<key>.mp3 paths.
video_script = {
  "0s-3s": {
    "text": "Get ready for breathtaking views! Let's explore the top highest places on Earth",
    "image_prompt": "Aerial view of a majestic mountain range with snow-capped peaks, sun rising behind it"
  },
  "4s-7s": {
    "text": "From Mount Everest to Kilimanjaro, we'll take you on a journey to the roof of the world",
    "image_prompt": "Close-up shot of Mount Everest's summit with a flag waving in the wind"
  },
  "8s-11s": {
    "text": "Number 5: Mount Aconcagua, Argentina - The highest peak outside Asia at 22,841ft",
    "image_prompt": "Panoramic view of Mount Aconcagua, with a snow-capped summit and surrounding mountains"
  },
  "12s-15s": {
    "text": "Number 4: K2, Pakistan/China - The second-highest peak in the world at 28,251ft",
    "image_prompt": "Dramatic shot of K2's rugged terrain, with snow and ice-covered peaks"
  },
  "16s-20s": {
    "text": "Number 3: Kangchenjunga, Nepal/India - The third-highest peak in the world at 28,169ft",
    "image_prompt": "Lush green forests and towering mountains of Kangchenjunga, with a misty atmosphere"
  },
  "21s-25s": {
    "text": "Number 2: Mount Denali, Alaska/USA - The highest peak in North America at 20,310ft",
    "image_prompt": "Snowy landscape of Mount Denali, with a majestic glacier and surrounding peaks"
  },
  "26s-30s": {
    "text": "And the number one spot goes to... Mount Everest! The highest peak in the world at 29,029ft",
    "image_prompt": "Iconic shot of Mount Everest's summit, with a flag waving in the wind and a breathtaking view"
  }
}

# Parameters
output_file = "highest_places_video.mp4"  # Path the final video is written to
fps = 24  # Frame rate
transition_duration = 1  # Seconds of overlap between consecutive clips (used as negative padding when concatenating)

# Function to extract timestamps
def get_time_range(timestamp):
    """
    Extract start, end, and duration (whole seconds) from a timestamp key.

    Accepts keys such as "0s-3s". Surrounding whitespace, an upper-case "S"
    and a missing "s" suffix are tolerated, so " 4s - 7s " and "8-11" parse too.

    Args:
    - timestamp (str): Range in the form "<start>s-<end>s".

    Returns:
    - tuple[int, int, int]: (start_sec, end_sec, end_sec - start_sec).

    Raises:
    - ValueError: if there is no "-" separator or a part is not an integer.
    """
    start_token, separator, end_token = timestamp.partition('-')
    if not separator:
        raise ValueError(f"Timestamp {timestamp!r} is not of the form '<start>s-<end>s'")

    def _to_seconds(token):
        # Strip only a real unit suffix; blindly dropping the last character
        # would turn "10" into 1.
        cleaned = token.strip()
        if cleaned[-1:].lower() == 's':
            cleaned = cleaned[:-1]
        return int(cleaned)

    start_sec = _to_seconds(start_token)
    end_sec = _to_seconds(end_token)
    return start_sec, end_sec, end_sec - start_sec

# Build one narrated, captioned clip per script section
clips = []

for timestamp, data in video_script.items():
    image_path = f"files/{timestamp}.jpg"  # Image filename based on timestamp
    audio_path = f"files/{timestamp}.mp3"  # Narration filename based on timestamp

    # Skip the section if either asset is missing, so a failed download does not abort the render.
    missing_assets = [path for path in (image_path, audio_path) if not os.path.exists(path)]
    if missing_assets:
        for path in missing_assets:
            print(f"⚠️ Warning: {path} not found.")
        continue

    # The clip length follows the narration, not the nominal timestamp range.
    # The extra transition_duration seconds are what the negative padding overlaps
    # when the clips are concatenated (NOTE(review): presumed intent of the original "+ 1").
    audio_clip = AudioFileClip(audio_path)
    duration = audio_clip.duration + transition_duration

    # Create image clip
    img_clip = ImageClip(image_path).with_duration(duration)

    print(timestamp, duration)

    # Add text overlay: a caption box anchored to the bottom centre of the frame
    txt_clip = TextClip(
        method="caption",
        text=data['text'],
        transparent=True,
        font_size=40,
        margin=(0, 10),
        color='white',
        bg_color="black",
        size=(img_clip.w - 100, None),
        font="./arial.ttf",
        text_align="center",
    ).with_duration(duration).with_position(('center', 'bottom'))

    # Combine image and text into one clip and attach the narration
    final_clip = CompositeVideoClip([img_clip, txt_clip]).with_duration(duration).with_audio(audio_clip)

    # Append the clip to the list
    clips.append(final_clip)

# Fail early with a clear message instead of an opaque error from the concatenation.
if not clips:
    raise RuntimeError("No clips were built; check that files/ contains the generated images and audio.")

# Overlap consecutive clips by transition_duration seconds using negative padding.
# No fade effect is applied here, so the next clip simply covers the previous one
# for the overlapping part.
final_video = concatenate_videoclips(clips, padding=-transition_duration, method="compose")

try:
    # Export the final video.
    # If a player rejects the audio track, passing audio_codec='aac' is a common remedy (untested here).
    final_video.write_videofile(output_file, fps=fps, codec='libx264')
finally:
    # Release the audio readers and clips so the source files are not left open.
    final_video.close()
    for clip in clips:
        if clip.audio is not None:
            clip.audio.close()
        clip.close()

print(f"✅ Video saved as {output_file}")


0s-3s 5.42
4s-7s 6.07
8s-11s 10.05
12s-15s 10.22
16s-20s 11.01
21s-25s 8.8
26s-30s 9.86
MoviePy - Building video highest_places_video.mp4.
MoviePy - Writing audio in highest_places_videoTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing video highest_places_video.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready highest_places_video.mp4
‚úÖ Video saved as highest_places_video.mp4




### TTS Testing

In [20]:
!pip install gTTS

Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: gTTS
Successfully installed gTTS-2.5.4


In [17]:
from pyt2s.services import stream_elements


In [21]:
# Sample sentence for the TTS experiments; the gTTS cell below reads this variable
text = "A calendar with marked appointments and deadlines, surrounded by productivity symbols like clocks and briefcas."

In [19]:
# Default Voice
# data = stream_elements.requestTTS('Lorem Ipsum is simply dummy text.')

# Custom Voice: request the sample sentence with the "Russell" voice
data = stream_elements.requestTTS('A calendar with marked appointments and deadlines, surrounded by productivity symbols like clocks and briefcas.', stream_elements.Voice.Russell.value)

# Plain write-only binary mode is enough to dump the returned audio to disk
with open('output.mp3', 'wb') as file:
    file.write(data)

In [22]:
from gtts import gTTS
# Synthesize the sample sentence held in `text` with gTTS and save it as hello.mp3
tts = gTTS(text)
tts.save('hello.mp3')

In [23]:
# Character count of the sample sentence
len(text)

111

## Eleven Labs


In [24]:
!pip install elevenlabs



In [40]:
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from elevenlabs import play ,save

# Load variables from a local .env file; the client cell below reads ELEVENLABS_API_KEY via os.getenv
load_dotenv()



True

In [42]:
# Create the client with the API key taken from the environment
client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

# Convert a short sample sentence to speech
audio = client.text_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
    output_format="mp3_44100_128",
    text="The first.",
)

# Write the returned audio to sample.mp3
save(audio, "sample.mp3")