In [1]:
!pip install transformers



# **Chat-GPT**

In [5]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Tokenizer/model pairs already loaded in this session, keyed by model name.
# Re-running the cell resets the cache; otherwise each language pair is
# loaded from disk (or downloaded) only once.
_OPUS_MT_CACHE = {}


def _load_opus_mt(model_name):
    """Return the (tokenizer, model) pair for `model_name`, loading it at most once."""
    if model_name not in _OPUS_MT_CACHE:
        _OPUS_MT_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSeq2SeqLM.from_pretrained(model_name),
        )
    return _OPUS_MT_CACHE[model_name]


def translate_text(text, source_lang, target_lang):
    """
    Translate text using Helsinki-NLP models from Hugging Face.

    Args:
        text (str): Text to translate. Blank or whitespace-only text is
            returned as an empty string without loading any model.
        source_lang (str): Source language code used in the model name (e.g. 'en').
        target_lang (str): Target language code used in the model name (e.g. 'fr').

    Returns:
        str: The decoded translation with special tokens removed.

    Notes:
        The model name is built as ``Helsinki-NLP/opus-mt-{source}-{target}``.
        Errors raised while loading that model are not caught here and
        propagate to the caller. The input is tokenized with
        ``truncation=True``.
    """
    # Nothing to translate: avoid a model download/generation for blank input.
    if not text or not text.strip():
        return ""

    # Define the model for the specific language pair
    model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

    # Load (or reuse) the tokenizer and model
    tokenizer, model = _load_opus_mt(model_name)

    # Tokenize the input text as a batch of one
    inputs = tokenizer([text], return_tensors="pt", padding=True, truncation=True)

    # Generate translation
    translated = model.generate(**inputs)

    # Decode the first (only) sequence to get the translated text
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Example Usage
if __name__ == "__main__":
    # Ask the three questions in order (source code, target code, text) and
    # strip surrounding whitespace from each answer.
    source_language, target_language, text_to_translate = (
        input(question).strip()
        for question in (
            "Enter the source language code (e.g., 'en' for English): ",
            "Enter the target language code (e.g., 'fr' for French): ",
            "Enter the text to translate: ",
        )
    )

    # Echo the input, then report either the translation or the failure reason.
    print(f"Original text: {text_to_translate}")
    try:
        translated_text = translate_text(text_to_translate, source_language, target_language)
        print(f"Translated text: {translated_text}")
    except Exception as e:
        print(f"An error occurred during translation: {e}")


Enter the source language code (e.g., 'en' for English): en
Enter the target language code (e.g., 'fr' for French): fr
Enter the text to translate: SAY MY NAME, MY FRIEND
Original text: SAY MY NAME, MY FRIEND
Translated text: Dis mon nom, mon ami.


# **Grok-Ai**

In [7]:
# Install required packages in Colab
!pip install transformers torch sentencepiece tqdm -q

from transformers import pipeline
from tqdm.notebook import tqdm
import torch

# Translation pipelines already built in this session, keyed by
# (model name, device, max_length), so repeated calls reuse the loaded model.
_TRANSLATION_PIPELINES = {}


def translate_text(text, source_lang, target_lang, max_length=512):
    """
    Translate text using MarianMT models via transformers pipeline.

    Args:
        text (str): Text to translate. Blank or whitespace-only text yields
            an empty string without loading a model.
        source_lang (str): Source language code (e.g., 'en' for English)
        target_lang (str): Target language code (e.g., 'fr' for French)
        max_length (int): Maximum length of the output sequence

    Returns:
        str: Translated text

    Raises:
        RuntimeError: If loading the model or translating fails. The original
            error is attached as ``__cause__``.
    """
    # Nothing to translate: skip the model download and inference entirely.
    if not text or not text.strip():
        return ""

    try:
        # Construct model name
        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

        # Use GPU 0 when CUDA is available, otherwise CPU (-1)
        device = 0 if torch.cuda.is_available() else -1

        # Build the pipeline only the first time this configuration is requested
        cache_key = (model_name, device, max_length)
        translator = _TRANSLATION_PIPELINES.get(cache_key)
        if translator is None:
            print(f"Loading model {model_name}...")
            translator = pipeline(
                "translation",
                model=model_name,
                device=device,
                max_length=max_length
            )
            _TRANSLATION_PIPELINES[cache_key] = translator

        # Perform translation with a (single-step) progress bar
        print("Translating...")
        with tqdm(total=1, desc="Translation Progress") as pbar:
            translated_text = translator(text)[0]['translation_text']
            pbar.update(1)

        return translated_text

    except Exception as e:
        # RuntimeError is still caught by `except Exception` in callers;
        # `from e` keeps the underlying error attached for debugging.
        raise RuntimeError(
            f"Translation failed: {str(e)}. Make sure the language pair is supported."
        ) from e

def main():
    """Run the interactive translation demo.

    Prints a short banner, asks for the source language code, target language
    code and text, then prints the original text followed by either the
    translation or an error line.
    """
    banner = (
        "Welcome to the Translation Demo!",
        "Available languages: en (English), fr (French), es (Spanish), de (German), etc.",
        "See Helsinki-NLP/opus-mt models for full list: https://huggingface.co/Helsinki-NLP",
    )
    for banner_line in banner:
        print(banner_line)

    # Collect the language pair and the text, trimming surrounding whitespace
    src_code = input("Enter the source language code (e.g., 'en'): ").strip()
    dst_code = input("Enter the target language code (e.g., 'fr'): ").strip()
    text_to_translate = input("Enter the text to translate: ").strip()

    print(f"\nOriginal text: {text_to_translate}")
    try:
        # Any failure inside translate_text is reported instead of raised
        translated_text = translate_text(text_to_translate, src_code, dst_code)
        print(f"Translated text: {translated_text}")
    except Exception as e:
        print(f"Error: {e}")


# Entry point: start the demo when this cell/script is executed directly
if __name__ == "__main__":
    main()

Welcome to the Translation Demo!
Available languages: en (English), fr (French), es (Spanish), de (German), etc.
See Helsinki-NLP/opus-mt models for full list: https://huggingface.co/Helsinki-NLP
Enter the source language code (e.g., 'en'): en
Enter the target language code (e.g., 'fr'): es
Enter the text to translate: I AM NOT IN DANGER..I AM THE DANGER

Original text: I AM NOT IN DANGER..I AM THE DANGER
Loading model Helsinki-NLP/opus-mt-en-es...


config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

Device set to use cpu


Translating...


Translation Progress:   0%|          | 0/1 [00:00<?, ?it/s]

Translated text: NO ESTOY EN PELIGRO... SOY EL PELIGRO


# **TTS-GROK**

In [9]:
# Install required libraries
!pip install gTTS  # Google Text-to-Speech
!pip install pyttsx3  # Offline TTS engine
!pip install IPython  # For audio playback in Colab
!pip install langdetect  # For language detection
!apt-get install -y espeak libespeak1  # Required for pyttsx3

# Import necessary modules
from gtts import gTTS
import pyttsx3
from IPython.display import Audio, display
from langdetect import detect
import os
import time

# AdvancedTTS: text-to-speech helper built on gTTS (online) and pyttsx3 (offline)
class AdvancedTTS:
    """Text-to-speech helper offering a gTTS (online) and a pyttsx3 (offline) back end.

    Generated audio files are written into ``self.output_dir`` ("tts_outputs"),
    which is created when the object is constructed.
    """

    def __init__(self):
        """Start the pyttsx3 engine, read its voice list and create the output directory."""
        self.offline_engine = pyttsx3.init()
        self.voices = self.offline_engine.getProperty('voices')
        self.output_dir = "tts_outputs"
        os.makedirs(self.output_dir, exist_ok=True)

    def detect_language(self, text):
        """Return the language code reported by langdetect for `text`, or "en" if detection fails."""
        try:
            return detect(text)
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt / SystemExit still stop the notebook cell.
            return "en"

    def get_available_voices(self):
        """Return a list of (voice id, voice name) tuples for the offline engine's voices."""
        return [(voice.id, voice.name) for voice in self.voices]

    def google_tts(self, text, lang=None, slow=False, filename="output.mp3"):
        """Synthesize `text` with gTTS and save it under the output directory.

        Args:
            text: Text to speak.
            lang: Language code; when falsy it is detected from `text`.
            slow: Passed through to gTTS.
            filename: File name inside ``self.output_dir``.

        Returns:
            The path of the saved file, or None if any error occurred (the
            error is printed, not raised).
        """
        try:
            if not lang:
                lang = self.detect_language(text)
            tts = gTTS(text=text, lang=lang, slow=slow, lang_check=True)
            output_path = os.path.join(self.output_dir, filename)
            tts.save(output_path)
            return output_path
        except Exception as e:
            print(f"Google TTS Error: {str(e)}")
            return None

    def offline_tts(self, text, voice_id=None, rate=200, filename="offline_output.mp3"):
        """Synthesize `text` with the pyttsx3 engine and save it under the output directory.

        Args:
            text: Text to speak.
            voice_id: Voice to select; ignored unless it matches the id of one
                of the engine's voices.
            rate: Value set as the engine's 'rate' property.
            filename: File name inside ``self.output_dir``.

        Returns:
            The output path, or None if any error occurred (the error is
            printed, not raised).
        """
        try:
            # Only switch voice when the requested id is one the engine knows
            if voice_id and any(v.id == voice_id for v in self.voices):
                self.offline_engine.setProperty('voice', voice_id)
            self.offline_engine.setProperty('rate', rate)
            output_path = os.path.join(self.output_dir, filename)
            self.offline_engine.save_to_file(text, output_path)
            self.offline_engine.runAndWait()
            return output_path
        except Exception as e:
            print(f"Offline TTS Error: {str(e)}")
            return None

    def play_audio(self, filepath):
        """Display an autoplaying audio widget for `filepath`, or report that the file is missing."""
        if filepath and os.path.exists(filepath):
            display(Audio(filepath, autoplay=True))
            # Pause before returning: one second per 1,000,000 bytes of file, plus one second
            time.sleep(os.path.getsize(filepath) / 1000000 + 1)
        else:
            print("Audio file not found!")

# Modified main function to get user input
def main():
    """Interactive text-to-speech loop.

    Prints up to five offline voices, then repeatedly asks for text and a
    back end (1 = Google TTS, 2 = offline TTS), synthesizes the audio and plays
    it. Entering 'quit' (any letter case) at the text prompt ends the loop.
    """
    speaker = AdvancedTTS()

    print("Welcome to the Advanced TTS System!")
    print("Available voices (showing first 5):")
    for voice_id, voice_name in speaker.get_available_voices()[:5]:
        print(f"ID: {voice_id}, Name: {voice_name}")

    while True:
        # Ask for the text to speak
        print("\nEnter the text you want to convert to speech (or 'quit' to exit):")
        phrase = input("> ").strip()

        if phrase.lower() == 'quit':
            print("Exiting TTS system. Goodbye!")
            return
        if not phrase:
            print("Please enter some text!")
            continue

        # Ask which back end to use
        print("\nChoose TTS method: (1) Google TTS (online), (2) Offline TTS")
        choice = input("Enter 1 or 2: ").strip()

        if choice == "1":
            # Online path: detect the language, synthesize, then play
            lang = speaker.detect_language(phrase)
            print(f"Detected language: {lang}")
            audio_path = speaker.google_tts(
                text=phrase,
                lang=lang,
                slow=False,
                filename="user_google_output.mp3"
            )
            print("Playing Google TTS output...")
            speaker.play_audio(audio_path)
            continue

        if choice != "2":
            print("Invalid choice! Please enter 1 or 2.")
            continue

        # Offline path: needs at least one installed voice
        catalog = speaker.get_available_voices()
        if not catalog:
            print("No voices available for offline TTS!")
            continue

        print("Using first available voice. Enter a voice ID (or press Enter for default):")
        requested = input("> ").strip()
        known_ids = [entry[0] for entry in catalog]
        # Unknown or empty answers fall back to the first listed voice
        if requested in known_ids:
            voice_id = requested
        else:
            voice_id = catalog[0][0]

        audio_path = speaker.offline_tts(
            text=phrase,
            voice_id=voice_id,
            rate=180,
            filename="user_offline_output.mp3"
        )
        print("Playing offline TTS output...")
        speaker.play_audio(audio_path)

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Stopped by user")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Clean up (optional, unchanged)
# import shutil
# shutil.rmtree("tts_outputs", ignore_errors=True)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
espeak is already the newest version (1.48.15+dfsg-3).
libespeak1 is already the newest version (1.48.15+dfsg-3).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.
Welcome to the Advanced TTS System!
Available voices (showing first 5):
ID: afrikaans, Name: afrikaans
ID: aragonese, Name: aragonese
ID: bulgarian, Name: bulgarian
ID: bengali, Name: bengali
ID: bosnian, Name: bosnian

Enter the text you want to convert to speech (or 'quit' to exit):
> say my name

Choose TTS method: (1) Google TTS (online), (2) Offline TTS
Enter 1 or 2: 1
Detected language: tl
Playing Google TTS output...



Enter the text you want to convert to speech (or 'quit' to exit):
Stopped by user


In [10]:
# Install required libraries
!pip install gTTS  # Google Text-to-Speech
!pip install pyttsx3  # Offline TTS engine
!pip install IPython  # For audio playback in Colab
!pip install langdetect  # For language detection
!apt-get install -y espeak libespeak1  # Required for pyttsx3

# Import necessary modules
from gtts import gTTS
import pyttsx3
from IPython.display import Audio, display
from langdetect import detect
import os
import time

# AdvancedTTS: text-to-speech helper built on gTTS (online) and pyttsx3 (offline)
class AdvancedTTS:
    """Text-to-speech helper offering a gTTS (online) and a pyttsx3 (offline) back end.

    Generated audio files are written into ``self.output_dir`` ("tts_outputs"),
    which is created when the object is constructed.
    """

    def __init__(self):
        """Start the pyttsx3 engine, read its voice list and create the output directory."""
        self.offline_engine = pyttsx3.init()
        self.voices = self.offline_engine.getProperty('voices')
        self.output_dir = "tts_outputs"
        os.makedirs(self.output_dir, exist_ok=True)

    def detect_language(self, text):
        """Return the language code reported by langdetect for `text`, or "en" if detection fails."""
        try:
            return detect(text)
        except Exception:
            # Default to English if detection fails. Catch Exception rather
            # than using a bare `except:` so KeyboardInterrupt / SystemExit
            # still stop the notebook cell.
            return "en"

    def get_available_voices(self):
        """Return a list of (voice id, voice name) tuples for the offline engine's voices."""
        return [(voice.id, voice.name) for voice in self.voices]

    def google_tts(self, text, lang=None, slow=False, filename="output.mp3"):
        """Synthesize `text` with gTTS and save it under the output directory.

        Args:
            text: Text to speak.
            lang: Language code; when falsy it is detected from `text`.
            slow: Passed through to gTTS.
            filename: File name inside ``self.output_dir``.

        Returns:
            The path of the saved file, or None if any error occurred (the
            error is printed, not raised).
        """
        try:
            if not lang:
                lang = self.detect_language(text)
            tts = gTTS(text=text, lang=lang, slow=slow, lang_check=True)
            output_path = os.path.join(self.output_dir, filename)
            tts.save(output_path)
            return output_path
        except Exception as e:
            print(f"Google TTS Error: {str(e)}")
            return None

    def offline_tts(self, text, voice_id=None, rate=200, filename="offline_output.mp3"):
        """Synthesize `text` with the pyttsx3 engine and save it under the output directory.

        Args:
            text: Text to speak.
            voice_id: Voice to select; ignored unless it matches the id of one
                of the engine's voices.
            rate: Value set as the engine's 'rate' property.
            filename: File name inside ``self.output_dir``.

        Returns:
            The output path, or None if any error occurred (the error is
            printed, not raised).
        """
        try:
            # Only switch voice when the requested id is one the engine knows
            if voice_id and any(v.id == voice_id for v in self.voices):
                self.offline_engine.setProperty('voice', voice_id)
            self.offline_engine.setProperty('rate', rate)
            output_path = os.path.join(self.output_dir, filename)
            self.offline_engine.save_to_file(text, output_path)
            self.offline_engine.runAndWait()
            return output_path
        except Exception as e:
            print(f"Offline TTS Error: {str(e)}")
            return None

    def play_audio(self, filepath):
        """Display an autoplaying audio widget for `filepath`, or report that the file is missing."""
        if filepath and os.path.exists(filepath):
            display(Audio(filepath, autoplay=True))
            # Pause before returning: one second per 1,000,000 bytes of file, plus one second
            time.sleep(os.path.getsize(filepath) / 1000000 + 1)
        else:
            print("Audio file not found!")

# Modified main function with "Kokoro" integration
def main():
    """Interactive text-to-speech loop with a Japanese 'Kokoro' demo.

    Prints up to five offline voices, plays a Google TTS demo of "心", then
    repeatedly asks for text and a back end (1 = Google TTS, 2 = offline TTS).
    Entering 'kokoro' substitutes the kanji "心"; entering 'quit' ends the loop
    (both are matched case-insensitively).
    """
    speaker = AdvancedTTS()

    print("Welcome to the Advanced TTS System!")
    print("Available voices (showing first 5):")
    for voice_id, voice_name in speaker.get_available_voices()[:5]:
        print(f"ID: {voice_id}, Name: {voice_name}")

    # One-off demo: speak "Kokoro" with the language fixed to Japanese
    print("\nDemonstrating 'Kokoro' (心) in Japanese with Google TTS...")
    kokoro_text = "心"  # "Kokoro" in Japanese kanji
    demo_path = speaker.google_tts(
        text=kokoro_text,
        lang="ja",
        slow=False,
        filename="kokoro_demo.mp3"
    )
    print("Playing 'Kokoro' demo...")
    speaker.play_audio(demo_path)

    # Prompt loop
    while True:
        print("\nEnter text to convert to speech (or 'quit' to exit, 'kokoro' for Japanese demo):")
        phrase = input("> ").strip()
        lowered = phrase.lower()

        if lowered == 'quit':
            print("Exiting TTS system. Goodbye!")
            return

        if lowered == 'kokoro':
            # Swap the romanized keyword for the kanji before synthesis
            phrase = kokoro_text
            print("Using 'Kokoro' (心) in Japanese...")

        if not phrase:
            print("Please enter some text!")
            continue

        # Ask which back end to use
        print("\nChoose TTS method: (1) Google TTS (online), (2) Offline TTS")
        choice = input("Enter 1 or 2: ").strip()

        if choice == "1":
            # The kanji is always treated as Japanese; anything else is auto-detected
            if phrase == kokoro_text:
                lang = "ja"
            else:
                lang = speaker.detect_language(phrase)
            print(f"Detected/Selected language: {lang}")
            audio_path = speaker.google_tts(
                text=phrase,
                lang=lang,
                slow=False,
                filename="user_google_output.mp3"
            )
            print("Playing Google TTS output...")
            speaker.play_audio(audio_path)
            continue

        if choice != "2":
            print("Invalid choice! Please enter 1 or 2.")
            continue

        # Offline path (note: pyttsx3 may not support Japanese well)
        catalog = speaker.get_available_voices()
        if not catalog:
            print("No voices available for offline TTS!")
            continue

        print("Using first available voice. Enter a voice ID (or press Enter for default):")
        requested = input("> ").strip()
        known_ids = [entry[0] for entry in catalog]
        # Unknown or empty answers fall back to the first listed voice
        if requested in known_ids:
            voice_id = requested
        else:
            voice_id = catalog[0][0]

        audio_path = speaker.offline_tts(
            text=phrase,
            voice_id=voice_id,
            rate=180,
            filename="user_offline_output.mp3"
        )
        print("Playing offline TTS output...")
        speaker.play_audio(audio_path)

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Stopped by user")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Clean up (optional, unchanged)
# import shutil
# shutil.rmtree("tts_outputs", ignore_errors=True)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
espeak is already the newest version (1.48.15+dfsg-3).
libespeak1 is already the newest version (1.48.15+dfsg-3).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.
Welcome to the Advanced TTS System!
Available voices (showing first 5):
ID: afrikaans, Name: afrikaans
ID: aragonese, Name: aragonese
ID: bulgarian, Name: bulgarian
ID: bengali, Name: bengali
ID: bosnian, Name: bosnian

Demonstrating 'Kokoro' (心) in Japanese with Google TTS...
Playing 'Kokoro' demo...



Enter text to convert to speech (or 'quit' to exit, 'kokoro' for Japanese demo):
> let him cook

Choose TTS method: (1) Google TTS (online), (2) Offline TTS
Enter 1 or 2: 1
Detected/Selected language: en
Playing Google TTS output...



Enter text to convert to speech (or 'quit' to exit, 'kokoro' for Japanese demo):
Stopped by user


# **TEXT TO VIDEO GENERATOR**

In [11]:
# Install required libraries
!pip install torch torchvision  # PyTorch for model inference
!pip install huggingface_hub  # To download model from Hugging Face
!pip install "xfuser>=0.4.1"  # Required for Wan 2.1 inference (multi-GPU support)
!pip install IPython  # For video playback in Colab

# Import necessary modules
import torch
import os
from huggingface_hub import hf_hub_download, snapshot_download
from IPython.display import Video, display
import subprocess
import time

class WanTextToVideo:
    """Wrapper that downloads Wan 2.1 T2V checkpoints and runs generate.py as a subprocess.

    Videos are written into ``output_dir``, which is created on construction.
    """

    def __init__(self, model_id="Wan-AI/Wan2.1-T2V-14B", output_dir="video_outputs"):
        """Initialize the Wan 2.1 T2V model.

        Args:
            model_id: Hugging Face repo id passed to ``snapshot_download``.
            output_dir: Directory that receives the generated videos.

        Raises:
            FileNotFoundError: If generate.py is missing from the checkpoint
                directory after the download.
        """
        self.model_id = model_id
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # NOTE(review): the checkpoint directory (and the "--task" value used
        # in generate_video) are fixed to the 14B model regardless of model_id.
        self.checkpoint_dir = "./Wan2.1-T2V-14B"
        self._setup_model()

    def _setup_model(self):
        """Download model weights and check that the inference script is present."""
        print("Downloading Wan2.1-T2V-14B model...")
        snapshot_download(repo_id=self.model_id, local_dir=self.checkpoint_dir)
        # NOTE(review): this check only runs after the full snapshot download,
        # and the recorded download log for this repo lists no generate.py.
        # Confirm where the script is expected to come from.
        if not os.path.exists(os.path.join(self.checkpoint_dir, "generate.py")):
            raise FileNotFoundError("generate.py not found in model directory. Please ensure it's included in the repo.")

    def generate_video(self, prompt, resolution="1280x720", filename="output.mp4"):
        """Generate a video from a text prompt using Wan2.1-T2V-14B.

        Args:
            prompt: Text prompt passed to generate.py.
            resolution: Frame size as "WIDTHxHEIGHT" or "WIDTH*HEIGHT".
            filename: File name inside ``self.output_dir``.

        Returns:
            The output path when the subprocess exits with code 0, otherwise
            None (errors are printed, not raised).
        """
        try:
            output_path = os.path.join(self.output_dir, filename)
            # The Wan 2.1 generate.py CLI spells sizes as WIDTH*HEIGHT
            # (e.g. "1280*720"), so accept the friendlier "1280x720" form too.
            cli_size = str(resolution).strip().lower().replace("x", "*")
            # Construct the command based on Wan 2.1 documentation; the output
            # file is selected with --save_file (generate.py has no --output).
            command = [
                "python", os.path.join(self.checkpoint_dir, "generate.py"),
                "--task", "t2v-14B",
                "--size", cli_size,
                "--ckpt_dir", self.checkpoint_dir,
                "--prompt", prompt,
                "--save_file", output_path
            ]

            # Add memory-saving flags when GPU 0 reports less than 24e9 bytes of memory
            if self.device == "cuda" and torch.cuda.get_device_properties(0).total_memory < 24e9:
                command.extend(["--offload_model", "True", "--t5_cpu"])

            print(f"Running command: {' '.join(command)}")
            result = subprocess.run(command, capture_output=True, text=True)

            if result.returncode != 0:
                print(f"Error generating video: {result.stderr}")
                return None

            print(f"Video generated at: {output_path}")
            return output_path

        except Exception as e:
            print(f"Video generation error: {str(e)}")
            return None

    def play_video(self, filepath):
        """Display the generated video in Colab, or report that the file is missing."""
        if filepath and os.path.exists(filepath):
            display(Video(filepath, embed=True, width=640, height=360))
            time.sleep(1)  # Small delay for rendering
        else:
            print("Video file not found!")

# Main function with user input
def main():
    """Interactive prompt loop for the text-to-video generator.

    Builds the generator first (before the welcome text is printed), then
    repeatedly asks for a prompt and an optional resolution, generates a video
    into a timestamped file and plays it when generation returned a path.
    Entering 'quit' (any letter case) ends the loop.
    """
    generator = WanTextToVideo()

    print("Welcome to the Wan 2.1 Text-to-Video Generator!")
    print("Note: This requires a GPU with at least 16GB VRAM for optimal performance.")

    while True:
        print("\nEnter a text prompt for video generation (or 'quit' to exit):")
        idea = input("> ").strip()

        if idea.lower() == 'quit':
            print("Exiting video generator. Goodbye!")
            return
        if not idea:
            print("Please enter a prompt!")
            continue

        # An empty answer keeps the 720p default
        print("Enter resolution (e.g., '1280x720' for 720p, '832x480' for 480p, or press Enter for default 720p):")
        size_answer = input("> ").strip()
        chosen_size = size_answer if size_answer else "1280x720"

        # File name carries the current Unix time in whole seconds
        target_name = f"video_{int(time.time())}.mp4"
        video_path = generator.generate_video(idea, resolution=chosen_size, filename=target_name)
        if not video_path:
            continue
        print("Playing generated video...")
        generator.play_video(video_path)

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Stopped by user")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Clean up (optional)
# import shutil
# shutil.rmtree("video_outputs", ignore_errors=True)

Collecting xfuser>=0.4.1
  Downloading xfuser-0.4.2-py3-none-any.whl.metadata (22 kB)
Collecting distvae (from xfuser>=0.4.1)
  Downloading DistVAE-0.0.0b5-py3-none-any.whl.metadata (701 bytes)
Collecting yunchang>=0.6.0 (from xfuser>=0.4.1)
  Downloading yunchang-0.6.0-py3-none-any.whl.metadata (11 kB)
Downloading xfuser-0.4.2-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.3/193.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yunchang-0.6.0-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading DistVAE-0.0.0b5-py3-none-any.whl (28 kB)
Installing collected packages: yunchang, distvae, xfuser
Successfully installed distvae-0.0.0b5 xfuser-0.4.2 yunchang-0.6.0
Downloading Wan2.1-T2V-14B model...


Fetching 27 files:   0%|          | 0/27 [00:00<?, ?it/s]

.gitattributes:   0%|          | 0.00/2.24k [00:00<?, ?B/s]

Wan2.1_VAE.pth:   0%|          | 0.00/508M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

comp_effic.png:   0%|          | 0.00/1.79M [00:00<?, ?B/s]

LICENSE.txt:   0%|          | 0.00/11.4k [00:00<?, ?B/s]

data_for_diff_stage.jpg:   0%|          | 0.00/528k [00:00<?, ?B/s]

logo.png:   0%|          | 0.00/56.3k [00:00<?, ?B/s]

i2v_res.png:   0%|          | 0.00/892k [00:00<?, ?B/s]

t2v_res.jpg:   0%|          | 0.00/301k [00:00<?, ?B/s]

vben_1.3b_vs_sota.png:   0%|          | 0.00/516k [00:00<?, ?B/s]

vben_vs_sota.png:   0%|          | 0.00/1.55M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/250 [00:00<?, ?B/s]

video_dit_arch.jpg:   0%|          | 0.00/643k [00:00<?, ?B/s]

video_vae_res.jpg:   0%|          | 0.00/213k [00:00<?, ?B/s]

(…)pytorch_model-00001-of-00006.safetensors:   0%|          | 0.00/9.89G [00:00<?, ?B/s]

(…)pytorch_model-00002-of-00006.safetensors:   0%|          | 0.00/9.84G [00:00<?, ?B/s]

(…)pytorch_model-00003-of-00006.safetensors:   0%|          | 0.00/9.84G [00:00<?, ?B/s]

(…)pytorch_model-00004-of-00006.safetensors:   0%|          | 0.00/9.84G [00:00<?, ?B/s]

(…)pytorch_model-00006-of-00006.safetensors:   0%|          | 0.00/7.91G [00:00<?, ?B/s]

(…)pytorch_model-00005-of-00006.safetensors:   0%|          | 0.00/9.84G [00:00<?, ?B/s]

(…)ion_pytorch_model.safetensors.index.json:   0%|          | 0.00/96.8k [00:00<?, ?B/s]

i2v_input.JPG:   0%|          | 0.00/251k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/6.62k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.55M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/61.7k [00:00<?, ?B/s]

models_t5_umt5-xxl-enc-bf16.pth:   0%|          | 0.00/11.4G [00:00<?, ?B/s]

Stopped by user
