In [35]:
from gtts import gTTS
import os

def generate_letter_speech(text):
    """
    Generate one spoken-letter MP3 per distinct letter in ``text``.

    Each file is written to the ``letter_speech`` directory (created if
    missing) and named ``<letter>.mp3``, so a repeated letter maps to the same
    file. Every distinct letter is therefore synthesized only once, in order
    of first appearance, rather than being synthesized again and overwriting
    its own file for each repetition.

    Args:
        text (str): The letters to convert into speech. Must be non-empty and
            consist only of alphabetic characters (``str.isalpha``).

    Returns:
        None

    Raises:
        ValueError: If ``text`` is empty or contains a non-alphabetic
            character.
    """
    if not text.isalpha():
        raise ValueError("Input text should only contain alphabetic characters.")

    output_dir = "letter_speech"
    os.makedirs(output_dir, exist_ok=True)

    # dict.fromkeys de-duplicates while preserving first-seen order.
    for letter in dict.fromkeys(text):
        file_path = os.path.join(output_dir, f"{letter}.mp3")

        tts = gTTS(text=letter, lang='en')
        tts.save(file_path)
        print(f"Generated speech for '{letter}' saved as {file_path}")

# Example usage
# generate_letter_speech("cell")


In [36]:
from pydub import AudioSegment
import os


def combine_mp3_files(text, output_file='combined.mp3'):
    """
    Concatenate the per-letter MP3 clips for ``text`` into a single MP3 file.

    For every character of ``text``, in order, the clip
    ``letter_speech/<char>.mp3`` is appended to the result. Characters whose
    clip does not exist are reported on stdout and skipped. Each clip is
    decoded at most once per call and reused for repeated characters.

    Args:
        text (str): The text whose letters' speech files to combine.
        output_file (str): Path to the output MP3 file.

    Returns:
        None
    """
    output_dir = "letter_speech"
    combined_audio = AudioSegment.empty()

    # Decoded clip per character; None records that the file was missing when
    # the character was first seen, so the disk is consulted only once.
    decoded = {}

    for letter in text:
        file_path = os.path.join(output_dir, f"{letter}.mp3")
        print(file_path)

        if letter not in decoded:
            if os.path.exists(file_path):
                decoded[letter] = AudioSegment.from_mp3(file_path)
            else:
                decoded[letter] = None

        audio = decoded[letter]
        if audio is None:
            print(f"File not found: {file_path}")
            continue

        combined_audio += audio

    combined_audio.export(output_file, format='mp3')
    print(f"Combined audio saved as {output_file}")

# Example usage.
# Letters whose clip is missing from letter_speech/ are reported and skipped;
# uncomment the next line to generate a clip for every lowercase letter first.
# generate_letter_speech("abcdefghijklmnopqrstuvwxyz")

combine_mp3_files("celkkaabb", "cell_combined.mp3")

letter_speech/c.mp3
letter_speech/e.mp3
letter_speech/l.mp3
letter_speech/k.mp3
letter_speech/k.mp3
letter_speech/a.mp3
letter_speech/a.mp3
letter_speech/b.mp3
letter_speech/b.mp3
Combined audio saved as cell_combined.mp3


In [44]:
import random
import string

def generate_random_letters(n, unique=True):
    """
    Build a random string of lowercase ASCII letters.

    Args:
        n (int): Length of the string; must be non-negative.
        unique (bool): If True, no letter appears more than once (so ``n`` may
            be at most 26). If False, letters are drawn independently and may
            repeat.

    Returns:
        str: The generated string of ``n`` letters.

    Raises:
        ValueError: If ``n`` is negative, or if ``unique`` is True and ``n``
            exceeds 26.
    """
    if n < 0:
        # Reject explicitly: a negative length would otherwise yield a string
        # whose length is not n instead of failing.
        raise ValueError("n 必须是非负整数。")

    if unique:
        if n > 26:
            raise ValueError("无法生成超过26个不重复的字母。")
        # Draw n distinct letters without replacement.
        return ''.join(random.sample(string.ascii_lowercase, n))

    # Draw n letters with replacement.
    return ''.join(random.choices(string.ascii_lowercase, k=n))

# 示例用法
print(generate_random_letters(5, unique=True))  # five letters with no repeats
print(generate_random_letters(5, unique=False))  # five letters that may repeat

# Draw a fresh string (independent of the two printed above) and combine its
# letter clips; this writes to the same cell_combined.mp3 path used earlier.
letters = generate_random_letters(5, unique=True)
combine_mp3_files(letters, "cell_combined.mp3")

opfgm
exmem
letter_speech/h.mp3
letter_speech/y.mp3
letter_speech/p.mp3
letter_speech/e.mp3
letter_speech/u.mp3
Combined audio saved as cell_combined.mp3


In [48]:
import unicodedata

def generate_all_simplified_chinese_characters():
    """
    Collect every CJK Unified Ideograph known to ``unicodedata``.

    Despite the function name, no simplified/traditional distinction is made:
    a code point is kept whenever its general category is ``Lo`` and its
    Unicode name starts with ``CJK UNIFIED IDEOGRAPH``.

    Returns:
        list: Single-character strings, grouped by block in the order the
        blocks are listed below and ascending by code point within each block.
    """
    # Inclusive (first, last) code points of each block to scan.
    blocks = (
        (0x4E00, 0x9FFF),     # CJK Unified Ideographs
        (0x3400, 0x4DBF),     # CJK Unified Ideographs Extension A
        (0x20000, 0x2A6DF),   # CJK Unified Ideographs Extension B
        (0x2A700, 0x2B73F),   # CJK Unified Ideographs Extension C
        (0x2B740, 0x2B81F),   # CJK Unified Ideographs Extension D
        (0x2B820, 0x2CEAF),   # CJK Unified Ideographs Extension E
        (0x2CEB0, 0x2EBEF),   # CJK Unified Ideographs Extension F
    )
    name_prefix = 'CJK UNIFIED IDEOGRAPH'

    def _is_unified_ideograph(ch):
        # Code points without a Unicode name fall back to '' and are rejected.
        return (
            unicodedata.category(ch).startswith('Lo')
            and unicodedata.name(ch, '').startswith(name_prefix)
        )

    return [
        ch
        for first, last in blocks
        for ch in map(chr, range(first, last + 1))
        if _is_unified_ideograph(ch)
    ]

# Example usage
# Note: the list holds every CJK Unified Ideograph found in the scanned ranges,
# not only simplified forms, so the count printed below covers all of them.
all_simplified_chinese_characters = generate_all_simplified_chinese_characters()
print(f"Number of simplified Chinese characters: {len(all_simplified_chinese_characters)}")
print(f"First 10 characters: {all_simplified_chinese_characters[:10]}")


Number of simplified Chinese characters: 87905
First 10 characters: ['一', '丁', '丂', '七', '丄', '丅', '丆', '万', '丈', '三']
