In [13]:
import sys
import os
import random
import re
import json
from PyQt5.QtWidgets import QApplication
from PyQt5.QtGui import QFont, QPixmap, QPainter, QColor, QFontDatabase, QTransform, QImage, QFontMetrics
from PyQt5.QtCore import Qt, QRectF, QPointF

# --- ТОХИРГООНЫ ХЭСЭГ ---
all_fonts = [
    '../fonts/Bichimel.ttf',
    '../fonts/mnglwritingotf.ttf',
    '../fonts/UnicodeFontForShare/UnicodeFONT MUNKH/MunkhGalUNICODE/Menk Hawang.ttf',
    '../fonts/UnicodeFontForShare/VertNote/Mongol Bichimel.ttf',
    '../fonts/Chagan.ttf',
    '../fonts/Garchag.ttf',
    '../fonts/Syurga.ttf',
    '../fonts/Urga.ttf',
    '../fonts/UnicodeFontForShare/Microsoft/monbaiti 501.ttf',
    '../fonts/UnicodeFontForShare/UnicodeFONT MUNKH/MunkhGalUNICODE/Menk Qagan.ttf',
    '../fonts/UnicodeFontForShare/UnicodeFONT MUNKH/MunkhGalUNICODE/Menk Sonin.ttf',
    '../fonts/UnicodeFontForShare/VertNote/Mongol Garchig.ttf',
    '../fonts/MONGOLIANSYSTEM0.97.TTF',
    '../fonts/NotoSansMongolian-Regular.ttf',
    '../fonts/UnicodeFontForShare/Mongol tolbo/Hutuch/Hutuch-Light.ttf',
    '../fonts/UnicodeFontForShare/VertNote/Mongol Web.ttf',
    '../fonts/mngltitleotf.ttf',
    '../fonts/UnicodeFontForShare/UnicodeFONT MUNKH/MunkhGalUNICODE/Menk Garqig.ttf',
]

TEXT_FILE_PATH = '../adiya/web_scrap/scraped_data/newsmn.txt'
OUTPUT_DIR_NAME = 'generated_mgl_tilt_v3'
JSON_OUTPUT_FILENAME = "labels_mgl_tilt_v3.json"
JSON_IMAGE_PATH_PREFIX = OUTPUT_DIR_NAME + "/"

MAX_IMAGES_TO_GENERATE = 100
BASE_FONT_SIZE = 100
TEXT_COLOR = QColor("black")
BACKGROUND_COLOR = QColor("white")
BASE_ROTATION_ANGLE = -90 # Монгол бичгийн үндсэн эргүүлэлт (зөв байрлалд оруулна)
RANDOM_TILT_RANGE = (-15, 15) # Нэмэлт санамсаргүй налууны хязгаар (градус)
PADDING_AMOUNT = 30 # Паддингийг бага зэрэг нэмэгдүүлэв
SCALING_FACTOR_CONFIG = 1.2
# --- ТОХИРГООНЫ ХЭСЭГ ТӨГСӨВ ---

def sanitize_filename(text, max_len=40):
    text = re.sub(r'[\\/*?:"<>|]', "", text)
    text = text.replace(" ", "_")
    if not text:
        return "image"
    return text[:max_len]

def generate_text_image(app, text_to_render, font_path, output_path, scaling_factor_param):
    if not text_to_render.strip():
        return False, None

    font_id = QFontDatabase.addApplicationFont(font_path)
    font_name_for_json = os.path.basename(font_path)

    if font_id == -1:
        print(f"Warning: Could not load font: {font_path}. Using system default.")
        font_families = ["Arial"]
    else:
        font_families = QFontDatabase.applicationFontFamilies(font_id)

    if not font_families:
        print(f"Warning: No font families found for {font_path}. Using system default.")
        font = QFont()
    else:
        font = QFont(font_families[0], BASE_FONT_SIZE)

    fm = QFontMetrics(font)
    text_rect_precise = fm.boundingRect(text_to_render)
    
    temp_horizontal_pixmap_width = text_rect_precise.width()
    temp_horizontal_pixmap_height = text_rect_precise.height()

    if temp_horizontal_pixmap_width <= 0 or temp_horizontal_pixmap_height <= 0:
        print(f"Warning: Text '{text_to_render[:20]}...' has zero/negative initial dimensions. Skipping.")
        return False, None

    # Алхам 1: Текстийг хэвтээ байдлаар түр зуурын pixmap дээр зурна
    temp_horizontal_pixmap = QPixmap(int(temp_horizontal_pixmap_width), int(temp_horizontal_pixmap_height))
    temp_horizontal_pixmap.fill(BACKGROUND_COLOR) # Эсвэл Qt.transparent

    painter_temp = QPainter(temp_horizontal_pixmap)
    painter_temp.setRenderHint(QPainter.Antialiasing)
    painter_temp.setFont(font)
    painter_temp.setPen(TEXT_COLOR)
    painter_temp.drawText(QPointF(-text_rect_precise.x(), -text_rect_precise.y()), text_to_render)
    painter_temp.end()

    # Алхам 2: Хэвтээ текстийг BASE_ROTATION_ANGLE (-90) эргүүлж, босоо чиглэлтэй болгоно
    transform_to_vertical = QTransform()
    transform_to_vertical.rotate(BASE_ROTATION_ANGLE)
    
    # Босоо чиглэлтэй текстийг агуулах pixmap-ын хэмжээ
    vertically_oriented_rect = transform_to_vertical.mapRect(QRectF(0, 0, temp_horizontal_pixmap.width(), temp_horizontal_pixmap.height()))
    
    vertically_oriented_text_pixmap_width = int(vertically_oriented_rect.width())
    vertically_oriented_text_pixmap_height = int(vertically_oriented_rect.height())

    vertically_oriented_text_pixmap = QPixmap(vertically_oriented_text_pixmap_width, vertically_oriented_text_pixmap_height)
    vertically_oriented_text_pixmap.fill(BACKGROUND_COLOR) # Энэ pixmap-ын дэвсгэр

    painter_vertical = QPainter(vertically_oriented_text_pixmap)
    painter_vertical.setRenderHint(QPainter.Antialiasing)
    painter_vertical.setRenderHint(QPainter.SmoothPixmapTransform)
    painter_vertical.translate(vertically_oriented_text_pixmap_width / 2, vertically_oriented_text_pixmap_height / 2)
    painter_vertical.rotate(BASE_ROTATION_ANGLE)
    painter_vertical.drawPixmap(
        int(-temp_horizontal_pixmap.width() / 2), 
        int(-temp_horizontal_pixmap.height() / 2), 
        temp_horizontal_pixmap
    )
    painter_vertical.end()

    # Одоо vertically_oriented_text_pixmap нь зөв босоо чиглэлтэй монгол бичгийг агуулж байна.

    # Алхам 3: Энэ босоо чиглэлтэй текстийг санамсаргүй өнцгөөр налуулж, эцсийн зураг үүсгэнэ
    random_tilt_angle = random.uniform(RANDOM_TILT_RANGE[0], RANDOM_TILT_RANGE[1])
    
    transform_final_tilt = QTransform()
    transform_final_tilt.rotate(random_tilt_angle) # Зөвхөн санамсаргүй налууг хийнэ

    # Эцсийн эргүүлсэн (налуутай) контентын хэмжээ
    final_tilted_content_rect = transform_final_tilt.mapRect(QRectF(0, 0, vertically_oriented_text_pixmap.width(), vertically_oriented_text_pixmap.height()))

    final_content_width = int(final_tilted_content_rect.width())
    final_content_height = int(final_tilted_content_rect.height())
    
    padded_final_width = final_content_width + PADDING_AMOUNT * 2
    padded_final_height = final_content_height + PADDING_AMOUNT * 2
    
    final_tilted_pixmap = QPixmap(padded_final_width, padded_final_height)
    final_tilted_pixmap.fill(BACKGROUND_COLOR)

    painter_final_tilted = QPainter(final_tilted_pixmap)
    painter_final_tilted.setRenderHint(QPainter.Antialiasing)
    painter_final_tilted.setRenderHint(QPainter.SmoothPixmapTransform)

    painter_final_tilted.translate(padded_final_width / 2, padded_final_height / 2)
    painter_final_tilted.rotate(random_tilt_angle) # Зөвхөн санамсаргүй налууг хийнэ
    
    # vertically_oriented_text_pixmap-г (зөв босоо чиглэлтэй) зурна
    painter_final_tilted.drawPixmap(
        int(-vertically_oriented_text_pixmap.width() / 2), 
        int(-vertically_oriented_text_pixmap.height() / 2), 
        vertically_oriented_text_pixmap
    )
    painter_final_tilted.end()
    
    # Алхам 4: Нягтралыг бууруулах
    final_image = final_tilted_pixmap.toImage() # final_tilted_pixmap-с image үүсгэнэ
    current_scaling_factor = scaling_factor_param

    if current_scaling_factor <= 1.001 and current_scaling_factor >= 0.999:
        scaled_image = final_image
    elif current_scaling_factor < 0.1 :
        print(f"Warning: Scaling factor {current_scaling_factor} is too small. Using unscaled image.")
        scaled_image = final_image
    else:
        # Багасгах хэмжээг padded_final_width, padded_final_height-с тооцоолно
        target_width = int(padded_final_width / current_scaling_factor)
        target_height = int(padded_final_height / current_scaling_factor)

        if target_width < 1 or target_height < 1:
            print(f"Warning: Scaled image for '{text_to_render[:20]}...' would be too small. Using unscaled image.")
            scaled_image = final_image
        else:
            scaled_image = final_image.scaled(
                target_width,
                target_height,
                Qt.KeepAspectRatio,
                Qt.SmoothPixmapTransform
            )
    
    if scaled_image.save(output_path, "PNG"):
        # print(f"Generated: {output_path} (Font: {font_name_for_json}, Tilt: {random_tilt_angle:.2f})")
        return True, font_name_for_json
    else:
        print(f"Error: Failed to save image to {output_path}")
        return False, None

def main():
    app = QApplication.instance()
    if app is None:
        app = QApplication(sys.argv)

    try:
        script_location_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        print("Info: '__file__' is not defined. Using CWD as base.")
        script_location_dir = os.getcwd()
    print(f"Info: Base directory for relative paths: {script_location_dir}")

    valid_fonts = []
    print("Info: Checking font files...")
    for f_path_relative in all_fonts:
        abs_font_path_attempt1 = os.path.abspath(os.path.join(script_location_dir, f_path_relative))
        if os.path.exists(abs_font_path_attempt1):
            valid_fonts.append(abs_font_path_attempt1)
        elif os.path.exists(f_path_relative): 
            valid_fonts.append(os.path.abspath(f_path_relative))
        else:
            print(f"  Warning: Font file not found: {f_path_relative} (checked relative to script and as-is)")

    if not valid_fonts:
        print("Error: No valid font files found. Exiting.")
        return
    print(f"Info: Found {len(valid_fonts)} valid font(s).")

    output_dir_abs = os.path.abspath(os.path.join(script_location_dir, OUTPUT_DIR_NAME))
    if not os.path.exists(output_dir_abs):
        try:
            os.makedirs(output_dir_abs)
            print(f"Info: Created output directory: {output_dir_abs}")
        except OSError as e:
            print(f"Error: Could not create output directory {output_dir_abs}: {e}")
            return
    else:
        print(f"Info: Output directory: {output_dir_abs}")

    text_file_abs_path = os.path.abspath(os.path.join(script_location_dir, TEXT_FILE_PATH))
    print(f"Info: Reading text file from: {text_file_abs_path}")
    
    full_text_content = ""
    try:
        with open(text_file_abs_path, 'r', encoding='utf-8') as f:
            full_text_content = f.read().strip()
        print(f"Info: Read {len(full_text_content)} characters.")
    except FileNotFoundError:
        print(f"Error: Text file not found: {text_file_abs_path}. Exiting.")
        return
    
    if not full_text_content:
        print(f"Info: No text content found. Exiting.")
        return

    words = full_text_content.split()
    if not words:
        print("Info: No words found after splitting. Exiting.")
        return
    print(f"Info: Split text into {len(words)} words.")

    all_generated_data_for_json = []
    generated_image_count = 0
    current_word_index = 0
    
    print(f"Info: Starting image generation (max {MAX_IMAGES_TO_GENERATE} images)...")
    print(f"Info: Using scaling factor: {SCALING_FACTOR_CONFIG}")
    print(f"Info: Base rotation for Mongolian script: {BASE_ROTATION_ANGLE} degrees")
    print(f"Info: Random tilt range for final image: {RANDOM_TILT_RANGE} degrees")

    while current_word_index < len(words) and generated_image_count < MAX_IMAGES_TO_GENERATE:
        num_words_to_combine = 1
        
        if current_word_index + num_words_to_combine > len(words):
            num_words_to_combine = len(words) - current_word_index
            if num_words_to_combine <= 0:
                break

        word_chunk_list = words[current_word_index : current_word_index + num_words_to_combine]
        text_for_image = " ".join(word_chunk_list)

        if not text_for_image.strip():
            current_word_index += num_words_to_combine
            continue
            
        font_path_to_use = random.choice(valid_fonts)
        base_name_part = sanitize_filename(text_for_image)
        image_filename = f"{base_name_part}_{generated_image_count + 1}.png"
        output_filepath_to_use = os.path.join(output_dir_abs, image_filename)
        
        success, used_font_name = generate_text_image(app, text_for_image, font_path_to_use, output_filepath_to_use, SCALING_FACTOR_CONFIG)
        
        if success:
            generated_image_count += 1
            json_entry = {
                "label": text_for_image,
                "fonts": [used_font_name],
                "path": JSON_IMAGE_PATH_PREFIX + image_filename
            }
            all_generated_data_for_json.append(json_entry)

            if generated_image_count % 20 == 0 :
                 print(f"   Generated {generated_image_count} images so far...")
        
        current_word_index += num_words_to_combine

    print(f"Finished image generation. Successfully generated {generated_image_count} image(s) in '{output_dir_abs}'.")

    if all_generated_data_for_json:
        json_output_path = os.path.join(output_dir_abs, JSON_OUTPUT_FILENAME)
        try:
            with open(json_output_path, 'w', encoding='utf-8') as f_json:
                json.dump(all_generated_data_for_json, f_json, ensure_ascii=False, indent=2)
            print(f"Info: Successfully saved JSON data to {json_output_path}")
        except IOError as e:
            print(f"Error: Could not save JSON file {json_output_path}: {e}")
    else:
        print("Info: No data to save to JSON file.")

if __name__ == '__main__':
    main()

Info: '__file__' is not defined. Using CWD as base.
Info: Base directory for relative paths: c:\Users\domogdog\Desktop\bigproject\Mongol-tuulgatnii-hoyr-erhem-project-OCR-\munguu
Info: Checking font files...
Info: Found 18 valid font(s).
Info: Created output directory: c:\Users\domogdog\Desktop\bigproject\Mongol-tuulgatnii-hoyr-erhem-project-OCR-\munguu\generated_mgl_tilt_v3
Info: Reading text file from: c:\Users\domogdog\Desktop\bigproject\Mongol-tuulgatnii-hoyr-erhem-project-OCR-\adiya\web_scrap\scraped_data\newsmn.txt
Info: Read 1292848 characters.
Info: Split text into 216407 words.
Info: Starting image generation (max 100 images)...
Info: Using scaling factor: 1.2
Info: Base rotation for Mongolian script: -90 degrees
Info: Random tilt range for final image: (-15, 15) degrees


AttributeError: type object 'Qt' has no attribute 'SmoothPixmapTransform'