In [23]:
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import csv
import os

font_path = "times.ttf"
font_size = 52
chars = "abcdefghijklmnopqrstuvwxyz"
output_folder = "symbols"

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

try:
    font = ImageFont.truetype(font_path, font_size)
except IOError:
    print(f"Ошибка:")
    exit()

for char in chars:
    image = Image.new("L", (font_size * 2, font_size * 2), 255)
    draw = ImageDraw.Draw(image)

    bbox = draw.textbbox((0, 0), char, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]

    x = (image.width - text_width) / 2
    y = (image.height - text_height) / 2
    draw.text((x, y), char, font=font, fill=0)
    bbox = image.getbbox()
    if bbox:
        image = image.crop(bbox)
    else:
        print(f"Предупреждение: Символ '{char}' не имеет видимых пикселей.")
        continue

    image.save(f"{output_folder}/{char}.png")

def calculate_features(image_path):
    img = Image.open(image_path).convert("1")
    pixels = np.array(img)
    mass = np.sum(pixels == 0)
    if mass == 0:
        return 0, 0, 0, 0, 0
    rows, cols = np.where(pixels == 0)
    center_x = np.mean(cols) if len(cols) > 0 else 0
    center_y = np.mean(rows) if len(rows) > 0 else 0
    inertia_x = np.sum((rows - center_y)**2)
    inertia_y = np.sum((cols - center_x)**2)
    return mass, center_x, center_y, inertia_x, inertia_y

with open("features1.csv", "w", newline="") as csvfile:  
    writer = csv.writer(csvfile)
    writer.writerow(["symbol", "mass", "center_x", "center_y", "inertia_x", "inertia_y"])  

    for char in chars:
        image_path = f"{output_folder}/{char}.png"
        try:
            features = calculate_features(image_path)
            writer.writerow([char, *features])
        except FileNotFoundError:
            print(f"Ошибка: Файл {image_path} не найден.")
        except Exception as e:
            print(f"Ошибка при обработке символа '{char}': {e}")

print("Эталонные признаки сохранены в features1.csv")


Эталонные признаки сохранены в features1.csv


In [15]:
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import math
import os
import csv

def calculate_features(image_path):
    img = Image.open(image_path).convert("1")
    pixels = np.array(img)
    mass = np.sum(pixels == 0)
    
    if mass == 0:
        return np.zeros(5)
    
    rows, cols = np.where(pixels == 0)
    center_x = np.mean(cols)
    center_y = np.mean(rows)
    inertia_x = np.sum((rows - center_y)**2)
    inertia_y = np.sum((cols - center_x)**2)
    
    return np.array([mass, center_x, center_y, inertia_x, inertia_y])

def normalize_features(features):
    mean = np.mean(features)
    std = np.std(features)
    if std == 0:
        return features
    return (features - mean) / std

def euclidean_distance(features1, features2):
    features1_norm = normalize_features(features1)
    features2_norm = normalize_features(features2)
    return math.sqrt(np.sum((features1_norm - features2_norm)**2))

def similarity_measure(distance):
    return 1 / (1 + distance) if distance != 0 else 1.0

def segment_characters(vertical_profile, min_gap=3):
    segments = []
    in_char = False
    start = 0
    
    for i, value in enumerate(vertical_profile):
        if value > 0 and not in_char:
            start = i
            in_char = True
        elif value == 0 and in_char:
            if (i - start) >= min_gap:
                segments.append((start, i))
                in_char = False
    
    if in_char:
        segments.append((start, len(vertical_profile)))
    
    return segments

def recognize_characters(image_path, features_df):
    img = Image.open(image_path).convert("1")
    pixels = np.array(img)
    vertical_profile = np.sum(pixels == 0, axis=0)
    segments = segment_characters(vertical_profile)
    
    results = []
    for x1, x2 in segments:
        char_image = Image.fromarray(pixels[:, x1:x2]).convert("L")
        char_image.save("temp_char.bmp")
        char_features = calculate_features("temp_char.bmp")
        
        hypotheses = []
        for symbol, row in features_df.iterrows():
            ref_features = row.values
            distance = euclidean_distance(char_features, ref_features)
            similarity = similarity_measure(distance)
            hypotheses.append((symbol, similarity))
        
        hypotheses.sort(key=lambda x: x[1], reverse=True)
        results.append(hypotheses)
        os.remove("temp_char.bmp")
    
    return results, segments

def calculate_recognition_rate(recognized, original):
    errors = 0
    min_len = min(len(recognized), len(original))
    for i in range(min_len):
        if recognized[i] != original[i]:
            errors += 1
    correct_percentage = (min_len - errors) / len(original) * 100
    return errors, correct_percentage

def save_hypotheses(results, output_file):
    with open(output_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Symbol", "Hypotheses"])
        for i, hypotheses in enumerate(results):
            writer.writerow([i+1, str(hypotheses)])

def main():
    features_df = pd.read_csv("features1.csv", index_col="symbol")
    
    image_path = "lh1.bmp"
    original_string = "iloveyouihatehim"
    
    results, _ = recognize_characters(image_path, features_df)
    recognized_string = "".join([hyp[0][0] for hyp in results])
    
    save_hypotheses(results, "hypotheses.csv")
    
    print("Лучшие гипотезы:")
    for i, hyp in enumerate(results):
        print(f"{i+1}: {hyp[:3]}...")  
    
    print(f"\nРаспознанная строка: {recognized_string}")
    print(f"Оригинальная строка: {original_string}")
    
    errors, percentage = calculate_recognition_rate(recognized_string, original_string)
    print(f"\nОшибок: {errors}, Процент верных: {percentage:.2f}%")
    

if __name__ == "__main__":
    main()

Лучшие гипотезы:
1: [('i', 0.991937100485438), ('l', 0.9880926972237751), ('j', 0.9371778135175904)]...
2: [('l', 0.9944779873698968), ('i', 0.9920665993886094), ('j', 0.9302045376614332)]...
3: [('o', 0.9054221682970245), ('u', 0.7741513833382708), ('n', 0.7318712152269546)]...
4: [('a', 0.9530249764757369), ('e', 0.9482317329214371), ('v', 0.8941594063234111)]...
5: [('c', 0.9875455221777936), ('e', 0.8992006768209128), ('a', 0.8950804348533767)]...
6: [('y', 0.9633382583270965), ('r', 0.9112209534729027), ('k', 0.9075179878485338)]...
7: [('o', 0.9040839197670499), ('u', 0.7751318505590409), ('n', 0.7327465759241153)]...
8: [('n', 0.9228870546684638), ('v', 0.8672930495373109), ('u', 0.8634290251779322)]...
9: [('i', 0.991937100485438), ('l', 0.9880926972237751), ('j', 0.9371778135175904)]...
10: [('z', 0.9975930074632929), ('h', 0.988516340373003), ('s', 0.9545838330134929)]...
11: [('c', 0.9733521734211104), ('e', 0.9187181575843653), ('a', 0.9144685350316829)]...
12: [('t', 0.989

In [5]:
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import math
import os
import csv

def calculate_features(image_path):
    img = Image.open(image_path).convert("1")
    pixels = np.array(img)
    mass = np.sum(pixels == 0)
    
    if mass == 0:
        return np.zeros(5)
    
    rows, cols = np.where(pixels == 0)
    center_x = np.mean(cols)
    center_y = np.mean(rows)
    inertia_x = np.sum((rows - center_y)**2)
    inertia_y = np.sum((cols - center_x)**2)
    
    return np.array([mass, center_x, center_y, inertia_x, inertia_y])

def normalize_features(features):
    mean = np.mean(features)
    std = np.std(features)
    if std == 0:
        return features
    return (features - mean) / std

def euclidean_distance(features1, features2):
    features1_norm = normalize_features(features1)
    features2_norm = normalize_features(features2)
    return math.sqrt(np.sum((features1_norm - features2_norm)**2))

def similarity_measure(distance):
    return 1 / (1 + distance) if distance != 0 else 1.0

def segment_characters(vertical_profile, min_gap=3):
    segments = []
    in_char = False
    start = 0
    
    for i, value in enumerate(vertical_profile):
        if value > 0 and not in_char:
            start = i
            in_char = True
        elif value == 0 and in_char:
            if (i - start) >= min_gap:
                segments.append((start, i))
                in_char = False
    
    if in_char:
        segments.append((start, len(vertical_profile)))
    
    return segments

def recognize_characters(image_path, features_df):
    img = Image.open(image_path).convert("1")
    pixels = np.array(img)
    vertical_profile = np.sum(pixels == 0, axis=0)
    segments = segment_characters(vertical_profile)
    
    results = []
    for x1, x2 in segments:
        char_image = Image.fromarray(pixels[:, x1:x2]).convert("L")
        char_image.save("temp_char.bmp")
        char_features = calculate_features("temp_char.bmp")
        
        hypotheses = []
        for symbol, row in features_df.iterrows():
            ref_features = row.values
            distance = euclidean_distance(char_features, ref_features)
            similarity = similarity_measure(distance)
            hypotheses.append((symbol, similarity))
        
        hypotheses.sort(key=lambda x: x[1], reverse=True)
        results.append(hypotheses)
        os.remove("temp_char.bmp")
    
    return results, segments

def calculate_recognition_rate(recognized, original):
    errors = 0
    min_len = min(len(recognized), len(original))
    for i in range(min_len):
        if recognized[i] != original[i]:
            errors += 1
    correct_percentage = (min_len - errors) / len(original) * 100
    return errors, correct_percentage

def save_hypotheses(results, output_file):
    with open(output_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Symbol", "Hypotheses"])
        for i, hypotheses in enumerate(results):
            writer.writerow([i+1, str(hypotheses)])

def main():
    features_df = pd.read_csv("features1.csv", index_col="symbol")
    
    image_path = "i_love_you_1.bmp"
    original_string = "iloveyou"
    
    results, _ = recognize_characters(image_path, features_df)
    recognized_string = "".join([hyp[0][0] for hyp in results])
    
    save_hypotheses(results, "hypotheses.csv")
    
    print("Лучшие гипотезы:")
    for i, hyp in enumerate(results):
        print(f"{i+1}: {hyp[:3]}...") 
    
    print(f"\nРаспознанная строка: {recognized_string}")
    print(f"Оригинальная строка: {original_string}")
    
    errors, percentage = calculate_recognition_rate(recognized_string, original_string)
    print(f"\nОшибок: {errors}, Процент верных: {percentage:.2f}%")
    
    # Эксперимент с другим размером шрифта
    print("\nЭксперимент с другим размером шрифта:")
    diff_size_image = "i_love_you_small.bmp"
    results_diff, _ = recognize_characters(diff_size_image, features_df)
    recognized_diff = "".join([hyp[0][0] for hyp in results_diff])
    
    print(f"Распознанная строка: {recognized_diff}")
    errors_diff, percentage_diff = calculate_recognition_rate(recognized_diff, original_string)
    print(f"Ошибок: {errors_diff}, Процент верных: {percentage_diff:.2f}%")

if __name__ == "__main__":
    main()

Лучшие гипотезы:
1: [('i', 0.988642984457858), ('l', 0.9834896879147952), ('j', 0.9416775673474895)]...
2: [('l', 0.9943235779810432), ('i', 0.9939828330651287), ('j', 0.9320350813898534)]...
3: [('o', 0.8581620609362928), ('u', 0.8124695223508521), ('n', 0.765989127994338)]...
4: [('v', 0.9694855939287945), ('a', 0.8792980980293229), ('e', 0.8752291794674347)]...
5: [('a', 0.9397718495529175), ('e', 0.9351004487433331), ('v', 0.9067421833296498)]...
6: [('y', 0.9666468458982495), ('k', 0.9105445909436184), ('r', 0.9082155078247363)]...
7: [('o', 0.8581620609362928), ('u', 0.8124695223508521), ('n', 0.765989127994338)]...
8: [('n', 0.9236196210433697), ('v', 0.8667287325149178), ('u', 0.8640436004699719)]...

Распознанная строка: ilovayon
Оригинальная строка: iloveyou

Ошибок: 2, Процент верных: 75.00%

Эксперимент с другим размером шрифта:
Распознанная строка: iioecyon
Ошибок: 4, Процент верных: 50.00%
