In [9]:
import os
import numpy as np
from sklearn.cluster import KMeans
from PIL import Image

# 图像颜色聚类函数，提取图像的主要颜色
def extract_colors(image_path, num_colors=32):
    """Cluster the pixels of an image with k-means and return the cluster centres.

    :param image_path: path of the image to analyse; it is converted to RGB.
    :param num_colors: number of k-means clusters to fit.
    :return: list of ``num_colors`` 3-tuples of integers, one per cluster centre.
    """
    print(f"Extracting colors from {image_path}...")
    rgb_image = Image.open(image_path).convert('RGB')
    # One row per pixel, one column per channel.
    pixel_rows = np.array(rgb_image).reshape((-1, 3))
    # Fixed random_state keeps the extracted palette reproducible between runs.
    model = KMeans(n_clusters=num_colors, n_init=10, random_state=42).fit(pixel_rows)
    # astype(int) drops the fractional part of every centre coordinate.
    palette = model.cluster_centers_.astype(int)
    print(f"Extracted {len(palette)} colors from {image_path}.")
    return list(map(tuple, palette))

# 计算欧式距离，衡量颜色的相似度
def distance(c1, c2):
    """Return the Euclidean distance between two colors.

    Both inputs are converted to float64 before subtracting. Without the
    conversion, unsigned integer inputs (for example uint8 pixel values) wrap
    around on subtraction, so (0, 0, 0) versus (1, 0, 0) would measure 255
    instead of 1.

    :param c1: first color, any sequence or array of numbers.
    :param c2: second color, same length as ``c1``.
    :return: the distance as a NumPy float.
    """
    delta = np.asarray(c1, dtype=np.float64) - np.asarray(c2, dtype=np.float64)
    return np.linalg.norm(delta)

# 建立颜色映射关系，匹配原图颜色到校准图颜色
def build_color_mapping(original_colors, calibrated_colors):
    """Pair every original color with the closest calibrated color.

    Closeness is measured with ``distance``.

    :param original_colors: iterable of hashable colors (used as dict keys).
    :param calibrated_colors: candidate colors to choose from.
    :return: dict mapping each original color to its nearest calibrated color.
    """
    print("Building color mapping...")

    def closest_calibrated(source_color):
        # min() keeps the first candidate when several are equally close.
        return min(
            calibrated_colors,
            key=lambda candidate: distance(source_color, candidate),
        )

    mapping = {
        source_color: closest_calibrated(source_color)
        for source_color in original_colors
    }
    print("Color mapping built successfully.")
    return mapping

# 应用颜色映射到新图像
def apply_color_mapping(image_path, mapping, num_colors=32):
    """Recolor an image by sending its k-means palette through ``mapping``.

    The image is quantised into ``num_colors`` clusters. Each cluster centre is
    matched to the closest key of ``mapping`` (using ``distance``) and every
    pixel of that cluster is painted with the color stored under that key.

    :param image_path: path of the image to recolor; it is converted to RGB.
    :param mapping: dict of source color -> replacement color, such as the one
        returned by ``build_color_mapping``.
    :param num_colors: number of k-means clusters used to quantise the image.
    :return: a new PIL image with the same height and width as the input.
    :raises ValueError: if ``mapping`` is empty.
    """
    if not mapping:
        # Fail before the expensive clustering instead of inside min() afterwards.
        raise ValueError("mapping must contain at least one color")

    print(f"Applying color mapping to {image_path}...")
    image = Image.open(image_path).convert('RGB')
    image_np = np.array(image)
    h, w, _ = image_np.shape
    pixels = image_np.reshape((-1, 3))

    kmeans = KMeans(n_clusters=num_colors, n_init=10, random_state=42)
    labels = kmeans.fit_predict(pixels)
    centers = kmeans.cluster_centers_.astype(int)

    # Replace every cluster centre by the mapped value of its nearest source color.
    source_colors = list(mapping)
    new_centers = [
        mapping[min(source_colors, key=lambda c: distance(center, c))]
        for center in centers
    ]

    # Index the small palette with the label array in one NumPy operation.
    # A Python-level loop over every pixel produces the same array but is
    # orders of magnitude slower and far more memory hungry on large photos.
    palette = np.asarray(new_centers).astype(np.uint8)
    calibrated_image = palette[labels].reshape((h, w, 3))
    print(f"Finished processing {image_path}.")
    return Image.fromarray(calibrated_image)

# 自动查找“校验”图片，并处理同文件夹下的其他图片
def auto_calibrate_images(directory, num_colors=32):
    """Find calibration pairs in ``directory`` and recolor the other JPEG files.

    A calibration file is any entry whose name contains ``校准``; its original
    is the same name with ``-校准`` removed. For each pair a color mapping is
    built from the original to the calibrated image and applied to every other
    JPEG in the folder. Results are saved next to the inputs as
    ``calibrated_<filename>``.

    Skipped as targets: calibration files, the original of the current pair,
    and ``calibrated_*`` files left by an earlier run (so re-running does not
    produce ``calibrated_calibrated_*`` files).

    When several calibration pairs exist, each target is written once per pair
    to the same output path, so the last processed pair wins.

    :param directory: folder that holds the calibration pair(s) and the targets.
    :param num_colors: number of k-means clusters used for every image.
    :return: None.
    """
    calibration_marker = '校准'
    calibration_suffix = '-校准'
    output_prefix = 'calibrated_'
    jpeg_extensions = ('.jpg', '.jpeg')

    print(f"Scanning directory: {directory}")
    # Sorted so the processing order does not depend on the file system.
    files = sorted(os.listdir(directory))
    calibration_files = [f for f in files if calibration_marker in f]
    print(f"Found {len(calibration_files)} calibration files.")

    # Extension check is case-insensitive so camera files named *.JPG are included.
    targets = [
        f for f in files
        if f.lower().endswith(jpeg_extensions)
        and calibration_marker not in f
        and not f.startswith(output_prefix)
    ]

    for cal_file in calibration_files:
        original_file = cal_file.replace(calibration_suffix, '')
        cal_path = os.path.join(directory, cal_file)
        orig_path = os.path.join(directory, original_file)
        print(f"Calibration file: {cal_file}, Original file: {original_file}")

        if original_file == cal_file:
            # The name has the marker but not the '-校准' suffix, so the file
            # would be paired with itself and yield a meaningless mapping.
            print(f"Warning: {cal_file} does not contain '{calibration_suffix}', cannot derive its original file")
            continue

        if not os.path.exists(orig_path):
            print(f"Warning: Original file {original_file} not found for {cal_file}")
            continue

        print(f"Processing calibration file {cal_file} with original {original_file}...")
        original_colors = extract_colors(orig_path, num_colors)
        calibrated_colors = extract_colors(cal_path, num_colors)
        mapping = build_color_mapping(original_colors, calibrated_colors)

        for filename in targets:
            if filename == original_file:
                continue
            img_path = os.path.join(directory, filename)
            calibrated_img = apply_color_mapping(img_path, mapping, num_colors)
            output_path = os.path.join(directory, f'{output_prefix}{filename}')
            calibrated_img.save(output_path)
            print(f"Saved calibrated image: {output_path}")

    print("Color calibration process completed.")

# 运行程序
directory = r'C:\Users\keith\Desktop\摄影部图片工具备份 - 副本'  # replace with the path of your image folder
auto_calibrate_images(directory=directory, num_colors=32)


Scanning directory: C:\Users\keith\Desktop\摄影部图片工具备份 - 副本
Found 1 calibration files.
Calibration file: D-SY_06945-校准.jpg, Original file: D-SY_06945.jpg
Processing calibration file D-SY_06945-校准.jpg with original D-SY_06945.jpg...
Extracting colors from C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06945.jpg...
Extracted 32 colors from C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06945.jpg.
Extracting colors from C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06945-校准.jpg...
Extracted 32 colors from C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06945-校准.jpg.
Building color mapping...
Color mapping built successfully.
Applying color mapping to C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06946.jpg...
Finished processing C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06946.jpg.
Saved calibrated image: C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\calibrated_D-SY_06946.jpg
Applying color mapping to C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY_06947.jpg...
Finished processing C:\Users\keith\Desktop\摄影部图片工具备份 - 副本\D-SY

OCR space

In [3]:
import requests

def ocr_space_file(filename, api_key, timeout=60):
    """Upload a local file to the OCR.space API and return the decoded JSON reply.

    :param filename: path of the file to upload.
    :param api_key: OCR.space API key.
    :param timeout: seconds ``requests`` waits for the server before raising a
        timeout error; pass ``None`` to wait without limit.
    :return: the response body parsed as JSON.
    """
    api_url = 'https://api.ocr.space/parse/image'
    payload = {
        'apikey': api_key,
        'isOverlayRequired': True,
        'detectOrientation': True,
    }
    with open(filename, 'rb') as file:
        # Without a timeout a stalled connection would block this call forever.
        response = requests.post(
            api_url,
            files={filename: file},
            data=payload,
            timeout=timeout,
        )
    return response.json()

if __name__ == '__main__':
    import os

    # The API key is read from the environment so the secret is not stored in
    # the notebook (and does not leak when the notebook is shared).
    api_key = os.environ.get('OCR_SPACE_API_KEY')
    if not api_key:
        raise RuntimeError("Set the OCR_SPACE_API_KEY environment variable to your OCR.space API key")
    # Replace with the path of the image you want to recognise.
    file_path = r"\\192.168.10.229\图片\测试\图片分类\图片说明.jpeg"
    result = ocr_space_file(file_path, api_key)

    # Defensive: guard against a reply that is not a JSON object, so the
    # failure is reported clearly instead of as an AttributeError on .get().
    if not isinstance(result, dict):
        raise RuntimeError(f"Unexpected OCR.space response: {result!r}")
    parsed_results = result.get('ParsedResults')
    if not parsed_results:
        # Surface the API's own error text instead of failing on None[0].
        raise RuntimeError(f"OCR.space returned no results: {result.get('ErrorMessage')}")

    first_result = parsed_results[0]
    print(first_result.get('TextOverlay'))
    # Print the recognised text.
    parsed_text = first_result.get('ParsedText')
    print(parsed_text)


{'Lines': [{'LineText': 'Bulk Rename Utili', 'Words': [{'WordText': 'Bulk', 'Left': 60.0, 'Top': 29.0, 'Height': 14.0, 'Width': 36.0}, {'WordText': 'Rename', 'Left': 103.0, 'Top': 29.0, 'Height': 14.0, 'Width': 68.0}, {'WordText': 'Utili', 'Left': 177.0, 'Top': 29.0, 'Height': 14.0, 'Width': 33.0}], 'MaxHeight': 14.0, 'MinTop': 29.0}, {'LineText': 'main', 'Words': [{'WordText': 'main', 'Left': 60.0, 'Top': 65.0, 'Height': 10.0, 'Width': 41.0}], 'MaxHeight': 10.0, 'MinTop': 65.0}, {'LineText': '2024', 'Words': [{'WordText': '2024', 'Left': 471.0, 'Top': 29.0, 'Height': 14.0, 'Width': 43.0}], 'MaxHeight': 14.0, 'MinTop': 29.0}, {'LineText': '2024', 'Words': [{'WordText': '2024', 'Left': 471.0, 'Top': 61.0, 'Height': 14.0, 'Width': 43.0}], 'MaxHeight': 14.0, 'MinTop': 61.0}, {'LineText': '-07 13:43', 'Words': [{'WordText': '-07', 'Left': 545.0, 'Top': 29.0, 'Height': 14.0, 'Width': 28.0}, {'WordText': '13:43', 'Left': 581.0, 'Top': 29.0, 'Height': 14.0, 'Width': 45.0}], 'MaxHeight': 14.0,