In [1]:
from PIL import Image
import os
import numpy as np
import tensorflow as tf

In [2]:
# --- Configuration -----------------------------------------------------------
# Input directory holding the images to analyse. Alternative datasets are
# kept here as comments; enable exactly one `path`.
# path = '../dataset/instruction-front'
path = '../dataset/instruction-complete'
# path = '../dataset/instruction-complete-color'

# Output directory for the saved T matrices; enable the one matching `path`.
# s_path = "../dataset/syntax-front"
s_path = "../dataset/syntax-complete"
# s_path = "../dataset/syntax-complete-color"

# Side length of each T matrix, i.e. the number of distinct pixel values the
# counting cell can index (values must lie in [0, prog_ch - 1]).
# prog_ch = 14
prog_ch = 34

# --- Load images -------------------------------------------------------------
# os.listdir returns entries in arbitrary order; sorting makes the order of
# img_list reproducible across machines and runs.
img_files = sorted(os.listdir(path))

img_list = []
skipped_files = []
for f in img_files:
    img_path = os.path.join(path, f)
    try:
        # The context manager closes the underlying file handle once the
        # pixel data has been copied into the array.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img).astype(np.int32)
    except (OSError, ValueError):
        # Best-effort loading: sub-directories and non-image or unreadable
        # files are skipped (Pillow reports these as OSError subclasses),
        # but KeyboardInterrupt and programming errors are no longer hidden.
        skipped_files.append(f)
        continue
    img_list.append(img)

print(f"Loaded {len(img_list)} images from {path}; skipped {len(skipped_files)} unreadable entries")

In [3]:
# Offsets to the 8 cells surrounding the centre of a 3x3 neighbourhood.
# Enumerating with dy as the outer loop yields
#   dx = [-1,  0,  1, -1, 1, -1, 0, 1]
#   dy = [-1, -1, -1,  0, 0,  1, 1, 1]
neighbour_offsets = [
    (off_x, off_y)
    for off_y in (-1, 0, 1)
    for off_x in (-1, 0, 1)
    if (off_x, off_y) != (0, 0)
]
dx = [off_x for off_x, _ in neighbour_offsets]
dy = [off_y for _, off_y in neighbour_offsets]

# Number of offsets, and therefore the number of T matrices.
num_directions = len(dx)

# One independent prog_ch x prog_ch integer matrix of zeros per offset.
T_matrices = [np.zeros((prog_ch, prog_ch), dtype=int) for _ in dx]

In [4]:
def _overlap_slices(length, offset):
    """Return ``(orig_slice, shifted_slice)`` for one axis of size ``length``.

    The two slices have equal extent, and element ``k`` of the original slice
    sits ``offset`` positions after element ``k`` of the shifted slice, i.e.
    ``orig_index == shifted_index + offset``.
    """
    if offset >= 0:
        return slice(offset, length), slice(0, length - offset)
    return slice(0, length + offset), slice(-offset, length)


# Start from zeroed accumulators so that re-running this cell does not add
# the same counts a second time.
T_matrices = [np.zeros((prog_ch, prog_ch), dtype=int) for _ in dx]

# Iterate over every loaded image.
for arr in img_list:
    # Every value must be a valid row/column index of a T matrix. Without this
    # check an out-of-range value in the shifted position can be folded into
    # the wrong (row, col) bin, or surface as an opaque reshape error.
    if arr.size and (arr.min() < 0 or arr.max() >= prog_ch):
        raise ValueError(
            f"image values must lie in [0, {prog_ch - 1}] (prog_ch={prog_ch}), "
            f"got range [{arr.min()}, {arr.max()}]"
        )

    # Iterate over every offset; dx_n applies to axis 0, dy_n to axis 1.
    for n, (dx_n, dy_n) in enumerate(zip(dx, dy)):
        x_orig_slice, x_shifted_slice = _overlap_slices(arr.shape[0], dx_n)
        y_orig_slice, y_shifted_slice = _overlap_slices(arr.shape[1], dy_n)

        # Overlapping windows: the original entry at index (r, c) is paired
        # with the entry at index (r - dx_n, c - dy_n).
        original_values = arr[x_orig_slice, y_orig_slice]
        shifted_values = arr[x_shifted_slice, y_shifted_slice]

        # Encode each (original, shifted) pair as one flat index so a single
        # bincount yields the whole prog_ch x prog_ch co-occurrence table.
        indices = (original_values * prog_ch + shifted_values).ravel()
        counts = np.bincount(indices, minlength=prog_ch * prog_ch)

        # Row = original value, column = shifted value; accumulate in place.
        T_matrices[n] += counts.reshape((prog_ch, prog_ch))

# Print the accumulated matrices.
for i, T in enumerate(T_matrices):
    print(f"T{i+1} (方向 dx={dx[i]}, dy={dy[i]}):")
    print(T)
    print()


T1 (方向 dx=-1, dy=-1):
[[3347452  233408    3696 ...       0       0       0]
 [ 236186  486738    3437 ...       0     150       0]
 [   4306    2983    1051 ...       0       0       0]
 ...
 [      0       0       0 ...    1942       0       0]
 [      0     154       0 ...       0   15143       0]
 [      0       0       0 ...       0       0    3544]]

T2 (方向 dx=0, dy=-1):
[[3718629  186689    4144 ...       0       0       0]
 [ 186601  585356    4100 ...       0      68       0]
 [   4222    4065      24 ...       0       0       0]
 ...
 [      0       0       0 ...    7180       0       0]
 [      0      94       0 ...       0   18355       0]
 [      0       0       0 ...       0       0    4180]]

T3 (方向 dx=1, dy=-1):
[[3349326  234620    4247 ...       0       0       0]
 [ 232288  487467    2971 ...       0     132       0]
 [   3682    3388    1102 ...       0       0       0]
 ...
 [      0       0       0 ...    1958       0       0]
 [      0     180       0 ...       0

In [5]:
# Save each T matrix to its own comma-separated text file:
# T1.txt, T2.txt, ... inside s_path.

# Create the output directory once, before the loop. exist_ok=True replaces
# the separate exists() check and its check-then-create race.
os.makedirs(s_path, exist_ok=True)

for i, T in enumerate(T_matrices):
    filename = f"{s_path}/T{i+1}.txt"
    # "%.5e" writes six significant digits, so counts above 999,999 are
    # rounded in the file.
    # NOTE(review): confirm downstream consumers tolerate this rounding.
    np.savetxt(filename, T.astype(float), delimiter=',', fmt="%.5e")
    print(f"Saved {filename}")

Saved ./dataset/syntax-complete/T1.txt
Saved ./dataset/syntax-complete/T2.txt
Saved ./dataset/syntax-complete/T3.txt
Saved ./dataset/syntax-complete/T4.txt
Saved ./dataset/syntax-complete/T5.txt
Saved ./dataset/syntax-complete/T6.txt
Saved ./dataset/syntax-complete/T7.txt
Saved ./dataset/syntax-complete/T8.txt
