# Q, K and K^T generation with fixed scale

In [None]:
import numpy as np
from transformers import AutoTokenizer

# -----------------------------
# 1. Setup tokenizer
# -----------------------------
# Load the pretrained tokenizer published under the name "bert-base-uncased".
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Demo input. The number of tokens produced from this text becomes the row
# count of the embeddings, Q and K matrices built further down.
text = '''The comprehensive analysis of transformer-based neural network architectures reveals
 that attention mechanisms fundamentally revolutionize how artificial intelligence systems process
 sequential data by enabling parallel computation while maintaining contextual relationships between
 distant elements in the input sequence, thereby significantly improving performance on natural language
 processing tasks such as machine translation, text summarization, question answering, sentiment analysis,
 and named entity recognition, which collectively demonstrate the remarkable versatility and effectiveness
 of these sophisticated deep learning models in understanding and generating human-like text across diverse
 linguistic patterns and semantic structures. The comprehensive analysis of transformer-based neural network architectures reveals
 that attention mechanisms fundamentally revolutionize how artificial intelligence systems process'''

# Split the text into token strings, then map each token to its vocabulary id.
tokens = tokenizer.tokenize(text)
ids = tokenizer.convert_tokens_to_ids(tokens)
print(f"Tokens: {tokens}")
print(f"Token IDs: {ids}")

# -----------------------------
# 2. Generate embeddings (demo)
# -----------------------------
# Simulate embedding lookup of dimension d_model.
# NOTE: these are random stand-ins, one row per token id; the id values
# themselves are not used, only their count.
d_model = 128
# Fixed seed so that embeddings, W_Q and W_K are reproducible. All three are
# drawn from the same global RNG stream, so the order of the randn calls
# below determines their values.
np.random.seed(0)
embeddings = np.random.randn(len(ids), d_model)  # shape: (len(ids), d_model)

# -----------------------------
# 3. Define projection matrices for Q and K
# -----------------------------
d_k = 64  # query/key dimension
W_Q = np.random.randn(d_model, d_k)  # shape: (d_model, d_k)
W_K = np.random.randn(d_model, d_k)  # shape: (d_model, d_k)

# -----------------------------
# 4. Compute Q, K, and Kᵀ
# -----------------------------
Q = embeddings @ W_Q  # shape: (len(ids), d_k)
K = embeddings @ W_K  # shape: (len(ids), d_k)
K_T = K.T  # <--- Transpose of K, shape: (d_k, len(ids))

print("\nFloat Q shape:", Q.shape)
print("Float K shape:", K.shape)
print("Float K^T shape:", K_T.shape)

# -----------------------------
# 5. Normalize and quantize to Q0.7 (INT8) with SAME SCALE
# -----------------------------
def quantize_Q07_same_scale(mat1, mat2):
    """Quantize two matrices to signed 8-bit Q0.7 codes using one shared scale.

    Both inputs are divided by the largest absolute value found in either of
    them, multiplied by 128, rounded and saturated to [-128, 127]. Because
    +128 is not representable in int8, the largest positive value saturates
    to 127.

    Args:
        mat1: First array-like of real values.
        mat2: Second array-like of real values.

    Returns:
        A tuple ``(q1, q2, scale)`` where ``q1`` and ``q2`` are ``np.int8``
        arrays shaped like the inputs and ``scale`` is the shared maximum
        absolute value. If every element of both inputs is zero (or both are
        empty) ``scale`` is 0 and the codes are all zero.
    """
    mat1 = np.asarray(mat1)
    mat2 = np.asarray(mat2)

    def peak(mat):
        # np.max raises on zero-size input; an empty matrix contributes 0.
        return np.max(np.abs(mat)) if mat.size else 0.0

    # Find the maximum absolute value across BOTH matrices
    max_abs_combined = max(peak(mat1), peak(mat2))

    def quantize_with_scale(mat, scale):
        # A zero scale means every value is zero; dividing would yield NaN
        # and casting NaN to int8 is undefined, so emit zero codes directly.
        if scale == 0:
            return np.zeros(mat.shape, dtype=np.int8)
        # Normalize to [-1, 1] using the combined scale
        norm = mat / scale
        # Scale, round, then saturate to the int8 range
        q = np.clip(np.round(norm * 128), -128, 127)
        return q.astype(np.int8)

    q1 = quantize_with_scale(mat1, max_abs_combined)
    q2 = quantize_with_scale(mat2, max_abs_combined)

    return q1, q2, max_abs_combined

# Use the same scale for both Q and K, so that their int8 codes share one
# real-value-per-code factor.
Q_int8, K_int8, shared_scale = quantize_Q07_same_scale(Q, K)

# Transposing the quantized K gives the quantized K^T without re-quantizing.
K_T_int8 = K_int8.T  # <--- Quantized K transpose

# shared_scale is the largest |value| found in Q or K before quantization.
print(f"\nShared scale factor: {shared_scale}")
print(f"Q range: [{np.min(Q_int8)}, {np.max(Q_int8)}]")
print(f"K range: [{np.min(K_int8)}, {np.max(K_int8)}]")


# -----------------------------
# 6. Binary conversion (for hardware)
# -----------------------------
def int8_to_bin(vals):
    """Return the elements of ``vals`` as 8-character two's-complement bit strings.

    The array is flattened first, so the result is a flat list with one
    string per element.
    """
    # Masking with 0xFF maps a negative value v to 256 + v, which is its
    # 8-bit two's-complement bit pattern; non-negative values are unchanged.
    return [format(int(element) & 0xFF, '08b') for element in vals.flatten()]

# Flat lists of 8-bit strings, one per matrix element. int8_to_bin flattens
# its argument, so K_T_bin follows the element order of the transposed
# matrix K_T_int8 rather than that of K_int8.
Q_bin = int8_to_bin(Q_int8)
K_bin = int8_to_bin(K_int8)
K_T_bin = int8_to_bin(K_T_int8)

# Show the first eight encoded elements of each matrix as a quick sanity check.
print(f"\nExample binary values (Q): {Q_bin[:8]}")
print(f"Example binary values (K): {K_bin[:8]}")
print(f"Example binary values (K^T): {K_T_bin[:8]}")

# Optionally save them to files for FPGA/RTL testbench
# np.savetxt("Q_matrix.bin", Q_bin, fmt="%s")
# np.savetxt("K_matrix.bin", K_bin, fmt="%s")
# np.savetxt("K_T_matrix.bin", K_T_bin, fmt="%s")


# -----------------------------
# 7. Save Binary Data to Text Files
# -----------------------------

def save_binary_matrix(bin_list, rows, cols, filename):
    """Write a flat list of bit strings to ``filename`` as a ``rows`` x ``cols`` grid.

    Each output line holds ``cols`` consecutive entries of ``bin_list``
    separated by single spaces; ``rows`` lines are written in total.
    """
    text_rows = (
        " ".join(bin_list[r * cols:(r + 1) * cols]) + "\n"
        for r in range(rows)
    )
    with open(filename, "w") as out:
        out.writelines(text_rows)
    print(f"\nSaved: {filename}")

# Get matrix shapes; they tell save_binary_matrix how to cut each flat
# bit-string list back into rows.
rows_Q, cols_Q = Q_int8.shape
rows_K, cols_K = K_int8.shape
rows_KT, cols_KT = K_T_int8.shape

# Save each as text file (one matrix row per line, space-separated 8-bit strings).
# These files are read back later by the Q x K^T cell.
save_binary_matrix(Q_bin, rows_Q, cols_Q, "Q_matrix_fixed_scale.txt")
save_binary_matrix(K_bin, rows_K, cols_K, "K_matrix_fixed_scale.txt")
save_binary_matrix(K_T_bin, rows_KT, cols_KT, "K_T_matrix_fixed_scale.txt")



Tokens: ['the', 'comprehensive', 'analysis', 'of', 'transform', '##er', '-', 'based', 'neural', 'network', 'architecture', '##s', 'reveals', 'that', 'attention', 'mechanisms', 'fundamentally', 'revolution', '##ize', 'how', 'artificial', 'intelligence', 'systems', 'process', 'sequential', 'data', 'by', 'enabling', 'parallel', 'computation', 'while', 'maintaining', 'context', '##ual', 'relationships', 'between', 'distant', 'elements', 'in', 'the', 'input', 'sequence', ',', 'thereby', 'significantly', 'improving', 'performance', 'on', 'natural', 'language', 'processing', 'tasks', 'such', 'as', 'machine', 'translation', ',', 'text', 'sum', '##mar', '##ization', ',', 'question', 'answering', ',', 'sentiment', 'analysis', ',', 'and', 'named', 'entity', 'recognition', ',', 'which', 'collectively', 'demonstrate', 'the', 'remarkable', 'versa', '##tility', 'and', 'effectiveness', 'of', 'these', 'sophisticated', 'deep', 'learning', 'models', 'in', 'understanding', 'and', 'generating', 'human', '-

# **Q, K and K^T as real (floating-point) values**

In [None]:
# -----------------------------
# Save Real (Floating-Point) Data to Text Files
# -----------------------------

def save_real_matrix(matrix, filename):
    """Write a 2-D matrix to ``filename``, one row per line.

    Values are formatted with eight digits after the decimal point and
    separated by single spaces.
    """
    with open(filename, "w") as out:
        n_rows, n_cols = matrix.shape
        for r in range(n_rows):
            formatted = (format(matrix[r, c], ".8f") for c in range(n_cols))
            out.write(" ".join(formatted))
            out.write("\n")
    print(f"Saved: {filename}")


# Map the int8 codes back to fractions by dividing by 128, giving values in
# [-1, 127/128]. The result is NOT multiplied by shared_scale, so these are
# the normalized values, not the original Q/K magnitudes.
Q_float_normalized = Q_int8.astype(float) / 128
K_float_normalized = K_int8.astype(float) / 128
K_T_float_normalized = K_T_int8.astype(float) / 128

# Write each normalized matrix as text with 8 decimal places per value.
save_real_matrix(Q_float_normalized, "Q_float_normalized_fixed_scale.txt")
save_real_matrix(K_float_normalized, "K_float_normalized_fixed_scale.txt")
save_real_matrix(K_T_float_normalized, "K_T_float_normalized_fixed_scale.txt")

Saved: Q_float_normalized_fixed_scale.txt
Saved: K_float_normalized_fixed_scale.txt
Saved: K_T_float_normalized_fixed_scale.txt


# Product of Q and K^T in binary and decimal

In [None]:
import numpy as np

# -----------------------------
# 1. Read matrices from text files
# -----------------------------
def read_binary_matrix_from_file(filename):
    """Read a text file of space-separated 8-bit strings into an int8 matrix.

    Each non-blank line is one matrix row. Every token is parsed as an
    unsigned base-2 number and reinterpreted as 8-bit two's complement
    (values above 127 have 256 subtracted). Blank lines, including a trailing
    empty line, are skipped so they do not produce an empty, ragged row.

    Args:
        filename: Path of the text file to read.

    Returns:
        ``np.int8`` array with one row per non-blank line.

    Raises:
        ValueError: If a token is not a valid base-2 number, or if the rows
            have different lengths.
    """
    matrix_data = []
    with open(filename, 'r') as f:
        for line in f:
            # Split each line by whitespace to get individual 8-bit binary values
            binary_values = line.split()
            if not binary_values:
                continue  # blank line: not a matrix row
            row_values = []
            for bin_val in binary_values:
                int_val = int(bin_val, 2)
                # Convert to signed int8 (two's complement for values > 127)
                if int_val > 127:
                    int_val -= 256
                row_values.append(int_val)
            matrix_data.append(row_values)

    return np.array(matrix_data, dtype=np.int8)

# Read Q and K^T matrices from files written by the quantization cell.
print("Reading matrices from files...")
Q_matrix = read_binary_matrix_from_file("Q_matrix_fixed_scale.txt")
KT_matrix = read_binary_matrix_from_file("K_T_matrix_fixed_scale.txt")

# Report shapes and code ranges as a check that the round trip through the
# text files worked.
print(f"Q matrix fixed scale shape: {Q_matrix.shape}")
print(f"K^T matrix fixed scale shape: {KT_matrix.shape}")
print(f"Q matrix fixed scale range: [{np.min(Q_matrix)}, {np.max(Q_matrix)}]")
print(f"K^T matrix fixed scale range: [{np.min(KT_matrix)}, {np.max(KT_matrix)}]")

# -----------------------------
# 2. Compute matrix multiplication Q × K^T
# -----------------------------
print("\nComputing Q × K^T...")

# Perform matrix multiplication (result will be in int32 to avoid overflow).
# Both operands are widened first: a product of two int8 codes can reach
# 128 * 128, and each output element sums one such product per column of Q.
attention_scores_int = Q_matrix.astype(np.int32) @ KT_matrix.astype(np.int32)
print(f"Attention scores shape: {attention_scores_int.shape}")
print(f"Attention scores range: [{np.min(attention_scores_int)}, {np.max(attention_scores_int)}]")

# -----------------------------
# 3. Convert results to binary representation
# -----------------------------
def convert_to_Q07_and_binary(matrix, input_scale):
    """Re-quantize an integer product matrix to Q0.7 int8 codes and bit strings.

    The integer accumulators are first converted to real values with
    ``value = acc * input_scale**2 / 128**2`` (each operand code represented
    ``code * input_scale / 128``). The real values are then divided by their
    own largest absolute value, multiplied by 128, rounded and saturated to
    [-128, 127]. Because of that second normalization a non-zero
    ``input_scale`` cancels out of the codes and only affects the returned
    ``max_abs``.

    Args:
        matrix: NumPy integer array (e.g. the int32 result of Q @ K^T).
        input_scale: Shared scale that was used to quantize both operands.

    Returns:
        A tuple ``(q07_int8, bin_list, max_abs)``: the ``np.int8`` codes
        shaped like ``matrix``, a flat list of 8-character two's-complement
        strings for those codes, and the largest absolute real value that was
        used as the normalization factor. If the real-valued matrix is all
        zero (or empty), ``max_abs`` is 0 and every code is zero.
    """
    # First, convert back to floating point using the input scale
    float_matrix = (matrix.astype(float) * input_scale * input_scale) / (128 * 128)

    # Largest magnitude is the Q0.7 normalization factor; np.max raises on
    # zero-size input, so treat an empty matrix as having magnitude 0.
    max_abs = np.max(np.abs(float_matrix)) if float_matrix.size else 0.0

    if max_abs == 0:
        # Nothing to normalize: dividing by zero would give NaN, and casting
        # NaN to int8 is undefined, so emit zero codes directly.
        q07_int8 = np.zeros(float_matrix.shape, dtype=np.int8)
    else:
        # Normalize to [-1, 1], scale by 128, round and saturate to int8 range
        normalized = float_matrix / max_abs
        q07_int8 = np.clip(np.round(normalized * 128), -128, 127).astype(np.int8)

    # Masking with 0xFF yields the 8-bit two's-complement pattern of each code
    bin_list = [format(int(v) & 0xFF, '08b') for v in q07_int8.flatten()]

    return q07_int8, bin_list, max_abs

# NOTE(review): this constant is a hand-copied value of the scale printed by
# the quantization cell; it goes stale if the text, seed or dimensions change.
# convert_to_Q07_and_binary re-normalizes by the matrix's own maximum, so a
# non-zero scale cancels out of the int8 codes and bit strings; it only
# changes the returned q07_scale.
shared_scale = 31.85292984754848  # Replace with your actual shared_scale value

# attention_q07: int8 codes; attention_binary_q07: flat list of bit strings;
# q07_scale: largest absolute real-valued score used for normalization.
attention_q07, attention_binary_q07, q07_scale = \
convert_to_Q07_and_binary(attention_scores_int, shared_scale)

print(f"\nExample binary values: {attention_binary_q07[:5]}")

# -----------------------------
# 4. Save results to text files
# -----------------------------
def save_matrix_decimal(matrix, filename):
    """Write a 2-D matrix to ``filename`` as text, one row per line.

    Every element is rendered with ``str`` and elements within a row are
    separated by single spaces.
    """
    with open(filename, 'w') as out:
        n_rows, n_cols = matrix.shape
        for r in range(n_rows):
            cells = (str(matrix[r, c]) for c in range(n_cols))
            out.write(" ".join(cells) + "\n")
    print(f"Saved decimal matrix: {filename}")

def save_matrix_binary(bin_list, rows, cols, filename):
    """Write a flat list of bit strings to ``filename`` as ``rows`` lines.

    Line ``i`` contains entries ``i * cols`` up to (but excluding)
    ``(i + 1) * cols`` of ``bin_list``, joined by single spaces.
    """
    with open(filename, 'w') as out:
        for r in range(rows):
            start = r * cols
            out.write(" ".join(bin_list[start:start + cols]))
            out.write("\n")
    print(f"Saved binary matrix: {filename}")

# Get dimensions (used to cut the flat bit-string list back into rows).
rows, cols = attention_scores_int.shape

# Save in decimal format.
# Note: what is written is attention_q07, i.e. the re-quantized int8 codes,
# not the raw int32 accumulators in attention_scores_int.
save_matrix_decimal(attention_q07, "attention_scores_decimal.txt")

# Save in binary format (same codes as 8-bit two's-complement strings).
save_matrix_binary(attention_binary_q07, rows, cols, "attention_scores_binary.txt")

# -----------------------------
# 5. Display sample results
# -----------------------------
# print(f"\nSample attention scores (decimal):")
# print(attention_scores_int[:3, :3])  # Show first 3x3 submatrix

# print(f"\nSample attention scores (binary, first 3 values):")
# for i in range(min(3, len(attention_binary))):
#     decimal_val = attention_scores_int.flatten()[i]
#     binary_val = attention_binary[i]
#     print(f"Decimal: {decimal_val:6d} | Binary: {binary_val}")

# -----------------------------
# 6. Verification: Check file reading worked correctly
# -----------------------------
# print(f"\nVerification:")
# print(f"Original Q matrix (first 2x2):")
# print(Q_matrix[:2, :2])
# print(f"Original K^T matrix (first 2x2):")
# print(KT_matrix[:2, :2])

# Optional: If you have the shared_scale from previous code,
# you can also compute the floating-point equivalent
# shared_scale = your_scale_value  # from previous code
# attention_scores_float = (attention_scores_int.astype(float) * shared_scale * shared_scale) / (128 * 128)
# print(f"Floating-point equivalent (scaled): {attention_scores_float[:2, :2]}")

Reading matrices from files...
Q matrix fixed scale shape: (128, 64)
K^T matrix fixed scale shape: (64, 128)
Q matrix fixed scale range: [-128, 127]
K^T matrix fixed scale range: [-124, 120]

Computing Q × K^T...
Attention scores shape: (128, 128)
Attention scores range: [-34270, 29342]

Example binary values: ['00000001', '11110100', '11110000', '00010000', '00101001']
Saved decimal matrix: attention_scores_decimal.txt
Saved binary matrix: attention_scores_binary.txt


# Softmax computation - decimal, hex (unsigned Q0.12)

In [None]:
import numpy as np

# Read your Q*K^T matrix from decimal file
def read_decimal_matrix(filename):
    """Read a whitespace-separated text matrix of numbers into a float array.

    Each non-blank line is one matrix row. Blank lines, including a trailing
    empty line, are skipped so they do not produce an empty, ragged row.

    Args:
        filename: Path of the text file to read.

    Returns:
        NumPy float array with one row per non-blank line.

    Raises:
        ValueError: If a token cannot be parsed as a float.
    """
    matrix_data = []
    with open(filename, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                continue  # blank line: not a matrix row
            matrix_data.append([float(val) for val in tokens])

    return np.array(matrix_data)

# Compute softmax
def compute_softmax(x):
    """Return the softmax of ``x`` taken along axis 1 (each row sums to 1).

    The maximum along axis 1 is subtracted before exponentiating, so the
    largest exponent in each row is 0 and ``np.exp`` cannot overflow.
    """
    row_peak = np.max(x, axis=1, keepdims=True)
    numerators = np.exp(x - row_peak)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

# Convert to Q0.12 unsigned
def convert_to_q012(softmax_values):
    """Encode values as unsigned Q0.12 codes stored in ``np.uint16``.

    Values are multiplied by 4096 and rounded, then saturated to [0, 4095];
    an input of exactly 1.0 therefore maps to 4095 rather than 4096.
    """
    one = 1 << 12  # 4096 steps per unit in Q0.12
    codes = np.round(softmax_values * one)
    saturated = np.clip(codes, 0, one - 1)
    return saturated.astype(np.uint16)

# Convert to hex
def to_hex(values):
    """Return every element of ``values`` as an uppercase hex string.

    The array is flattened first; each string is zero-padded to at least
    three digits.
    """
    hex_codes = []
    for element in values.flatten():
        hex_codes.append(f"{int(element):03X}")
    return hex_codes

# Main computation
# NOTE(review): "attention_scores_decimal.txt" holds the re-normalized int8
# Q0.7 codes (range [-128, 127]), not real-valued scores, and no 1/sqrt(d_k)
# factor is applied before the softmax. Softmax over such integer codes will
# be very peaked; confirm this matches what the hardware reference expects.
qkt_matrix = read_decimal_matrix("attention_scores_decimal.txt")
softmax_decimal = compute_softmax(qkt_matrix)  # float probabilities, rows sum to 1
softmax_q012 = convert_to_q012(softmax_decimal)  # unsigned Q0.12 codes (uint16)
softmax_hex = to_hex(softmax_q012)  # flat list of 3-digit hex strings

rows, cols = softmax_decimal.shape

# Save decimal softmax: the floating-point probabilities with 8 decimal
# places (despite "q012" in the file name, these are not the Q0.12 codes).
with open("softmax_q012_decimal_fixed_scale.txt", 'w') as f:
    for i in range(rows):
        row_data = [f"{softmax_decimal[i, j]:.8f}" for j in range(cols)]
        f.write(" ".join(row_data) + "\n")

# Save Q0.12 hex: one matrix row per line, cut from the flat hex list.
with open("softmax_q012_hex_scale.txt", 'w') as f:
    for i in range(rows):
        row_data = softmax_hex[i * cols : (i + 1) * cols]
        f.write(" ".join(row_data) + "\n")



---

---





---



---

