In [67]:
import random

mean = 0.0
stddev = 0.1
random.gauss(mean, stddev)

def generate_normal_weights_matrix(rows, cols, mean=0.0, stddev=0.1):
    weights_matrix = []
    for _ in range(rows):
        row = [random.gauss(mean, stddev) for _ in range(cols)]
        weights_matrix.append(row)
    return weights_matrix

weights_matrix = generate_normal_weights_matrix(1000, 1000, mean, stddev)
weights = [weight for row in weights_matrix for weight in row]

In [68]:
def quantize_weights(weights, max_error):
    min_value = min(weights)
    max_value = max(weights)
    step = 2 * max_error
    quantized_weights = [round((weight - min_value) / step) for weight in weights]
    return quantized_weights, step, min_value

def dequantize_weights(quantized_weights, step, min_value):
    recovered_weights = [quantized_weight * step + min_value for quantized_weight in quantized_weights]
    return recovered_weights
    
max_error = 0.01
quantized_weights, step, min_value = quantize_weights(weights, max_error)
recovered_weights = dequantize_weights(quantized_weights, step, min_value)

In [69]:
import heapq

def huffman_encode(weights):
    from collections import Counter
    freq = Counter(weights)
    heap = [[weight, [symbol, '']] for symbol, weight in freq.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        l = heapq.heappop(heap)
        h = heapq.heappop(heap)
        for pair in l[1:]:
            pair[1] = '0' + pair[1]
        for pair in h[1:]:
            pair[1] = '1' + pair[1]
        newNode = [l[0] + h[0]] + l[1:] + h[1:]
        heapq.heappush(heap, newNode)
    huff_dict = {}
    if heap:
        heap = heapq.heappop(heap)
        for symbol_info in heap[1:]:
            huff_dict[symbol_info[0]] = symbol_info[1]
    else:
        huff_dict[heap[0][1][0]] = '0'
    encoded_data = ''.join([huff_dict[symbol] for symbol in weights])
    return encoded_data, huff_dict
    
def huffman_decode(encoded_data, huff_dict):
    inverse_dict = {v: k for k, v in huff_dict.items()}
    decoded_weights = []
    code = ''
    for bit in encoded_data:
        code += bit
        if code in inverse_dict:
            decoded_weights.append(inverse_dict[code])
            code = ''
    return decoded_weights

data = "ABRACADABRA"
encoded_data, huff_dict = huffman_encode(data)
print(f"Закодированные данные: {encoded_data}")
# Закодированные данные: 01101001110011110110100

decoded_data = huffman_decode(encoded_data, huff_dict)
print(f"Декодированные данные: {decoded_data}")
# Декодированные данные: ABRACADABRA
 
encoded_data, huffman_dict = huffman_encode(quantized_weights)

Закодированные данные: 01101001110011110110100
Декодированные данные: ['A', 'B', 'R', 'A', 'C', 'A', 'D', 'A', 'B', 'R', 'A']


In [70]:
def bit_packing(bit_string):
    padding_length = (8 - len(bit_string) % 8) % 8
    bit_string_padded = bit_string + '0' * padding_length
    bytes_list = []
    for i in range(0, len(bit_string_padded), 8):
        byte = bit_string_padded[i:i+8]
        bytes_list.append(int(byte, 2))
    packed_data = bytes(bytes_list)
    return packed_data, padding_length

def bit_unpacking(packed_data, padding_length):
    bit_string = ''
    for byte in packed_data:
        bits = bin(byte)[2:].zfill(8)
        bit_string += bits
    if padding_length > 0:
        bit_string = bit_string[:-padding_length]
    return bit_string

packed_data, padding_length = bit_packing(encoded_data)

In [71]:
import struct 
import pickle

def write_to_file(filename, packed_data, huffman_dict, step, min_value, padding_length):
    with open(filename, 'wb') as f:
        f.write(struct.pack('ff', step, min_value))
        f.write(struct.pack('H', padding_length))
        huffman_dict_serialized = pickle.dumps(huffman_dict)
        huff_dict_length = len(huffman_dict_serialized)
        f.write(struct.pack('I', huff_dict_length))
        f.write(huffman_dict_serialized)
        packed_data_length = len(packed_data)
        f.write(struct.pack('I', packed_data_length))
        f.write(packed_data)

compressed_filename = 'compressed_data.bin'
write_to_file(compressed_filename, packed_data, huffman_dict, step, min_value, padding_length)

In [72]:
def read_from_file(filename):
    with open(filename, 'rb') as f:
        step, min_value = struct.unpack('ff', f.read(8))
        padding_length = struct.unpack('H', f.read(2))[0]
        huff_dict_length = struct.unpack('I', f.read(4))[0]
        huffman_dict_serialized = f.read(huff_dict_length)
        huffman_dict = pickle.loads(huffman_dict_serialized)
        packed_data_length = struct.unpack('I', f.read(4))[0]
        packed_data = f.read(packed_data_length)
    return packed_data, huffman_dict, step, min_value, padding_length

packed_data_read, huffman_dict_read, step_read, min_value_read, padding_length_read = read_from_file(compressed_filename)
unpacked_bits = bit_unpacking(packed_data_read, padding_length_read)
decoded_weights = huffman_decode(unpacked_bits, huffman_dict_read)
decoded_weights = [int(w) for w in decoded_weights]
recovered_weights = dequantize_weights(decoded_weights, step_read, min_value_read)

In [73]:
max_difference = max([abs(w - rw) for w, rw in zip(weights, recovered_weights)])
print(f"Максимальная разница после восстановления: {max_difference}")

Максимальная разница после восстановления: 0.009999999715547894


In [74]:
def get_size(filename):
    return os.stat(filename).st_size / (1024 * 1024)

import os
print(f'Размер файла в мегабайтах {get_size(compressed_filename)}')

import struct
original_filename = 'original_data.bin'

with open(compressed_filename, 'wb') as f:
    for number in weights:
        packed_data = struct.pack('f', number)
        f.write(packed_data)

file_stats = os.stat(original_filename)
print(f'Размер файла в мегабайтах {get_size(original_filename)}')

Размер файла в мегабайтах 0.5255765914916992


FileNotFoundError: [WinError 2] Не удается найти указанный файл: 'original_data.bin'