In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import warnings
import matplotlib.pyplot as plt
import pandas as pd
import yaml
warnings.filterwarnings("ignore")
from scripts.create_nab_dataset import create_nab_data
from scripts.create_nuts_wind_data import create_nuts_data
from sz.SZ3.tools.pysz.pysz import SZ
from compress.sprintz_encode import compress_sprintz, decompress_sprintz, get_compress_info_sprintz
from compress.bypass import spatial_clustering
from compress.general_functions import get_errors, get_geo_dict, create_geo_plot
from compress.xor_encode import xor_compress_df, get_compress_info_xor, decompress_xor_df
from compress.lz4_encode import LZ4_compress_df, LZ4_decompress_df, get_compress_info_lz4
from compress.spatial_lz4 import spatial_clustering_PCA_LZ4, spatial_lz4_decompress, get_compress_info_spatial_PCA_LZ4
from compress.spatial_xor import spatial_clustering_xor, spatial_XOR_decompress, get_compress_info_spatial_xor
from compress.spatial_sprintz import spatial_clustering_sprintz, spatial_sprintz_decompress, get_compress_info_spatial_sprintz
from compress.sz3_encode import compress_sz3_df, decompress_sz3, get_compress_info_sz3, compress_sz3_all
from compress.cnn_encode import compress_cnn_sz3, decomress_cnn_sz3, get_compress_info_cnn_sz3, compress_cnn_cluster
from settings import MODEL_PARAMETERS_PATH, RESULTS_PATH

In [2]:
# Country codes to benchmark; each one is passed to create_nuts_data(country=...)
# and used as a key into `parameters`.
target_countries = ["AT", "CZ", "PL"]

# Per-country settings loaded from YAML. The encoding is pinned so that parsing
# does not depend on the platform's locale default (which is not UTF-8 on
# every OS).
with open(MODEL_PARAMETERS_PATH, "r", encoding="utf-8") as f:
    parameters = yaml.safe_load(f)

In [3]:
# Benchmark every compression scheme on each country's wind dataset and write
# one results spreadsheet per country: the compression ratio for every scheme,
# plus MSE/MAPE for the schemes whose output is decoded and compared with the
# input.

# The CNN-based variants share one code path; only these settings differ.
# Fields: (title printed to the log, key in the result dicts, use_dwt, model_compress)
cnn_variants = [
    ("CNN + zstd", "CNN_zstd", False, "zstd"),
    ("CNN + lz4", "CNN_lz4", False, "lz4"),
    ("CNN + DWT + zstd", "CNN_dwt_zstd", True, "zstd"),
    ("CNN + DWT + lz4", "CNN_dwt_lz4", True, "lz4"),
]

for country in target_countries:
    # Per-country accumulators keyed by algorithm name. Insertion order of
    # `comprassion_rate` defines the row order of the results table.
    # NOTE(review): the misspelled name is kept on purpose -- it is a notebook
    # global and cells outside this one may read it.
    comprassion_rate = {}
    mape = {}
    mse = {}

    # Load the data
    print(country)
    df_wind, wind_geo_dict = create_nuts_data(country=country)
    params = parameters[country]

    # Shape of a single series (one column of df_wind). Derived from the data
    # so the CNN decoder does not depend on a fixed series length.
    series_shape = (df_wind.shape[0],)

    # XOR
    print("\n")
    print("XOR \n")
    compressed_df = xor_compress_df(df_wind)
    comprassion_rate["xor"] = get_compress_info_xor(df_wind, compressed_df)

    # LZ4
    print("\n")
    print("LZ4 \n")
    compressed_df = LZ4_compress_df(df_wind)
    comprassion_rate["lz4"] = get_compress_info_lz4(df_wind, compressed_df)

    # Spatial XOR (input rounded to 15 decimals first)
    print("\n")
    print("Spatial XOR \n")
    df = df_wind.round(15).copy()
    compressed_df = spatial_clustering_xor(df, wind_geo_dict, params["cor_lvl"])
    comprassion_rate["spatial xor"] = get_compress_info_spatial_xor(df, compressed_df)

    # Spatial Sprintz: runs on integers, so values are scaled by 100 and cast.
    # NOTE(review): astype(int) truncates toward zero rather than rounding --
    # confirm this is intended.
    print("\n")
    print("Spatial Sprintz \n")
    df = df_wind * 100
    df = df.astype(int)
    compressed_df = spatial_clustering_sprintz(df.copy(), wind_geo_dict, params["cor_lvl"])
    comprassion_rate["spatial sprintz"] = get_compress_info_spatial_sprintz(df, compressed_df)

    # PCA (+ LZ4); decoding needs the cluster assignment, which is recomputed
    # with the same correlation level.
    print("\n")
    print("PCA \n")
    compressed_df = spatial_clustering_PCA_LZ4(df_wind, wind_geo_dict, params["cor_lvl"])
    comprassion_rate["pca"] = get_compress_info_spatial_PCA_LZ4(df_wind, compressed_df)
    clusters = spatial_clustering(df_wind, wind_geo_dict, params["cor_lvl"])
    dec_res = spatial_lz4_decompress(compressed_df, clusters)
    mse["pca"], mape["pca"] = get_errors(df_wind, dec_res)

    # SZ3
    print("\n")
    print("SZ3 \n")
    compressed_df = compress_sz3_df(df_wind, params["er_abs_sz3"])
    comprassion_rate["sz3"] = get_compress_info_sz3(df_wind, compressed_df)
    data = df_wind.values.transpose()  # one row per original column
    shape = [d.shape for d in data]
    dtype = data[0].dtype  # named `dtype` so the builtin `type` is not shadowed
    dec_res = decompress_sz3(compressed_df, shape, dtype)
    mse["sz3"], mape["sz3"] = get_errors(df_wind, dec_res)

    # CNN-based variants
    for title, key, use_dwt, model_compress in cnn_variants:
        print("\n")
        print(f"{title} \n")

        # DWT runs read their own epoch/filter settings and do not pass
        # `extra_layer`; plain runs pass all three.
        if use_dwt:
            train_kwargs = {
                "num_epochs": params["num_epochs_dwt"],
                "conv_filter": params["conv_filter_dwt"],
            }
        else:
            train_kwargs = {
                "num_epochs": params["num_epochs"],
                "extra_layer": params["extra_layer"],
                "conv_filter": params["conv_filter"],
            }

        enc_df = compress_cnn_sz3(df_wind,
                                  wind_geo_dict,
                                  cor_lvl=params["cor_lvl"],
                                  use_dwt=use_dwt,
                                  window_size=params["window_size"],
                                  plot_flag=False,
                                  er_abs_sz3=params["er_abs_sz3"],
                                  model_compress=model_compress,
                                  **train_kwargs)
        comprassion_rate[key] = get_compress_info_cnn_sz3(df_wind, enc_df)

        # Decode with the same window size and model codec that were used for
        # encoding, instead of relying on the decoder's defaults.
        dec_df = decomress_cnn_sz3(enc_df,
                                   series_shape,
                                   use_dwt=use_dwt,
                                   window_size=params["window_size"],
                                   model_compress=model_compress)
        dec_df = dec_df.sort_index(axis=1)

        # Each variant trains its own model, so its errors are measured on its
        # own reconstruction rather than reused from another run.
        mse[key], mape[key] = get_errors(df_wind, dec_df)

    # Results table: algorithms without error metrics get "-" in those columns.
    result_df = pd.DataFrame({
        'алгоритм': list(comprassion_rate.keys()),
        'коэффициент сжатия': list(comprassion_rate.values()),
        'mse': [mse.get(key, "-") for key in comprassion_rate.keys()],
        'mape': [mape.get(key, "-") for key in comprassion_rate.keys()]
    })
    result_df.to_excel(RESULTS_PATH / f"res_{country}_df.xlsx", index=False)

AT


XOR 

Размер исходных данных: 386712 байт 

Размер сжатых XOR данных: 385123 байт 

Коэффициент сжатия: 1.004


LZ4 

Размер исходных данных: 386712 байт 

Размер сжатых данных: 284113 байт 

Коэффициент сжатия: 1.361


Spatial XOR 

Размер исходных данных: 386712 байт 

Размер сжатых данных: 392609.375 байт 

Коэффициент сжатия: 0.985


Spatial Sprintz 

Размер исходных данных: 26682 байт 

Размер сжатых данных: 14054 байт 

Коэффициент сжатия: 1.899


PCA 

Размер исходных данных: 386712 байт 

Размер сжатых данных: 287266 байт 

Коэффициент сжатия: 1.346
MSE: 0.000795 

MAPE: 13.24 % 



SZ3 

Размер исходных данных: 386712 байт 

Размер сжатых данных: 21396 байт 

Коэффициент сжатия: 18.074
MSE: 0.000289 

MAPE: 20.03 % 



CNN + zstd 

Время обучения: 236.02 секунд
Size of compressed model (bytes): 7799
Время обучения: 254.45 секунд
Size of compressed model (bytes): 7798
Время обучения: 240.57 секунд
Size of compressed model (bytes): 7782
Размер исходных данных: 386712 байт 
