# CIFAR10 Dataset Compression

In [6]:
import numpy as np
import matplotlib.pyplot as plt
from Outils.dataloader import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline

import os 
import glob

## Data Loading

In [3]:
from Outils.dataloader import load_CIFAR10

# Load the raw CIFAR-10 data.
cifar10_dir = 'Dataset/cifar-10-batches-py'

# Free the arrays left over from a previous run of this cell, so that two
# copies of the dataset are never held in memory at the same time.
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # Fresh kernel: the names do not exist yet, so there is nothing to free.
    # Only NameError is caught so that real failures (and Ctrl-C) still surface.
    pass

X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Stack the train and test images into one array; the clustering and
# compression steps below index into this combined array.
X = np.concatenate([X_train, X_test])
X.shape

Clear previously loaded data.


(60000, 32, 32, 3)

## Loading the clusters

Here we load the clusters obtained from the KMeans clustering algorithm, for several different numbers of clusters, into a dictionary so that they can be compared.

Note that the clusters are saved in the `./Clusters/` folder.

In [13]:
cluster_path = "./Clusters/*.npy"   # Path to all .npy files

# Map each file's base name (e.g. 'KMeans10') to the array stored in it.
clusters = {}
# sorted() makes the dictionary order independent of the file-system listing order.
for npy_file in sorted(glob.glob(cluster_path)):
    # Take the key from the file name itself instead of a fixed character
    # slice, so names of any length and either path separator work.
    key = os.path.splitext(os.path.basename(npy_file))[0]
    clusters[key] = np.load(npy_file)

clusters

{'KMeans10': array([7, 9, 6, ..., 8, 9, 3]),
 'KMeans20': array([17,  3,  0, ...,  6, 13,  1]),
 'KMeans30': array([28, 18, 27, ..., 13, 20, 11])}

## Apply Compression Methods

In [48]:
from Outils.compression import single_compression
import min_max_diff_flatten as mmdf 
import cv2

### Get the image sets (clusters)

In [59]:
# Output folder for the encoded images. It is built with os.path.join so it
# works on every OS; the final "" keeps a trailing separator so that
# `path + file_name` still produces a valid file path.
path = os.path.join(".", "Dataset", "Compressed", "mmdf_png", "")

# Prepare the output folder ONCE, before any cluster is processed: create it
# (including missing parents) and remove files left by a previous run.
# Doing this inside the loop would erase the output of earlier clusters.
os.makedirs(path, exist_ok=True)
for stale_file in glob.glob(os.path.join(path, '*')):
    if os.path.isfile(stale_file):
        os.remove(stale_file)

for name, cluster in clusters.items():
    print(name)
    for value in np.unique(cluster):
        # All images assigned to cluster `value` in this clustering.
        image_set = X[cluster == value]

        # Encoder returns three values; the last two are presumably the
        # per-set minimum / maximum images -- TODO confirm against
        # min_max_diff_flatten. They are not named `min` / `max`, so the
        # builtins stay usable in later cells.
        compressed_set, set_min, set_max = mmdf.Encoder(image_set)

        print(compressed_set.shape)

        # NOTE: writing the results to disk is currently disabled.
        # single_compression(compressed_set, path = './Dataset/Compressed/', method = 'mmdf_png')
        #
        # prefix = path + name
        # cv2.imwrite(prefix + "min" + str(value) + ".png", set_min)
        # cv2.imwrite(prefix + "max" + str(value) + ".png", set_max)
        # for i in range(compressed_set.shape[0]):
        #     cv2.imwrite(prefix + "cl" + str(value) + 'im' + str(i) + ".png", compressed_set[i])


KMeans10
(4414, 3072)
(8082, 3072)
(3318, 3072)
(8639, 3072)
(4987, 3072)
(5276, 3072)
(7022, 3072)
(6020, 3072)
(5810, 3072)
(6432, 3072)
KMeans20
(2401, 3072)
(3361, 3072)
(3522, 3072)
(3179, 3072)
(2489, 3072)
(2452, 3072)
(2742, 3072)
(4182, 3072)
(1746, 3072)
(2413, 3072)
(2572, 3072)
(2187, 3072)
(3353, 3072)
(3418, 3072)
(4512, 3072)
(3015, 3072)
(1682, 3072)
(4830, 3072)
(3600, 3072)
(2344, 3072)
KMeans30
(2706, 3072)
(1320, 3072)
(1978, 3072)
(1383, 3072)
(2441, 3072)
(2181, 3072)
(1667, 3072)
(2482, 3072)
(1229, 3072)
(1550, 3072)
(1308, 3072)
(2055, 3072)
(2756, 3072)
(2178, 3072)
(1422, 3072)
(1755, 3072)
(1374, 3072)
(1336, 3072)
(1905, 3072)
(3339, 3072)
(1787, 3072)
(3317, 3072)
(1959, 3072)
(1549, 3072)
(2003, 3072)


KeyboardInterrupt: 