In [5]:
import numpy as np
import pandas as pd
import zipfile
from skimage.io import imread
from skimage.transform import resize
from typing import Tuple
from sklearn.preprocessing import LabelEncoder


def take_hand_gestures_image_class_name(line: str) -> str:
    """Return the class name of a hand-gesture image.

    The class name is the part of ``line`` that precedes the first ``/``;
    when ``line`` contains no ``/`` the whole string is returned.
    """
    class_name, _, _ = line.partition('/')
    return class_name

def take_gem_class(line: str) -> str:
    """Return the class name of a gemstone image.

    The class name is the second ``/``-separated component of ``line``.
    An ``IndexError`` is raised when ``line`` contains no ``/``.
    """
    # Only the first two separators matter; the remainder stays in one piece.
    components = line.split('/', 2)
    return components[1]


def take_food_image_class_name(line: str) -> str:
    """Return the class name of a food image.

    The class name is the second ``/``-separated component of ``line``.
    An ``IndexError`` is raised when ``line`` contains no ``/``.
    """
    path_parts = line.split('/')
    class_name = path_parts[1]
    return class_name


def take_flower_image_class_name(line: str) -> str:
    """Return the class name of a flower image.

    The class name is the part of the file name (the text after the last
    ``/`` in ``line``) that precedes its first ``_``. A file name without
    ``_`` is returned whole.
    """
    file_name = line.rpartition('/')[2]
    class_name, _, _ = file_name.partition('_')
    return class_name


# Dispatch table: data-set name -> function that extracts an image's class
# name from its path.
sets_name = {
    'flower': take_flower_image_class_name,
    'food': take_food_image_class_name,
    'hand_gestures': take_hand_gestures_image_class_name,
    'gems': take_gem_class,
}


def read_images(file_location: str, file_format: str, image_size: Tuple[int, int], set_name: str, num_of_records=-1):
    """Load grayscale images from a zip archive together with encoded labels.

    Every archive member whose name ends with ``file_format`` is read as a
    grayscale image, resized to ``image_size`` when its shape differs, and
    flattened into one row of the returned matrix. The class name of each
    image is derived from its path inside the archive by the extractor
    registered for ``set_name`` in ``sets_name``; the class names are then
    encoded with ``LabelEncoder``.

    Args:
        file_location: Path of the zip archive.
        file_format: File-name suffix of the members to load, e.g. ``'.jpg'``.
        image_size: Target ``(rows, columns)`` shape of every image.
        set_name: Key of ``sets_name`` selecting the class-name extractor.
        num_of_records: Maximum number of images to load. ``None`` or a
            non-positive value (the default ``-1``) loads every matching
            member.

    Returns:
        A tuple ``(classes, image_matrix)``: the encoded labels and an array
        holding one flattened image per row, both in archive order.

    Raises:
        KeyError: If ``set_name`` is not a key of ``sets_name``.
        ValueError: If no archive member ends with ``file_format``.
    """
    take_class_name = sets_name[set_name]
    # Normalise "no limit" (None / non-positive) to None so the loop check is simple.
    limit = num_of_records if num_of_records is not None and num_of_records > 0 else None

    classes = []
    images = []
    with zipfile.ZipFile(file_location, 'r') as z:
        for member in z.namelist():
            if not member.endswith(file_format):
                continue
            # Close each member stream as soon as the image has been decoded
            # instead of leaving one open handle per image.
            with z.open(member) as ifile:
                image = imread(ifile, as_gray=True)
            if image.shape[0] != image_size[0] or image.shape[1] != image_size[1]:
                image = resize(image, image_size, anti_aliasing=True)
            classes.append(take_class_name(member))
            images.append(image.ravel())
            # Stop after exactly `limit` images; counting the loaded images
            # avoids the off-by-one of a counter that starts at 1.
            if limit is not None and len(images) >= limit:
                break

    if not images:
        raise ValueError(
            f"no files ending with {file_format!r} found in {file_location!r}")

    image_matrix = np.stack(images)
    del images  # drop the per-image arrays once they are copied into the matrix
    classes = LabelEncoder().fit_transform(classes)

    return classes, image_matrix

# Load the gemstone archive as 128x128 grayscale images; `num_of_records`
# caps how many images are read.
# NOTE: despite the `_flower` suffix, these variables hold the 'gems' data set.
labels_flower, images_flower = read_images(
    file_location='/home/sandalas/studia/8 sem/Project/data/raw_data/d17 - gemstones_I.zip',
    file_format='.jpg',
    image_size=(128, 128),
    set_name='gems',
    num_of_records=1000,
)

# Show the encoded class label of every loaded image.
print(labels_flower)

[ 0  0  0  0  1  1  1  1  2  2  2  2  3  3  3  3  4  4  4  4  5  5  5  5
  6  6  6  6  7  7  7  7  8  8  8  8  8  9  9  9  9  9 10 10 10 10 11 11
 11 11 12 12 12 12 13 13 13 13 14 14 14 14 15 15 15 15 15 16 16 16 16 17
 17 17 17 19 19 19 19 18 18 18 18 20 20 20 20 21 21 21 21 22 22 22 22 23
 23 23 23 24 24 24 24 24 25 25 25 25 26 26 26 26 27 27 27 27 28 28 28 28
 29 29 29 29 30 30 30 30 30 31 31 31 31 32 32 32 32 33 33 33 33 34 34 34
 34 35 35 35 35 36 36 36 36 37 37 37 37 38 38 38 38 39 39 39 39 39 40 40
 40 40 41 41 41 41 42 42 42 42 42 43 43 43 43 44 44 44 44 45 45 45 45 46
 46 46 46 46 47 47 47 47 48 48 48 48 49 49 49 49 50 50 50 50 51 51 51 51
 51 52 52 52 52 53 53 53 53 53 54 54 54 54 54 55 55 55 55 55 56 56 56 56
 57 57 57 57 58 58 58 58 59 59 59 59 60 60 60 60 60 61 61 61 61 62 62 62
 62 63 63 63 63 64 64 64 64 65 65 65 65 66 66 66 66 67 67 67 67 68 68 68
 68 69 69 69 69 70 70 70 70 71 71 71 71 72 72 72 72 73 73 73 73 74 74 74
 74 75 75 75 75 76 76 76 76 77 77 77 77 78 78 78 78