In [1]:
import pandas as pd #data manipulation and analysis
import numpy as np #operations on arrays
import os #interacting with the operating system
import joblib #provides lightweight pipelining/transparent disk-caching of functions and lazy re-evaluation (memoize pattern)
from sklearn.preprocessing import LabelBinarizer #convert multi-class labels to binary labels (belong or does not belong to the class)
from tqdm import tqdm #output a smart progress bar by wrapping around any iterable
from imutils import paths #getting proper image paths

In [2]:
# get all the image paths
image_paths = list(paths.list_images('D:\\Anaconda_zadachki\\RealTimeObjectDetection1\\Tensorflow\\workspace\\images\\collectedImages\\'))
# create a DataFrame 
data = pd.DataFrame()
labels = []
for i, image_path in tqdm(enumerate(image_paths), total=len(image_paths)):
    label = image_path.split(os.path.sep)[-2] #folder_name
    # save the relative path for mapping image to target
    data.loc[i, 'image_path'] = image_path
    labels.append(label)
labels = np.array(labels)
# one hot encode the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
print(f"The first one hot encoded labels: {labels[0]}")
print(f"Mapping the first one hot encoded label to its category: {lb.classes_[0]}")
print(f"Total instances: {len(labels)}")
for i in range(len(labels)):
    index = np.argmax(labels[i])
    data.loc[i, 'target'] = int(index)
# shuffle the dataset
data = data.sample(frac=1).reset_index(drop=True)
# save as CSV file
data.to_csv('D:\\Anaconda_zadachki\\RealTimeObjectDetection1\\Tensorflow\\workspace\\images\\data1.csv', index=False)
# pickle the binarized labels
print('Saving the binarized labels as pickled file')
joblib.dump(lb, 'D:\\Anaconda_zadachki\\RealTimeObjectDetection1\\Tensorflow\\workspace\\out\\lb.pkl')
print(data.head(5))

100%|██████████████████████████████████████████████████████████████████████████| 12180/12180 [00:11<00:00, 1023.84it/s]


The first one hot encoded labels: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Mapping the first one hot encoded label to its category: A
Total instances: 12180
Saving the binarized labels as pickled file
                                          image_path  target
0  D:\Anaconda_zadachki\RealTimeObjectDetection1\...    38.0
1  D:\Anaconda_zadachki\RealTimeObjectDetection1\...    19.0
2  D:\Anaconda_zadachki\RealTimeObjectDetection1\...    53.0
3  D:\Anaconda_zadachki\RealTimeObjectDetection1\...    19.0
4  D:\Anaconda_zadachki\RealTimeObjectDetection1\...    27.0


In [3]:
print(f"The first one hot encoded labels: {labels[212]}")
print(f"Mapping the first one hot encoded label to its category: {lb.classes_[1]}")

The first one hot encoded labels: [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Mapping the first one hot encoded label to its category: Agol
