# **Create AwA2 dataset for Hugging Face...**

The aim is to achieve a consistent dataset format for use in machine vision and for Hugging Face.

## **Install and import libraries**

In [1]:
# Shell escape: install the git-lfs package with apt.
!apt install git-lfs




git-lfs is already the newest version (3.0.2-1ubuntu0.3).
0 upgraded, 0 newly installed, 0 to remove and 38 not upgraded.


In [2]:
# Print `pip show` metadata for each package listed below, with a
# 100-character '#' rule before, between, and after the entries.
print("\n",100*"#","\n")
%pip show polars
print("\n",100*"#","\n")
%pip show pandas
print("\n",100*"#","\n")
%pip show numpy
print("\n",100*"#","\n")
%pip show matplotlib
print("\n",100*"#","\n")
%pip show huggingface_hub
print("\n",100*"#","\n")
%pip show datasets
print("\n",100*"#","\n")
%pip show pyarrow
print("\n",100*"#","\n")
%pip show pillow
print("\n",100*"#","\n")
%pip show opencv-python
print("\n",100*"#","\n")


 #################################################################################################### 

Name: polars
Version: 1.21.0
Summary: Blazingly fast DataFrame library
Home-page: https://www.pola.rs/
Author: 
Author-email: Ritchie Vink <ritchie46@gmail.com>
License: 
Location: /usr/local/lib/python3.11/dist-packages
Requires: 
Required-by: cudf-polars-cu12
Note: you may need to restart the kernel to use updated packages.

 #################################################################################################### 

Name: pandas
Version: 2.2.3
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: 
Author-email: The Pandas Development Team <pandas-dev@python.org>
License: BSD 3-Clause License

Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Copyright (c) 2011-2023, Open source contributors.

Redistribu

In [3]:
import zipfile
import os, sys
import base64
import io
import shutil
import json
import random
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import polars as pl
import pyarrow as pa
import cv2
import pickle

import pyarrow.parquet as pq

from PIL import Image

from kaggle_secrets import UserSecretsClient

from datasets import Dataset, DatasetDict

from huggingface_hub import (
    Repository, 
    get_full_repo_name,
    login,
    upload_folder,
    hf_hub_download,
    HfApi
)

from tqdm import tqdm

## **Support functions**

In [4]:
def save_pickle(input_save, file_path):
    """Pickle ``input_save`` into the file at ``file_path``.

    The file is opened in binary write mode, so existing content is
    replaced. Any exception is caught and reported with ``print``; the
    function never raises and always returns ``None``.

    Args:
        input_save: Object to serialize.
        file_path: Destination path of the pickle file.
    """
    try:
        with open(file_path, "wb") as pickle_sink:
            pickle.dump(input_save, pickle_sink)
            # Success is reported right after the dump, inside the `with`.
            print(f"File: {file_path} was saved.")
    except Exception as exc:
        print(f"Save file: {file_path} false!\n", exc)

In [5]:
def load_pickle(file_path):
    """Load and return the object pickled in ``file_path``.

    Any exception (missing file, corrupt data, ...) is caught and reported
    with ``print``; in that case ``None`` is returned, so callers must check
    the result before using it.

    SECURITY: ``pickle.load`` can execute arbitrary code while
    deserializing. Only call this on files from a trusted source.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` if loading failed.
    """
    try:
        with open(file_path, "rb") as infile:
            record_obj = pickle.load(infile)
        return record_obj
    except Exception as err:
        print(f"Load file: {file_path} false!\n", err)
        return None

In [6]:
def load_text_file(file_path):
    """Return the full text content of ``file_path``.

    The file is opened in text read mode with the platform default
    encoding. Any exception is caught and printed, in which case the
    function falls through and returns ``None``.

    Args:
        file_path: Path of the text file to read.
    """
    try:
        with open(file_path, "r") as text_source:
            whole_text = text_source.read()
    except Exception as exc:
        print(f"Load file: {file_path} false!\n", exc)
    else:
        return whole_text

In [7]:
def save_text_file(text_save, file_path):
    """Append ``text_save`` to the text file at ``file_path``.

    The file is opened in append mode ("a"): it is created when missing and
    existing content is kept. Any exception is caught and printed; the
    function never raises and always returns ``None``.

    Args:
        text_save: String to write.
        file_path: Destination path of the text file.
    """
    try:
        with open(file_path, "a") as text_sink:
            text_sink.write(text_save)
            print(f"File: {file_path} was saved.")
    except Exception as exc:
        print(f"Save file: {file_path} false!\n", exc)

In [8]:
def load_json_file(file_path):
    """Parse the JSON document stored in ``file_path`` and return it.

    Any exception (missing file, invalid JSON, ...) is caught and printed,
    in which case the function falls through and returns ``None``.

    Args:
        file_path: Path of the JSON file to read.
    """
    try:
        with open(file_path, 'r') as json_source:
            parsed = json.load(json_source)
    except Exception as exc:
        print(f"Load file: {file_path} false!\n", exc)
    else:
        return parsed

In [9]:
def save_json_file(input_data, file_path):
    """Write ``input_data`` to ``file_path`` as JSON indented by 4 spaces.

    The file is opened in write mode, so existing content is replaced. Any
    exception is caught and printed; the function never raises and always
    returns ``None``.

    Args:
        input_data: JSON-serializable object to store.
        file_path: Destination path of the JSON file.
    """
    try:
        with open(file_path, "w") as json_sink:
            json.dump(input_data, json_sink, indent=4)
            print(f"File: {file_path} was saved.")
    except Exception as exc:
        print(f"Save file: {file_path} false!\n", exc)
    

In [10]:
def list_files_dir(path_dir):
    """Map each entry of ``path_dir`` to its full path, keyed by name without extension.

    Only the last extension is stripped (``os.path.splitext``), so a name
    containing several dots keeps everything before the final one, e.g.
    ``"dog.v2.pickle"`` -> ``"dog.v2"``. This lets callers rebuild the file
    name as ``key + extension``. Entries that share the same stem overwrite
    each other; the last one returned by ``os.listdir`` wins.

    Args:
        path_dir: Directory to list.

    Returns:
        dict mapping stem -> ``os.path.join(path_dir, entry_name)``, in the
        order produced by ``os.listdir``.
    """
    files_dist = {}

    for file_name in os.listdir(path_dir):
        stem, _extension = os.path.splitext(file_name)
        files_dist[stem] = os.path.join(path_dir, file_name)

    return files_dist

In [11]:
def list_dir_in_dir(path_dir):
    """Return the names of the sub-directories directly inside ``path_dir``.

    Plain files are skipped. Names are returned in ``os.listdir`` order,
    without the ``path_dir`` prefix.
    """
    directories = []
    for entry_name in os.listdir(path_dir):
        if os.path.isdir(os.path.join(path_dir, entry_name)):
            directories.append(entry_name)
    return directories

In [12]:
def img_to_base64(path_img):
    """Read the image at ``path_img`` and return it as a base64 string.

    The image is opened with Pillow, written into an in-memory buffer in
    the format Pillow detected for the file (``img.format``), and the
    buffer is base64-encoded.

    NOTE(review): the image is decoded and re-saved rather than copied
    byte-for-byte, so the encoded bytes may differ from the file on disk
    (e.g. for lossy formats) - confirm this is intended.

    Args:
        path_img: Path of the image file.

    Returns:
        str: base64 text (UTF-8 decoded) of the re-saved image.
    """
    s_obj = io.BytesIO()
    # The context manager closes the file handle opened by Image.open, which
    # matters when this is called for many files in a loop.
    with Image.open(path_img) as img:
        img.save(s_obj, format=img.format)  # keep the original format (JPEG, PNG, etc.)
    base64_str = base64.b64encode(s_obj.getvalue()).decode('utf-8')
    return base64_str

In [13]:
def base64_to_img(base64_str):
    """Decode a base64 string and open the bytes as a Pillow image.

    Args:
        base64_str: base64-encoded image file content.

    Returns:
        The object returned by ``Image.open`` on an in-memory buffer holding
        the decoded bytes.
    """
    decoded_buffer = io.BytesIO(base64.b64decode(base64_str))
    return Image.open(decoded_buffer)

## **Load data**

#### ***Path of directories***

In [14]:
# Path components shared by all three AwA2 input directories.
_awa2_root_parts = ('/kaggle', 'input', 'awa2-dataset', 'AwA2')

# Pose annotation files, image files, and per-image license files.
path_pose_data = os.path.join(*_awa2_root_parts, 'pose', 'Annotations')
path_images = os.path.join(*_awa2_root_parts, 'data', 'JPEGImages')
path_license_image = os.path.join(*_awa2_root_parts, 'data', 'licenses')

#### ***Names of directories of files***

#### *Images:*

In [15]:
# Sub-directories of the image root, then an alphabetically sorted copy.
list_dir_images = list_dir_in_dir(path_images)
list_dir_images_sort = list(list_dir_images)
list_dir_images_sort.sort()

print(f"Number of images of directories is {len(list_dir_images_sort)}")
print('\n', list_dir_images_sort)

Number of images of directories is 50

 ['antelope', 'bat', 'beaver', 'blue+whale', 'bobcat', 'buffalo', 'chihuahua', 'chimpanzee', 'collie', 'cow', 'dalmatian', 'deer', 'dolphin', 'elephant', 'fox', 'german+shepherd', 'giant+panda', 'giraffe', 'gorilla', 'grizzly+bear', 'hamster', 'hippopotamus', 'horse', 'humpback+whale', 'killer+whale', 'leopard', 'lion', 'mole', 'moose', 'mouse', 'otter', 'ox', 'persian+cat', 'pig', 'polar+bear', 'rabbit', 'raccoon', 'rat', 'rhinoceros', 'seal', 'sheep', 'siamese+cat', 'skunk', 'spider+monkey', 'squirrel', 'tiger', 'walrus', 'weasel', 'wolf', 'zebra']


#### *Licenses of images:*

In [16]:
# Sub-directories of the license root, then an alphabetically sorted copy.
list_dir_licenses = list_dir_in_dir(path_license_image)
list_dir_licenses_sort = list(list_dir_licenses)
list_dir_licenses_sort.sort()

print(f"Number of licenses of directories is {len(list_dir_licenses_sort)}")
print('\n', list_dir_licenses_sort)

Number of licenses of directories is 50

 ['antelope', 'bat', 'beaver', 'blue+whale', 'bobcat', 'buffalo', 'chihuahua', 'chimpanzee', 'collie', 'cow', 'dalmatian', 'deer', 'dolphin', 'elephant', 'fox', 'german+shepherd', 'giant+panda', 'giraffe', 'gorilla', 'grizzly+bear', 'hamster', 'hippopotamus', 'horse', 'humpback+whale', 'killer+whale', 'leopard', 'lion', 'mole', 'moose', 'mouse', 'otter', 'ox', 'persian+cat', 'pig', 'polar+bear', 'rabbit', 'raccoon', 'rat', 'rhinoceros', 'seal', 'sheep', 'siamese+cat', 'skunk', 'spider+monkey', 'squirrel', 'tiger', 'walrus', 'weasel', 'wolf', 'zebra']


#### *pose data:*

In [17]:
# Sub-directories of the pose-annotation root, sorted alphabetically.
list_dir_pose_data = list_dir_in_dir(path_pose_data)
list_dir_pose_data_sort = sorted(list_dir_pose_data)
# The message names pose data; it previously said "licenses" (copy-paste slip).
print(f"Number of pose data of directories is {len(list_dir_pose_data_sort)}")
print('\n', list_dir_pose_data_sort)

Number of licenses of directories is 37

 ['antelope', 'bobcat', 'buffalo', 'chihuahua', 'collie', 'cow', 'dalmatian', 'deer', 'elephant', 'fox', 'german+shepherd', 'giant+panda', 'giraffe', 'grizzly+bear', 'hippopotamus', 'horse', 'leopard', 'lion', 'moose', 'otter', 'ox', 'persian+cat', 'pig', 'polar+bear', 'rabbit', 'raccoon', 'rat', 'rhinoceros', 'sample', 'sheep', 'siamese+cat', 'skunk', 'squirrel', 'tiger', 'weasel', 'wolf', 'zebra']


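> Note: At first glance, it is clear that the image classes and the pose-annotation classes are not identical: the pose annotations cover only 37 directories (including `sample`), while the images cover 50.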

#### *Check class*

In [18]:
# Do the image and license roots contain the same class directories?
print(list_dir_images_sort == list_dir_licenses_sort)
# Are all three listings equal? The chained comparison means
# (licenses == pose) and (pose == images).
print(list_dir_licenses_sort == list_dir_pose_data_sort == list_dir_images_sort)

True
False


## **Create dataframe of pose annotation**

In [19]:
# Accumulator for the pose-annotation records (one dict per annotation file).
total_dataset_list = []

In [20]:
%%time
for directory in tqdm(list_dir_pose_data_sort):
    path = os.path.join(path_pose_data, directory)
    data_list = list_files_dir(path)
    for data_path in data_list:
        path_input = os.path.join(path_pose_data, directory, data_path + '.pickle')
        data = load_pickle(path_input)
        data_input = data['a1']
        data_input['name_file'] = data_path
        data_input['name_class'] = directory
        total_dataset_list.append(data_input)
       
        

100%|██████████| 37/37 [01:23<00:00,  2.25s/it]

CPU times: user 3 s, sys: 2.61 s, total: 5.61 s
Wall time: 1min 23s





In [21]:
#total_dataset_list

In [22]:
# Build a pandas DataFrame from the list of record dicts, then convert it
# to a polars DataFrame.
df_keypoints_data = pd.DataFrame(total_dataset_list)
df_polars_keypoints_data = pl.from_pandas(df_keypoints_data)

In [23]:
# Preview the first 10 rows of the keypoint frame.
df_polars_keypoints_data.head(10)

right_eye,right_earbase,right_earend,right_antler_base,right_antler_end,left_antler_base,left_antler_end,left_earbase,left_earend,left_eye,nose,upper_jaw,lower_jaw,mouth_end_right,throat_base,neck_base,neck_end,back_base,back_middle,back_end,tail_base,body_middle_right,bbox,mouth_end_left,throat_end,tail_end,front_left_thai,front_left_knee,front_left_paw,front_right_thai,front_right_paw,front_right_knee,back_left_knee,back_left_paw,back_left_thai,back_right_thai,back_right_paw,back_right_knee,belly_bottom,body_middle_left,name_file,name_class,left_antlerbase,left_antlerend,right_antlerend,right_antlerbase,neckbase,neckend,backbase,backmiddle,backend,tailend,bellybottom,back_right_pi,tail_e,left_eara,right_earE,right_earea,left_ear_base,left_ear_end,right_ear_end,right_ear_base,tail_ea,wither,throat,left_ey,throat_be,tail_be,back_left_ta
list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],str,str,list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64],list[f64]
"[359.571429, 658.428571]","[285.857143, 634.428571]","[96.142857, 520.142857]","[363.0, 563.0]","[67.0, 92.714286]","[453.857143, 539.571429]","[296.714286, 82.428571]","[407.0, 539.0]","[423.0, 467.571429]","[508.142857, 635.571429]","[544.142857, 796.714286]","[550.428571, 821.285714]","[541.857143, 840.714286]","[488.142857, 831.0]","[453.285714, 865.285714]","[290.428571, 696.142857]","[265.857143, 869.285714]","[253.285714, 871.0]","[107.571429, 824.714286]","[32.714286, 785.857143]","[15.0, 783.571429]","[21.285714, 965.857143]","[0.0, 73.0, … 1022.428571]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","""antelope_10060""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[799.454545, 213.727273]","[769.909091, 195.545455]","[790.363636, 158.727273]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[794.0, 183.727273]","[809.0, 156.454545]","[-1.0, -1.0]","[856.727273, 260.090909]","[854.0, 271.909091]","[843.090909, 274.636364]","[831.272727, 266.454545]","[764.0, 257.818182]","[744.0, 211.0]","[658.545455, 269.181818]","[643.090909, 270.090909]","[501.727273, 255.545455]","[340.363636, 290.090909]","[329.909091, 299.636364]","[509.909091, 319.636364]","[225.363636, 150.090909, … 614.636364]","[-1.0, -1.0]","[669.909091, 340.545455]","[335.363636, 373.727273]","[618.545455, 398.272727]","[658.545455, 431.0]","[662.636364, 541.0]","[594.0, 368.272727]","[567.636364, 586.454545]","[569.909091, 471.454545]","[384.909091, 471.0]","[446.727273, 575.090909]","[409.909091, 405.090909]","[389.454545, 365.090909]","[243.545455, 595.545455]","[298.090909, 460.090909]","[496.272727, 399.636364]","[-1.0, -1.0]","""antelope_10014""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[-1.0, -1.0]","[447.181818, 291.909091]","[433.545455, 252.363636]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[464.909091, 317.363636]","[463.545455, 278.272727]","[427.636364, 335.545455]","[375.818182, 356.0]","[371.727273, 363.727273]","[376.272727, 376.0]","[-1.0, -1.0]","[446.727273, 395.090909]","[491.727273, 333.272727]","[521.727273, 397.818182]","[533.090909, 399.181818]","[619.454545, 395.090909]","[674.454545, 400.090909]","[678.545455, 406.0]","[-1.0, -1.0]","[364.0, 243.272727, … 502.818182]","[395.818182, 370.545455]","[474.909091, 452.818182]","[-1.0, -1.0]","[562.181818, 465.545455]","[481.272727, 485.545455]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[453.545455, 471.454545]","[685.363636, 477.363636]","[-1.0, -1.0]","[689.909091, 439.181818]","[611.727273, 366.454545]","[-1.0, -1.0]","[564.454545, 374.181818]","[-1.0, -1.0]","[641.727273, 446.909091]","""antelope_10453""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[566.258373, 247.325359]","[553.818182, 225.315789]","[500.708134, 189.909091]","[577.741627, 209.047847]","[526.545455, 31.057416]","[614.583732, 206.655502]","[651.904306, 23.880383]","[640.421053, 225.794258]","[692.095694, 193.736842]","[631.808612, 247.325359]","[603.100478, 302.349282]","[603.578947, 314.311005]","[603.100478, 321.488038]","[583.483254, 313.354067]","[602.143541, 341.583732]","[567.69378, 286.559809]","[555.732057, 340.148325]","[534.200957, 336.320574]","[368.650718, 333.449761]","[137.07177, 362.636364]","[122.717703, 375.076555]","[369.129187, 416.22488]","[88.0, 14.789474, … 856.0]","[621.282297, 309.526316]","[601.665072, 444.933014]","[158.602871, 502.827751]","[524.15311, 529.62201]","[510.755981, 660.244019]","[537.550239, 822.444976]","[482.047847, 495.650718]","[508.363636, 821.488038]","[486.832536, 652.588517]","[158.124402, 649.239234]","[190.660287, 830.578947]","[-1.0, -1.0]","[200.708134, 476.033493]","[118.889952, 841.105263]","[131.808612, 632.014354]","[349.511962, 520.5311]","[-1.0, -1.0]","""antelope_10054""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[380.142857, 220.142857]","[286.428571, 190.428571]","[141.285714, 58.428571]","[354.428571, 168.142857]","[349.285714, 106.428571]","[371.0, 133.285714]","[358.428571, 85.285714]","[305.285714, 141.857143]","[390.428571, 15.571429]","[-1.0, -1.0]","[505.857143, 271.0]","[500.142857, 291.0]","[483.0, 305.285714]","[447.0, 294.428571]","[328.714286, 314.428571]","[237.857143, 242.428571]","[182.428571, 365.857143]","[187.0, 342.428571]","[479.571429, 203.0]","[784.714286, 221.857143]","[807.571429, 247.0]","[-1.0, -1.0]","[132.714286, 9.285714, … 1013.285714]","[-1.0, -1.0]","[247.0, 462.428571]","[784.714286, 438.428571]","[352.714286, 529.285714]","[321.857143, 724.714286]","[264.142857, 983.0]","[265.285714, 559.571429]","[225.857143, 947.0]","[263.0, 723.0]","[827.0, 621.285714]","[792.714286, 928.714286]","[701.857143, 426.428571]","[-1.0, -1.0]","[772.142857, 842.428571]","[837.857143, 543.0]","[501.285714, 513.285714]","[495.0, 347.0]","""antelope_10196""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[389.0, 335.090909]","[372.636364, 307.818182]","[319.0, 227.818182]","[397.181818, 285.090909]","[377.181818, 52.818182]","[453.090909, 286.0]","[445.363636, 49.636364]","[475.363636, 304.181818]","[543.545455, 245.545455]","[470.363636, 337.818182]","[447.636364, 406.909091]","[452.181818, 425.090909]","[440.363636, 436.0]","[422.636364, 423.727273]","[427.181818, 462.363636]","[371.272727, 376.909091]","[369.0, 426.454545]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[316.272727, 550.090909]","[305.363636, 43.727273, … 679.636364]","[-1.0, -1.0]","[415.363636, 598.727273]","[-1.0, -1.0]","[487.636364, 641.454545]","[-1.0, -1.0]","[-1.0, -1.0]","[368.090909, 662.363636]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[499.0, 658.272727]","[346.272727, 664.636364]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[534.454545, 535.090909]","""antelope_10173""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[-1.0, -1.0]","[484.0, 250.0]","[397.181818, 190.454545]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[541.727273, 286.363636]","[540.363636, 222.727273]","[488.545455, 313.636364]","[352.636364, 380.454545]","[355.818182, 403.181818]","[368.090909, 415.909091]","[-1.0, -1.0]","[492.181818, 421.363636]","[556.272727, 363.636364]","[544.454545, 456.818182]","[549.0, 440.454545]","[586.272727, 374.545455]","[599.0, 346.818182]","[602.181818, 343.636364]","[-1.0, -1.0]","[338.090909, 183.636364, … 686.0]","[409.454545, 403.181818]","[512.636364, 597.727273]","[-1.0, -1.0]","[622.636364, 654.090909]","[-1.0, -1.0]","[-1.0, -1.0]","[497.636364, 662.727273]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[682.181818, 647.727273]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[642.636364, 654.090909]","[693.090909, 505.454545]","""antelope_10282""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[-1.0, -1.0]","[302.714286, 479.0]","[371.857143, 491.0]","[285.571429, 433.857143]","[218.142857, 119.0]","[243.857143, 435.0]","[82.714286, 101.285714]","[203.857143, 463.571429]","[109.571429, 476.142857]","[-1.0, -1.0]","[-1.0, -1.0]","[335.857143, 583.571429]","[325.0, 594.428571]","[325.0, 582.428571]","[-1.0, -1.0]","[223.285714, 478.428571]","[147.285714, 542.428571]","[131.857143, 545.857143]","[271.285714, 702.428571]","[461.571429, 828.714286]","[489.0, 854.428571]","[-1.0, -1.0]","[69.571429, 93.285714, … 949.285714]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[98.714286, 756.714286]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[526.714286, 727.571429]","[-1.0, -1.0]","[613.571429, 883.571429]","[-1.0, -1.0]","[156.428571, 816.142857]","""antelope_10137""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[146.671309, 165.45961]","[124.387187, 151.253482]","[91.518106, 120.891365]","[146.392758, 146.239554]","[140.821727, 128.969359]","[157.534819, 144.846797]","[151.963788, 127.855153]","[150.571031, 144.846797]","[143.328691, 113.927577]","[-1.0, -1.0]","[180.376045, 183.844011]","[182.604457, 190.807799]","[177.869081, 194.707521]","[165.612813, 187.743733]","[140.821727, 196.935933]","[127.451253, 187.743733]","[132.465181, 222.841226]","[154.749304, 223.119777]","[158.927577, 212.534819]","[167.562674, 204.178273]","[170.626741, 203.899721]","[-1.0, -1.0]","[87.339833, 110.027855, … 456.824513]","[-1.0, -1.0]","[141.37883, 254.874652]","[-1.0, -1.0]","[170.348189, 289.693593]","[169.512535, 359.331476]","[165.612813, 444.011142]","[131.072423, 278.830084]","[126.058496, 372.70195]","[120.487465, 290.807799]","[191.239554, 351.532033]","[194.303621, 438.16156]","[195.696379, 285.236769]","[151.963788, 307.520891]","[177.311978, 450.139276]","[-1.0, -1.0]","[-1.0, -1.0]","[191.518106, 252.64624]","""antelope_10433""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,
"[415.742794, 257.514412]","[403.991131, 236.006652]","[405.321508, 213.390244]","[417.738359, 239.554324]","[424.611973, 166.161863]","[-1.0, -1.0]","[414.855876, 168.157428]","[-1.0, -1.0]","[-1.0, -1.0]","[-1.0, -1.0]","[413.08204, 291.217295]","[407.095344, 293.656319]","[398.226164, 293.21286]","[403.547672, 289.0]","[375.166297, 240.219512]","[394.678492, 223.146341]","[360.75388, 164.166297]","[358.093126, 156.849224]","[301.552106, 98.977827]","[220.842572, 100.751663]","[212.86031, 106.960089]","[-1.0, -1.0]","[179.379157, 83.456763, … 311.394678]","[-1.0, -1.0]","[340.576497, 203.190687]","[221.286031, 167.04878]","[292.904656, 187.226164]","[296.008869, 229.798226]","[300.665188, 302.968958]","[325.055432, 175.696231]","[366.075388, 299.643016]","[335.920177, 232.237251]","[193.569845, 212.946785]","[191.796009, 302.747228]","[209.756098, 161.062084]","[246.784922, 171.039911]","[231.485588, 302.525499]","[218.181818, 219.37694]","[282.039911, 171.7051]","[-1.0, -1.0]","""antelope_10166""","""antelope""",,,,,,,,,,,,,,,,,,,,,,,,,,,


In [24]:
# Keep the column names of the keypoint frame and display them.
columns_keypoints_df = df_polars_keypoints_data.columns
columns_keypoints_df

['right_eye',
 'right_earbase',
 'right_earend',
 'right_antler_base',
 'right_antler_end',
 'left_antler_base',
 'left_antler_end',
 'left_earbase',
 'left_earend',
 'left_eye',
 'nose',
 'upper_jaw',
 'lower_jaw',
 'mouth_end_right',
 'throat_base',
 'neck_base',
 'neck_end',
 'back_base',
 'back_middle',
 'back_end',
 'tail_base',
 'body_middle_right',
 'bbox',
 'mouth_end_left',
 'throat_end',
 'tail_end',
 'front_left_thai',
 'front_left_knee',
 'front_left_paw',
 'front_right_thai',
 'front_right_paw',
 'front_right_knee',
 'back_left_knee',
 'back_left_paw',
 'back_left_thai',
 'back_right_thai',
 'back_right_paw',
 'back_right_knee',
 'belly_bottom',
 'body_middle_left',
 'name_file',
 'name_class',
 'left_antlerbase',
 'left_antlerend',
 'right_antlerend',
 'right_antlerbase',
 'neckbase',
 'neckend',
 'backbase',
 'backmiddle',
 'backend',
 'tailend',
 'bellybottom',
 'back_right_pi',
 'tail_e',
 'left_eara',
 'right_earE',
 'right_earea',
 'left_ear_base',
 'left_ear_end',
 

## **Create dataframe of images**

In [25]:
# Accumulator for the image records (one dict per image file).
total_image_list = []

In [26]:
%%time
for directory in tqdm(list_dir_images_sort):
    path_img = os.path.join(path_images, directory)
    path_license = os.path.join(path_license_image, directory)
    data_list_img = list_files_dir(path_img)
    data_list_license = list_files_dir(path_license)
    for i, data_path in enumerate(data_list_img):
        data_dict = {}
        
        path_input_img = os.path.join(path_img, data_path + '.jpg')
        path_input_license = os.path.join(path_license, data_path + '.txt')
        
        image_base64 = img_to_base64(path_input_img)
        text_license = load_text_file(path_input_license)
        
        data_dict['name_file'] = data_path
        data_dict['name_class'] = directory
        data_dict['image_base64s'] = image_base64
        data_dict['image_license'] = text_license
        total_image_list.append(data_dict)

100%|██████████| 50/50 [21:37<00:00, 25.95s/it]

CPU times: user 8min 32s, sys: 43.6 s, total: 9min 16s
Wall time: 21min 37s





In [27]:
#total_image_list

In [28]:
# Build a pandas DataFrame from the list of image records, then convert it
# to a polars DataFrame.
df_images_data = pd.DataFrame(total_image_list)
df_polars_images_data = pl.from_pandas(df_images_data)

In [29]:
# Preview the first 10 rows of the image frame.
df_polars_images_data.head(10)

name_file,name_class,image_base64s,image_license
str,str,str,str
"""antelope_10702""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10558""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10111""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10192""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10625""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10216""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_11022""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10831""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10186""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD/2w…","""+-----------------------------…"
"""antelope_10005""","""antelope""","""/9j/4AAQSkZJRgABAQAAAQABAAD//g…","""+-----------------------------…"


In [30]:
# Keep the column names of the image frame and display them.
columns_images_df = df_polars_images_data.columns
columns_images_df

['name_file', 'name_class', 'image_base64s', 'image_license']