In [1]:
import os

import numpy as np
import requests
import torch
from datasets import load_dataset
from PIL import Image
from transformers import AutoProcessor, CLIPVisionModel, CLIPVisionModelWithProjection

Please refer to [CLIP documentation](https://huggingface.co/docs/transformers/en/model_doc/clip)

In [5]:
from huggingface_hub import login

# Never commit an access token to a notebook: the token that used to be
# hardcoded here must be treated as leaked and revoked on the Hub.
# The token is read from the HF_TOKEN environment variable instead; when the
# variable is unset, token=None lets `login` prompt for it interactively.
login(token=os.environ.get("HF_TOKEN"))

# Load Data

In [2]:
# Load the dataset from the Hugging Face Hub.
# The cache location can be overridden with the WIKIART_CACHE_DIR environment
# variable; the default is the path this notebook was originally run with.
dataset = load_dataset(
    "MissTiny/WikiArt",
    cache_dir=os.environ.get("WIKIART_CACHE_DIR", "D:\\MissTiny\\GitHub\\MLProject"),
)

Resolving data files:   0%|          | 0/66 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/22 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/66 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/22 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/60 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/20 [00:00<?, ?it/s]

In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['title', 'artist', 'date', 'genre', 'style', 'description', 'filename', 'image', 'embeddings_pca512', 'image_numpy', 'CLIPVisionModelWithProjection_image_embeds'],
        num_rows: 57529
    })
    test: Dataset({
        features: ['title', 'artist', 'date', 'genre', 'style', 'description', 'filename', 'image', 'embeddings_pca512', 'image_numpy', 'CLIPVisionModelWithProjection_image_embeds'],
        num_rows: 19177
    })
})

In [5]:
dataset['train'][0]['image']

NotImplementedError: Subclasses of Dataset should implement __getitem__.

In [6]:
# Get the image in numpy array format 
image = np.asarray(dataset['train'][0]['image'])
type(image)

NotImplementedError: Subclasses of Dataset should implement __getitem__.

In [7]:
image.shape

NameError: name 'image' is not defined

# Clip Vision Model

- **CLIPVisionModel**: The vision model from CLIP without any head or projection on top.
    - **last_hidden_state** (torch.FloatTensor of shape (batch_size, sequence_length, hidden_size)) — Sequence of hidden-states at the output of the last layer of the model.
    - **pooler_output** (torch.FloatTensor of shape (batch_size, hidden_size)) — Last layer hidden-state of the first token of the sequence (classification token) after further processing through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns the classification token after processing through a linear layer and a tanh activation function. The linear layer weights are trained from the next sentence prediction (classification) objective during pretraining. Note: this is the generic description shared by Transformers pooled outputs; for CLIP's vision model the pooled output is the classification-token hidden state passed through the final layer norm (there is no next-sentence-prediction head).

## Load Model

In [7]:
model1 = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")

In [8]:
model1.config

CLIPVisionConfig {
  "_attn_implementation_autoset": true,
  "_name_or_path": "openai/clip-vit-base-patch32",
  "attention_dropout": 0.0,
  "dropout": 0.0,
  "hidden_act": "quick_gelu",
  "hidden_size": 768,
  "image_size": 224,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "model_type": "clip_vision_model",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 32,
  "projection_dim": 512,
  "transformers_version": "4.46.2"
}

In [9]:
processor1 = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

## Convert to CLIP Embedding

In [11]:
inputs = processor1(images=image, return_tensors="pt")

In [12]:
inputs

{'pixel_values': tensor([[[[-0.7558, -0.8288, -0.8726,  ..., -1.0915, -0.9602, -0.9748],
          [-0.6244, -0.7996, -0.8288,  ..., -1.1353, -1.0039, -1.0915],
          [-0.8288, -0.8580, -0.7704,  ..., -0.9893, -0.8726, -1.0915],
          ...,
          [-1.2083, -1.1645, -1.1645,  ..., -1.4127, -1.4273, -1.4273],
          [-1.0185, -1.0477, -1.1791,  ..., -1.5587, -1.5733, -1.5879],
          [-1.0331, -1.0039, -1.0915,  ..., -0.7412, -0.7266, -0.7412]],

         [[-0.0262, -0.1613, -0.3114,  ..., -1.1068, -0.9567, -0.9417],
          [ 0.0638, -0.1613, -0.2813,  ..., -1.1818, -0.9717, -1.0467],
          [-0.2063, -0.2663, -0.2813,  ..., -1.0167, -0.8516, -1.0617],
          ...,
          [-1.0617, -1.0017, -0.9717,  ..., -1.4069, -1.4219, -1.4219],
          [-0.8666, -0.8967, -0.9717,  ..., -1.5270, -1.5420, -1.5570],
          [-0.8816, -0.8366, -0.9117,  ..., -0.6565, -0.6715, -0.6865]],

         [[ 0.3115,  0.1977,  0.0982,  ..., -1.1958, -1.1247, -1.1532],
          [ 0

In [13]:
inputs.keys()

dict_keys(['pixel_values'])

In [14]:
# Inference only: disable autograd so no computation graph is built or kept
# alive by the returned tensors.
with torch.no_grad():
    outputs = model1(**inputs)

In [15]:
last_hidden_state = outputs.last_hidden_state

In [16]:
# batch_size, sequence_length, hidden_size
last_hidden_state.shape

torch.Size([1, 50, 768])

In [17]:
# batch_size, hidden_size
pooled_output = outputs.pooler_output  # pooled CLS states

In [18]:
pooled_output.shape

torch.Size([1, 768])

# Clip Vision Model with Projection

- **CLIPVisionModelWithProjection**: CLIP Vision Model with a projection layer on top (a linear layer on top of the pooled output).
    - **image_embeds** (torch.FloatTensor of shape (batch_size, output_dim), *optional*, returned when the model is initialized with with_projection=True) — The image embeddings obtained by applying the projection layer to the pooler_output.
    - **last_hidden_state** (torch.FloatTensor of shape (batch_size, sequence_length, hidden_size)) — Sequence of hidden-states at the output of the last layer of the model.

In [3]:
model2 = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
processor2 = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

In [None]:
inputs = processor2(images=image, return_tensors="pt")

# Inference only: disable autograd so the embedding does not carry a
# computation graph.
with torch.no_grad():
    outputs = model2(**inputs)
image_embeds = outputs.image_embeds

In [20]:
image_embeds.flatten()

tensor([ 5.7276e-01, -1.1683e-02, -3.8835e-01, -4.7655e-02, -1.5295e-01,
        -1.9649e-01,  2.8847e-01, -1.6525e-01, -2.9226e-01,  5.0541e-01,
        -6.9078e-01,  6.1941e-01, -7.5515e-02, -1.5732e-01,  4.8566e-01,
         5.9324e-02,  2.3344e-01,  1.0125e-01,  1.6345e-01,  8.7014e-02,
        -3.2031e-01, -1.4276e-02,  1.2343e-01,  3.8300e-01,  2.1373e-01,
         3.6731e-01,  2.2468e-01,  3.0784e-01,  3.4626e-01, -4.8405e-01,
        -1.6400e-01, -1.8532e-01, -1.6609e-01, -1.2264e-01, -1.0475e-01,
        -3.5395e-01,  1.3298e-01,  3.1300e-01,  2.9166e-01,  2.0781e-01,
        -1.4685e-01, -4.3510e-02,  3.3001e-01, -2.7783e-01,  4.0180e-01,
        -1.7722e-01, -2.8423e-01,  1.4008e-01, -1.0253e-01,  3.2990e-03,
         4.9712e-01, -3.1800e-01,  3.1565e-01, -9.5512e-03, -3.4972e-02,
        -5.1089e-02, -5.1402e-01,  5.8958e-01, -4.3651e-01,  1.4013e-01,
        -2.5110e-01, -1.9915e-01,  4.1623e-01,  9.3699e-02,  2.7740e-01,
        -2.0243e-01,  3.2845e-02,  7.0905e-01, -3.1

In [21]:
image_embeds.shape

torch.Size([1, 512])

# Extract Embeddings from Image and From a Dataset

In [19]:
def convert_image_to_numpy(example):
    """Attach a NumPy view of the example's image.

    Args:
        example: Mapping with an ``"image"`` entry that ``np.asarray`` can
            convert.

    Returns:
        The same ``example`` object, with the converted array stored under
        ``"image_numpy"``.
    """
    pixels = np.asarray(example["image"])
    example["image_numpy"] = pixels
    return example

In [20]:
updated_dataset = dataset.map(convert_image_to_numpy)

In [37]:
updated_dataset.set_format(type="np", columns=['image_numpy'])

In [21]:
updated_dataset

DatasetDict({
    train: Dataset({
        features: ['title', 'artist', 'date', 'genre', 'style', 'description', 'filename', 'image', 'embeddings_pca512', 'image_numpy'],
        num_rows: 76706
    })
})

## Image Embedding

In [4]:
def image_embedding(example, processor, model, torch=None):
    """Compute the projected CLIP image embedding for one example.

    Args:
        example: Mapping with an ``"image_numpy"`` entry holding the image.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            whose result is unpacked into ``model``.
        model: Callable whose output exposes ``image_embeds`` with a leading
            batch dimension of size 1.
        torch: The ``torch`` module. Optional; it is imported here when
            omitted, so callers that do not pass it via ``fn_kwargs`` no longer
            fail with a missing-argument ``TypeError``.

    Returns:
        The same ``example``, with the embedding stored under
        ``"CLIPVisionModelWithProjection_image_embeds"``.
    """
    if torch is None:
        import torch

    inputs = processor(images=example["image_numpy"], return_tensors="pt")
    # Inference only: without no_grad every embedding would drag an autograd
    # graph along with it.
    with torch.no_grad():
        outputs = model(**inputs)
    # Drop the batch dimension: [1, D] -> [D]
    # (D is 512 for openai/clip-vit-base-patch32).
    example["CLIPVisionModelWithProjection_image_embeds"] = torch.squeeze(outputs.image_embeds, 0)
    return example

In [5]:
# Each set_format call replaces the previously configured format, so both
# columns are requested in a single call.
dataset.set_format(
    type="np",
    columns=["image_numpy", "CLIPVisionModelWithProjection_image_embeds"],
)

# Smoke-test the embedding function on a handful of rows before the full run.
test_dataset = dataset['train'].select(range(5))

# `torch` is passed explicitly, matching the full-dataset map call below.
test_embedded_dataset = test_dataset.map(
    image_embedding,
    fn_kwargs={"processor": processor2, "model": model2, "torch": torch},
)

test_embedded_dataset["CLIPVisionModelWithProjection_image_embeds"][0].shape

In [None]:
embeded_dataset = dataset.map(image_embedding,fn_kwargs={"processor": processor2, "model": model2,'torch':torch},num_proc=3)

Map (num_proc=3):   0%|          | 0/57529 [00:00<?, ? examples/s]

In [None]:
embeded_dataset 


Split Training and Testing

# Extract last hidden layer and CLS token from Image and From a Dataset

In [16]:
def image_cls_hidden_layer_embedding(example, processor, model, torch=None):
    """Compute the last hidden state and pooled CLS output for one example.

    Args:
        example: Mapping with an ``"image_numpy"`` entry holding the image.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            whose result is unpacked into ``model``.
        model: Callable whose output exposes ``last_hidden_state`` and
            ``pooler_output``, each with a leading batch dimension of size 1.
        torch: The ``torch`` module. Optional; it is imported here when
            omitted, so callers that do not pass it via ``fn_kwargs`` no longer
            fail with a missing-argument ``TypeError``.

    Returns:
        The same ``example``, with ``"CLIPVisionModel_hidden_state"`` and
        ``"CLIPVisionModel_CLS"`` added.
    """
    if torch is None:
        import torch

    inputs = processor(images=example["image_numpy"], return_tensors="pt")
    # Inference only: without no_grad the stored tensors would keep an
    # autograd graph alive.
    with torch.no_grad():
        outputs = model(**inputs)
    # Drop the batch dimension. For openai/clip-vit-base-patch32 the hidden
    # state becomes [50, 768] and the pooled CLS output [768].
    example["CLIPVisionModel_hidden_state"] = torch.squeeze(outputs.last_hidden_state, 0)
    example["CLIPVisionModel_CLS"] = torch.squeeze(outputs.pooler_output, 0)
    return example

In [18]:
full_dataset = embeded_dataset.map(image_cls_hidden_layer_embedding,fn_kwargs={"processor": processor1, "model": model1,"torch":torch})

In [19]:
full_dataset

IterableDatasetDict({
    train: IterableDataset({
        features: Unknown,
        num_shards: 66
    })
    test: IterableDataset({
        features: Unknown,
        num_shards: 22
    })
})

# `torch` is passed explicitly, matching the full-dataset map call above.
full_test_dataset = test_embedded_dataset.map(
    image_cls_hidden_layer_embedding,
    fn_kwargs={"processor": processor1, "model": model1, "torch": torch},
)

full_test_dataset['CLIPVisionModel_CLS'][0]

# Update Dataset

## Train Test Split

In [56]:
new_dataset = embeded_dataset['train'].train_test_split(test_size=0.25,seed = 123)

## Upload Data to Huggingface

In [5]:
from huggingface_hub import login

# Never commit an access token to a notebook: the token that used to be
# hardcoded here must be treated as leaked and revoked on the Hub.
# The token is read from the HF_TOKEN environment variable instead; when the
# variable is unset, token=None lets `login` prompt for it interactively.
login(token=os.environ.get("HF_TOKEN"))

ConnectTimeout: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/whoami-v2 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x0000016788074050>, 'Connection to huggingface.co timed out. (connect timeout=None)'))"), '(Request ID: b4fcd8f4-0397-4286-aed5-efca9e3866f2)')

In [57]:
new_dataset.push_to_hub("MissTiny/WikiArt")

Uploading the dataset shards:   0%|          | 0/66 [00:00<?, ?it/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/22 [00:00<?, ?it/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Map:   0%|          | 0/871 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/MissTiny/WikiArt/commit/544c532a710ec5c37cd8b09988fa2774fb93c1b8', commit_message='Upload dataset (part 00001-of-00002)', commit_description='', oid='544c532a710ec5c37cd8b09988fa2774fb93c1b8', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MissTiny/WikiArt', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MissTiny/WikiArt'), pr_revision=None, pr_num=None)