In [1]:
# Dataset: 
# @article{mao2019visual,
#   title={Visual Arts Search on Mobile Devices},
#   author={Mao, Hui and She, James and Cheung, Ming},
#   journal={ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
#   volume={15},
#   number={2s},
#   pages={60},
#   year={2019},
#   publisher={ACM}
# }
# https://deepart.hkust.edu.hk/ART500K/art500k.html

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sklearn

In [None]:
paintings = pd.read_csv("toy_dataset_label.csv", sep="\t")
paintings

Unnamed: 0,ID,FILE,AUTHOR,BORN-DIED,TITLE,DATE,TECHNIQUE,LOCATION,FORM,TYPE,SCHOOL,TIMELINE,URL
0,1,1.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Allegory,1598,"Oil on copper, 56 x 47 cm","Alte Pinakothek, Munich",painting,mythological,German,1601-1650,http://www.wga.hu/html/a/aachen/allegory.html
1,2,2.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)","Bacchus, Ceres and Cupid",-,"Oil on canvas, 163 x 113 cm","Kunsthistorisches Museum, Vienna",painting,mythological,German,1601-1650,http://www.wga.hu/html/a/aachen/bacchus.html
2,3,3.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Joking Couple,-,"Copperplate, 25 x 20 cm","Kunsthistorisches Museum, Vienna",painting,genre,German,1601-1650,http://www.wga.hu/html/a/aachen/j_couple.html
3,4,4.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Portrait of Emperor Rudolf II,1590s,"Oil on canvas, 60 x 48 cm","Kunsthistorisches Museum, Vienna",painting,portrait,German,1601-1650,http://www.wga.hu/html/a/aachen/rudolf2.html
4,5,5.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Self-Portrait with a Glass of Wine,c. 1596,"Oil on canvas, 53 x 44 cm",Private collection,painting,portrait,German,1601-1650,http://www.wga.hu/html/a/aachen/selfport.html
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43450,43451,43451.jpg,"ZÜRN, Martin","(b. ca. 1585, Waldsee, d. ca. 1655, Waldsee)",Holy Knight St Sebastian,1638-39,"Wood, height 289 cm","Staatliche Museen, Berlin",sculpture,religious,German,1601-1650,http://www.wga.hu/html/z/zurn/martin/knight2.html
43451,43452,43452.jpg,"ZÜRN, Martin","(b. ca. 1585, Waldsee, d. ca. 1655, Waldsee)",Madonna with Child,-,"Polychromed limewood, height 61 cm",Private collection,sculpture,religious,German,1601-1650,http://www.wga.hu/html/z/zurn/martin/madchil.html
43452,43453,43453.jpg,"ZÜRN, Martin","(b. ca. 1585, Waldsee, d. ca. 1655, Waldsee)",Madonna with Child (detail),-,Polychromed limewood,Private collection,sculpture,religious,German,1601-1650,http://www.wga.hu/html/z/zurn/martin/madchild....
43453,43454,43454.jpg,"ZÜRN, Martin","(b. ca. 1585, Waldsee, d. ca. 1655, Waldsee)",St Sebastian,c. 1650,Limewood,"Liebighaus, Frankfurt",sculpture,religious,German,1601-1650,http://www.wga.hu/html/z/zurn/martin/sebastia....


In [4]:
paintings['TYPE'].value_counts()

TYPE
religious       17839
portrait         5640
landscape        4342
mythological     4047
other            3785
genre            2857
still-life       1400
study            1309
historical       1171
interior         1065
Name: count, dtype: int64

In [None]:
import os
import shutil

paintings = pd.read_csv("paint_data_labels_.csv", sep="\t") # csv file labels that is being read
labels_to_remove = ['religious', 'interior', 'other', 'genre', 'study', 'historical', 'mythological']

paintings_cleaned = paintings[~paintings['TYPE'].isin(labels_to_remove)]
files_to_remove = paintings[paintings['TYPE'].isin(labels_to_remove)]

image_folder = 'paint_data_1/' # old painting folder
painting_folder = 'paint_data/' # new painting folder after sorting; should be empty and will align with new painting csv

if not os.path.exists(painting_folder):
    os.makedirs(painting_folder)

for _, row in paintings.iterrows():
    file_name = row['FILE']
    file_name = str(file_name) if pd.notna(file_name) else None
    
    if file_name:
        file_path = os.path.join(image_folder, file_name)
        image_id = row['ID']
        
        if row['TYPE'] in labels_to_remove:
            if os.path.exists(file_path):
                os.remove(file_path)
                print(f"Deleted image with ID: {image_id}, File: {file_name}")
            else:
                print(f"File not found: {file_name}, ID: {image_id}")
        else:
            new_file_path = os.path.join(painting_folder, file_name)
            if os.path.exists(file_path):
                shutil.move(file_path, new_file_path)
                print(f"Moved image with ID: {image_id}, File: {file_name} to {painting_folder}")
            else:
                print(f"File not found for moving: {file_name}, ID: {image_id}")
    else:
        print(f"Invalid file name (NaN or non-string) for ID: {row['ID']}")
        
paintings_cleaned.to_csv("paint_data_labels.csv", sep="\t", index=False) # new csv files

print(f"Labels and images for {', '.join(labels_to_remove)} have been removed or moved.")


Deleted image with ID: 1, File: 1.jpg
Deleted image with ID: 2, File: 2.jpg
Moved image with ID: 4, File: 4.jpg to paint_data/
Moved image with ID: 5, File: 5.jpg to paint_data/
Moved image with ID: 6, File: 6.jpg to paint_data/
Moved image with ID: 7, File: 7.jpg to paint_data/
Moved image with ID: 15, File: 15.jpg to paint_data/
Deleted image with ID: 16, File: 16.jpg
Deleted image with ID: 17, File: 17.jpg
Deleted image with ID: 18, File: 18.jpg
Moved image with ID: 19, File: 19.jpg to paint_data/
Moved image with ID: 20, File: 20.jpg to paint_data/
Moved image with ID: 21, File: 21.jpg to paint_data/
Moved image with ID: 22, File: 22.jpg to paint_data/
Moved image with ID: 29, File: 29.jpg to paint_data/
Moved image with ID: 30, File: 30.jpg to paint_data/
Deleted image with ID: 34, File: 34.jpg
Deleted image with ID: 35, File: 35.jpg
Moved image with ID: 36, File: 36.jpg to paint_data/
Deleted image with ID: 41, File: 41.jpg
Deleted image with ID: 42, File: 42.jpg
Deleted image wi

In [5]:
paint = pd.read_csv('paint_data_labels.csv', sep='\t')
paint

Unnamed: 0,ID,FILE,AUTHOR,BORN-DIED,TITLE,DATE,TECHNIQUE,LOCATION,FORM,TYPE,SCHOOL,TIMELINE,URL
0,4,4.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Portrait of Emperor Rudolf II,1590s,"Oil on canvas, 60 x 48 cm","Kunsthistorisches Museum, Vienna",painting,portrait,German,1601-1650,http://www.wga.hu/html/a/aachen/rudolf2.html
1,5,5.jpg,"AACHEN, Hans von","(b. 1552, Köln, d. 1615, Praha)",Self-Portrait with a Glass of Wine,c. 1596,"Oil on canvas, 53 x 44 cm",Private collection,painting,portrait,German,1601-1650,http://www.wga.hu/html/a/aachen/selfport.html
2,6,6.jpg,"AAGAARD, Carl Frederik","(b. 1833, Odense, d. 1895, København)",Deer beside a Lake,1888,"Oil on canvas, 53 x 82 cm",Private collection,painting,landscape,Danish,1851-1900,http://www.wga.hu/html/a/aagaard/deerlake.html
3,7,7.jpg,"AAGAARD, Carl Frederik","(b. 1833, Odense, d. 1895, København)",The Rose Garden,1877,"Oil on canvas, 98 x 80 cm",Private collection,painting,landscape,Danish,1851-1900,http://www.wga.hu/html/a/aagaard/rosegard.html
4,15,15.jpg,"ABBATE, Niccolò dell'","(b. 1509, Modena, d. 1571, Fontainebleau)",Stag Hunt,1550-52,"Oil on canvas, 116 x 159 cm","Galleria Borghese, Rome",painting,landscape,Italian,1501-1550,http://www.wga.hu/html/a/abbate/deerhunt.html
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11377,43421,43421.jpg,"ZURBARÁN, Francisco de","(b. 1598, Fuente de Cantos, d. 1664, Madrid)",Cup of Water and a Rose on a Silver Plate,c. 1630,"Oil on canvas, 21,2 x 30,1 cm","National Gallery, London",painting,still-life,Spanish,1601-1650,http://www.wga.hu/html/z/zurbaran/1/stil_lif.html
11378,43422,43422.jpg,"ZURBARÁN, Francisco de","(b. 1598, Fuente de Cantos, d. 1664, Madrid)",Still-Life with Pottery Jars,c. 1660,"Oil on canvas, 46 x 84 cm","Museo del Prado, Madrid",painting,still-life,Spanish,1601-1650,http://www.wga.hu/html/z/zurbaran/1/still_li.html
11379,43423,43423.jpg,"ZURBARÁN, Francisco de","(b. 1598, Fuente de Cantos, d. 1664, Madrid)","Still-life with Lemons, Oranges and Rose",1633,"Oil on canvas, 60 x 107 cm","Norton Simon Museum of Art, Pasadena",painting,still-life,Spanish,1601-1650,http://www.wga.hu/html/z/zurbaran/1/stillife.html
11380,43437,43437.jpg,"ZURBARÁN, Francisco de","(b. 1598, Fuente de Cantos, d. 1664, Madrid)",Portrait of the Duke of Medinaceli,-,Oil on canvas,"Hospital of Tavera, Toledo",painting,portrait,Spanish,1601-1650,http://www.wga.hu/html/z/zurbaran/2/port_med.html


In [6]:
paint['TYPE'].value_counts() 

TYPE
portrait      5640
landscape     4342
still-life    1400
Name: count, dtype: int64