In [1]:
# Import statements
import pandas as pd
import numpy as np
from PIL import Image
import requests
from io import BytesIO
import os

# Set the random seed
np.random.seed(1337)

In [2]:
# Create a dataframe for the cleaned dataset
df = pd.read_csv("https://gist.githubusercontent.com/jd1771/4483a72c10b94fb28d4de22ac12d9b80/raw/1abc382362cfac751b3c784563d1afc327371e9b/clean.csv")

# Get a list of the top 10 artist names 
artist_names = df["Artist"].unique()

# Get a list of the different styles
styles = df["Style"].unique()

In [None]:
# Loop through each artist and download their paintings 

NUM_IMAGES = 450

folder = "./paintings"

os.makedirs(folder)

for artist in artist_names:

  # Make the artist folder path
  path_artist = os.path.join(folder, artist)
  os.makedirs(path_artist)
  
  # Get the top 450 paintings and an image URL list for each artist
  top_paintings = df[df['Artist']==artist].head(NUM_IMAGES)
  img_list =  top_paintings['Link'].tolist()

  for i in range(len(img_list)):

    # Get the image link
    painting_link = img_list[i]

    name = painting_link.rsplit('/', 1)[-1]

    # Get the image content
    response = requests.get(painting_link)
    img = Image.open(BytesIO(response.content))

    # Check if image is of valid RGB format
    if img.mode != 'RGB':
        continue
    
    img.save(f"{path_artist}\{name}")

In [7]:
# Loop through each artist and download their paintings 

NUM_TEST_IMAGES = int(450*0.1)

folder = "./painting_test"

os.makedirs(folder)

for artist in artist_names:

  # Make the artist folder path
  path_artist = os.path.join(folder, artist)
  os.makedirs(path_artist)
  
  # Get the top 450 paintings and an image URL list for each artist
  top_paintings = df[df['Artist']==artist].head(NUM_IMAGES+NUM_TEST_IMAGES)[NUM_IMAGES+1:]
  img_list =  top_paintings['Link'].tolist()

  for i in range(len(img_list)):

    # Get the image link
    painting_link = img_list[i]

    name = painting_link.rsplit('/', 1)[-1]

    # Get the image content
    response = requests.get(painting_link)
    img = Image.open(BytesIO(response.content))

    # Check if image is of valid RGB format
    if img.mode != 'RGB':
        continue
    
    img.save(f"{path_artist}\{name}")

In [14]:
import math
# Loop through each style and download paintings 

NUM_IMAGES = 450

folder = "./painting_styles_v5"
folder_test = "./styles_test_v5"

os.makedirs(folder)
os.makedirs(folder_test)

for style in styles:

  # Get the top 450 paintings and an image URL list for each artist
  top_paintings = df[df['Style']==style].head(NUM_IMAGES)
  img_list =  top_paintings['Link'].tolist()

  if top_paintings.shape[0] < 50:
    # not enough painting in that style
    continue

  # Make the style folder path
  path_style = os.path.join(folder, style)
  test_path_style = os.path.join(folder_test, style)
  os.makedirs(path_style)
  os.makedirs(test_path_style)

  train_len = math.floor(len(img_list)*0.9)
  test_len = math.floor(len(img_list)*0.1) # put aside 0.1 for test

  print("train_len for {}: {}, test_len: {}".format(style,
                                                    train_len,
                                                    test_len))

  for i in range(train_len): 

    # Get the image link
    painting_link = img_list[i]

    name = painting_link.rsplit('/', 1)[-1]

    # Get the image content
    response = requests.get(painting_link)
    img = Image.open(BytesIO(response.content))

    # Check if image is of valid RGB format
    if img.mode != 'RGB':
        continue
    
    img.save(f"{path_style}\{name}")

  for i in range(test_len):

    # Get the image link
    painting_link = img_list[i+train_len]

    name = painting_link.rsplit('/', 1)[-1]

    # Get the image content
    response = requests.get(painting_link)
    img = Image.open(BytesIO(response.content))

    # Check if image is of valid RGB format
    if img.mode != 'RGB':
        continue
    
    img.save(f"{test_path_style}\{name}")

train_len for Northern-Renaissance: 405, test_len: 45
train_len for Neo-Expressionism: 65, test_len: 7
train_len for Neo-Figurative-Art: 70, test_len: 7
train_len for Contemporary-Realism: 45, test_len: 5
train_len for Fauvism: 228, test_len: 25
train_len for Impressionism: 405, test_len: 45
train_len for Neoclassicism: 405, test_len: 45
train_len for Expressionism: 292, test_len: 32
train_len for Abstract-Expressionism: 104, test_len: 11
train_len for Post-Impressionism: 405, test_len: 45
train_len for Cubism: 222, test_len: 24
train_len for Naïve-Art-(Primitivism): 405, test_len: 45
train_len for Surrealism: 405, test_len: 45
train_len for Synthetic-Cubism: 50, test_len: 5
train_len for Tenebrism: 73, test_len: 8
train_len for Baroque: 405, test_len: 45


In [8]:
!zip -r /content/painting_test.zip /content/painting_test/

  adding: content/painting_test/ (stored 0%)
  adding: content/painting_test/Rembrandt\the-resurrection-of-lazurus-a-small-plate-1642.jpg (deflated 1%)
  adding: content/painting_test/Pablo Picasso\untitled-1937-8.jpg (deflated 1%)
  adding: content/painting_test/Marc Chagall\selifan-interrupts-petrouchka-cleaning.jpg (deflated 0%)
  adding: content/painting_test/Giovanni Battista Piranesi\other-statement-of-the-curule-chair-already-illustrated-in-the-preceding-tables.jpg (deflated 1%)
  adding: content/painting_test/Rembrandt\the-mountebank-1635.jpg (deflated 0%)
  adding: content/painting_test/Salvador Dali\spider-of-the-evening.jpg (deflated 0%)
  adding: content/painting_test/Rembrandt\the-rape-of-europe.jpg (deflated 0%)
  adding: content/painting_test/Salvador Dali\space-elephant.jpg (deflated 1%)
  adding: content/painting_test/Pablo Picasso\portrait-of-mateu-fernandez-de-soto-1901.jpg (deflated 0%)
  adding: content/painting_test/Marc Chagall\sobakevitch.jpg (deflated 2%)
  add

In [15]:
!zip -r /content/painting_styles_v5.zip /content/painting_styles_v5/
!zip -r /content/styles_test_v5.zip /content/styles_test_v5/

  adding: content/painting_styles_v5/ (stored 0%)
  adding: content/painting_styles_v5/Surrealism\head-of-crying-woman.jpg (deflated 0%)
  adding: content/painting_styles_v5/Post-Impressionism\donkey-and-cart(1).jpg (deflated 0%)
  adding: content/painting_styles_v5/Post-Impressionism\beach-sea-and-fishing-boats-1888(1).jpg (deflated 0%)
  adding: content/painting_styles_v5/Baroque\bust-of-an-old-man-1631.jpg (deflated 2%)
  adding: content/painting_styles_v5/Northern-Renaissance\martyrdom-of-the-ten-thousand-1508.jpg (deflated 0%)
  adding: content/painting_styles_v5/Northern-Renaissance\crucifixion-1497.jpg (deflated 0%)
  adding: content/painting_styles_v5/Naïve-Art-(Primitivism)\morning-tea.jpg (deflated 1%)
  adding: content/painting_styles_v5/Baroque\a-man-in-an-arboug-1642.jpg (deflated 1%)
  adding: content/painting_styles_v5/Surrealism\erotic-beach.jpg (deflated 1%)
  adding: content/painting_styles_v5/Naïve-Art-(Primitivism)\christ-as-a-clock-1957.jpg (deflated 0%)
  adding: 