In [1]:
import pandas as pd
import json
import numpy as np
import os


Use the artist list in 101.csv to select works by the most popular artists from the Art Institute of Chicago artwork file (allArtworks.jsonl).

In [2]:
# Read the CSV file into a DataFrame.
# The path is relative to the notebook's working directory; a later cell reads
# the 'nameP' column of this frame to build the artist join key.
csv101_df = pd.read_csv('101.csv')

In [3]:
# Read the JSONL file: one JSON document per line.
# os.path.join keeps the relative path valid on every OS (a literal backslash
# path only resolves on Windows), and JSON text is UTF-8, so decode it
# explicitly instead of relying on the platform's locale default.
allArtworks_path = os.path.join("api-data", "getting-started", "allArtworks.jsonl")
with open(allArtworks_path, "r", encoding="utf-8") as rows:
    # Skip blank lines (e.g. a trailing newline) that json.loads would reject
    allArtworks = [json.loads(row) for row in rows if row.strip()]


# NOTE(review): `artworks_dates` is not defined in any cell visible above, so
# this cell would raise NameError on a fresh kernel -- confirm where it is
# meant to come from (`df` is not used by the visible cells that follow).
artworks_list = artworks_dates["data"]

# Collect the id, title and display date of every artwork record
data = [
    {
        "Artwork ID": artwork["id"],
        "Title": artwork["title"],
        "Date Display": artwork["date_display"],
    }
    for artwork in artworks_list
]

# Build the table from the collected records and print it
df = pd.DataFrame(data)
print(df)


In [9]:
# Create a DataFrame from the list of parsed JSONL records
# (assumes each record is a JSON object, so its keys become the columns)
allArtworks_df = pd.DataFrame(allArtworks)

In [19]:
# Genres (values of the original 'department_title' column) to leave out
class_removal = ['Photography and Media', 'Textiles', 'Applied Arts of Europe', 'AIC Archives']

# NOTE: this cell rebinds allArtworks_df, so running it a second time without
# rebuilding the frame fails with KeyError: 'artist_title' (already renamed).
allArtworks_df = (
    allArtworks_df
    # Lower-cased copy of the artist name, used for case-insensitive matching
    .assign(artist_title_lower=lambda frame: frame['artist_title'].str.lower())
    # Rename the department, artist and title columns
    .rename(columns={"department_title": "artwork_genre", "artist_title": "artist", "title": "title_of_art"})
    # Keep only the rows whose genre is not in the removal list
    .loc[lambda frame: ~frame["artwork_genre"].isin(class_removal)]
)

allArtworks_df.head()

Unnamed: 0,id,title_of_art,main_reference_number,artwork_genre,artist,artist_title_lower
0,4,Priest and Boy,1880.1,Prints and Drawings,Lawrence Carmichael Earle,lawrence carmichael earle
1,9,"Interior of St. Mark's, Venice",1887.232,Arts of the Americas,David Dalhoff Neal,david dalhoff neal
2,11,Self-Portrait,1887.234,Arts of the Americas,Walter Shirlaw,walter shirlaw
3,16,The Fall of the Giants,1887.249,Prints and Drawings,Salvator Rosa,salvator rosa
4,19,"View of Ponte Lugano on the Anio, from Views o...",1887.252,Prints and Drawings,Giovanni Battista Piranesi,giovanni battista piranesi


In [20]:
# Build the join key: the artist name from 'nameP', lower-cased and with
# leading/trailing whitespace removed
normalized_names = csv101_df['nameP'].str.lower().str.strip()
csv101_df["artist_title_lower"] = normalized_names

csv101_df.head()

Unnamed: 0,nameP,colloq_name,last_name,lifespanP,separator,bio,artist_title_lower
0,Pablo Picasso,Picasso,Picasso,1881-1973,1,"Love him or hate him, Pablo Picasso changed it...",pablo picasso
1,Giotto Di Bondone,Giotto,Di Bondone,c.1267-1337,1,The greatest renovator of early European paint...,giotto di bondone
2,Leonardo Da Vinci,Leonardo,Da Vinci,1452-1519,1,There is no artist more legendary than Leonard...,leonardo da vinci
3,Paul Cézanne,Cézanne,Cézanne,1839-1906,1,“Cézanne is the father of us all”. This lapida...,paul cézanne
4,Rembrandt Van Rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",rembrandt van rijn


In [37]:
# Inner-join the artworks to the artist list on the lower-cased artist name,
# then tag every matched row with the museum it came from
merged_df = pd.merge(
    allArtworks_df,
    csv101_df,
    how='inner',
    on='artist_title_lower',
).assign(museum='Art_Institute_of_Chicago')

merged_df.head()


Unnamed: 0,id,title_of_art,main_reference_number,artwork_genre,artist,artist_title_lower,nameP,colloq_name,last_name,lifespanP,separator,bio,museum
0,74,Three Oriental Figures (Jacob and Laban?),1887.462,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt Van Rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago
1,4196,"The Artist's Mother Seated at a Table, Looking...",1956.981,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt Van Rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago
2,9596,The First Oriental Head,1923.1108,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt Van Rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago
3,12909,Kostverloren Castle in Decay,1961.49,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt Van Rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago
4,13081,Self-Portrait in a Flat Cap and Embroidered Dress,1924.613,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt Van Rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago


To get each artwork's creation date, open the JSON file named after the artwork's id in the artworks folder, read its date_display value, and append it to the table as a new column.

In [38]:
# Look up an artwork's creation date in its per-artwork JSON file
def extract_date_display(row, artworks_dir=os.path.join('api-data', 'json', 'artworks')):
    """Return the 'date_display' value stored for the artwork in ``row``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide an ``'id'`` entry; the file ``<id>.json`` is looked up.
    artworks_dir : str, optional
        Directory holding the per-artwork JSON files. Defaults to the relative
        ``api-data/json/artworks`` folder, built with os.path.join so the
        separator is correct on every OS.

    Returns
    -------
    The value stored under ``'date_display'``, or None when the file does not
    exist or has no such key.
    """
    file_path = os.path.join(artworks_dir, f"{row['id']}.json")

    try:
        # JSON text is UTF-8; decode explicitly rather than using the locale default
        with open(file_path, encoding='utf-8') as json_file:
            record = json.load(json_file)
    except FileNotFoundError:
        # No file for this id: report a missing date instead of failing
        return None

    return record.get('date_display')

In [39]:
# Look up the creation date for every row (one JSON lookup per artwork id)
creation_dates = merged_df.apply(extract_date_display, axis=1)

# Attach the dates as 'creation_date' and drop 'nameP', which repeats the
# artist already held in the 'artist' column
merged_df = merged_df.assign(creation_date=creation_dates).drop(columns=['nameP'])

merged_df.head()

Unnamed: 0,id,title_of_art,main_reference_number,artwork_genre,artist,artist_title_lower,colloq_name,last_name,lifespanP,separator,bio,museum,creation_date
0,74,Three Oriental Figures (Jacob and Laban?),1887.462,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago,1641
1,4196,"The Artist's Mother Seated at a Table, Looking...",1956.981,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago,c. 1631
2,9596,The First Oriental Head,1923.1108,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago,1635
3,12909,Kostverloren Castle in Decay,1961.49,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago,c. 1652
4,13081,Self-Portrait in a Flat Cap and Embroidered Dress,1924.613,Prints and Drawings,Rembrandt van Rijn,rembrandt van rijn,Rembrandt,Rijn,1606-1669,2,"The fascinating, magnetic interplay of light a...",Art_Institute_of_Chicago,c. 1642


In [41]:
# Keep only the columns wanted in the exported table, in this order
aic_columns = ['artist', 'title_of_art', 'museum', 'artwork_genre', 'creation_date']
artworkAIC = merged_df.loc[:, aic_columns]

artworkAIC

Unnamed: 0,artist,title_of_art,museum,artwork_genre,creation_date
0,Rembrandt van Rijn,Three Oriental Figures (Jacob and Laban?),Art_Institute_of_Chicago,Prints and Drawings,1641
1,Rembrandt van Rijn,"The Artist's Mother Seated at a Table, Looking...",Art_Institute_of_Chicago,Prints and Drawings,c. 1631
2,Rembrandt van Rijn,The First Oriental Head,Art_Institute_of_Chicago,Prints and Drawings,1635
3,Rembrandt van Rijn,Kostverloren Castle in Decay,Art_Institute_of_Chicago,Prints and Drawings,c. 1652
4,Rembrandt van Rijn,Self-Portrait in a Flat Cap and Embroidered Dress,Art_Institute_of_Chicago,Prints and Drawings,c. 1642
...,...,...,...,...,...
3950,Frans Hals,Portrait of a Lady,Art_Institute_of_Chicago,Painting and Sculpture of Europe,1627
3951,Giotto di Bondone,Prayer for the Flowering of the Rods,Art_Institute_of_Chicago,Prints and Drawings,n.d.
3952,Giotto di Bondone,Ascension of Christ,Art_Institute_of_Chicago,Prints and Drawings,n.d.
3953,Giotto di Bondone,Wedding Procession of Mary and Joseph,Art_Institute_of_Chicago,Prints and Drawings,n.d.


In [42]:
# Write the selected columns to aic.csv in the working directory;
# index=False leaves the DataFrame's row index out of the file
artworkAIC.to_csv('aic.csv', index=False)

In [46]:
# Tally how many of the selected artworks fall under each artwork_genre value
genre_column = artworkAIC.loc[:, 'artwork_genre']
unique_counts = genre_column.value_counts()
print(unique_counts)

artwork_genre
Prints and Drawings                                  3595
Modern Art                                            120
Painting and Sculpture of Europe                      115
Contemporary Art                                       56
Ryerson and Burnham Libraries Special Collections      18
Arts of the Americas                                   12
Name: count, dtype: int64


In [47]:
# Tally the selected artworks per artist, then report how many distinct
# artists are represented
artist_counts = artworkAIC.loc[:, 'artist'].value_counts()
n_artists = len(artist_counts)
print(artist_counts)

print(n_artists)

artist
Jasper Johns               841
Pablo Picasso              369
Albrecht Dürer             258
Rembrandt van Rijn         238
Paul Gauguin               217
Henri Matisse              211
Winslow Homer              184
Eugène Delacroix           122
Marc Chagall               114
Edvard Munch               113
Édouard Manet              109
Paul Klee                  102
David Hockney               76
Joan Miró                   73
Pierre-Auguste Renoir       66
Roy Lichtenstein            60
Andy Warhol                 60
William Hogarth             56
Max Ernst                   53
Georges Braque              49
Claude Monet                47
Gerhard Richter             44
Fernand Léger               40
Claude Lorrain              40
Salvador Dalí               39
Edward Hopper               30
William Blake               29
Andrea Mantegna             29
Grant Wood                  24
Marcel Duchamp              22
Michelangelo Buonarroti     18
Vincent van Gogh            18
G