# Set-up

In [2]:
# libraries
import re
import numpy as np
import pandas as pd
from pymongo import MongoClient

In [3]:
# let's connect to the localhost
client = MongoClient()

# let's create a database 
db = client.moma

# collection
artworks = db.artworks

# print connection
print("""
Database
==========
{}

Collection
==========
{}
""".format(db, artworks), flush=True
)


Database
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'moma')

Collection
Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'moma'), 'artworks')



## Data

![MoMa](https://images.musement.com/cover/0001/31/moma-museum-of-modern-art-tickets-tours-jpg_header-30520.jpeg?&q=60&fit=crop)

In [4]:
df = pd.read_csv('https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv')

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138415 entries, 0 to 138414
Data columns (total 29 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Title               138376 non-null  object 
 1   Artist              137103 non-null  object 
 2   ConstituentID       137103 non-null  object 
 3   ArtistBio           132341 non-null  object 
 4   Nationality         137103 non-null  object 
 5   BeginDate           137103 non-null  object 
 6   EndDate             137103 non-null  object 
 7   Gender              137103 non-null  object 
 8   Date                136187 non-null  object 
 9   Medium              128168 non-null  object 
 10  Dimensions          128247 non-null  object 
 11  CreditLine          135915 non-null  object 
 12  AccessionNumber     138415 non-null  object 
 13  Classification      138415 non-null  object 
 14  Department          138415 non-null  object 
 15  DateAcquired        131267 non-nul

# Simple loading

In [5]:
# simple and slow loading of data
d = {}

for i in df.index:
    d = {
        "_id": str(df.loc[i, "Cataloged"]) + str(df.loc[i, "ObjectID"]),
        "Title": df.loc[i, "Title"],
        "Date": df.loc[i, "Date"],
        "Artist": {
            "Name": df.loc[i, "Artist"],
            "Bio": df.loc[0, "ArtistBio"],
            "Nationality": df.loc[0, "Nationality"],
            "Birth": df.loc[0, "BeginDate"],
            "Death": df.loc[0, "EndDate"],
            "Gender": df.loc[0, "Gender"]
        },
        "Characteristics":{
            "Medium": df.loc[i,'Medium'], 
            "Dimensions": df.loc[i,'Dimensions'],
            "Circumference": df.loc[i,'Circumference (cm)'], 
            "Depth": df.loc[i,'Depth (cm)'], 
            "Diameter": df.loc[i,'Diameter (cm)'], 
            "Height": df.loc[i,'Height (cm)'],
            "Length": df.loc[i,'Length (cm)'], 
            "Weight": df.loc[i,'Weight (kg)'], 
            "Width": df.loc[i,'Width (cm)'], 
            "Seat Height": df.loc[i,'Seat Height (cm)'],
            "Duration": df.loc[i,'Duration (sec.)']
        },
        "Acquisition": {
            "Date": df.loc[i, "DateAcquired"],
            "CreditLine": df.loc[i, "CreditLine"],
            "Number": df.loc[i, "AccessionNumber"]
        },
        "Classification": df.loc[i, "Classification"],
        "Department": df.loc[i, "Department"],
        "URL": df.loc[i, "URL"], 
        "ThumbnailURL": df.loc[i, "ThumbnailURL"]
    }
    artworks.insert_one(d)

In [6]:
# for further reference https://docs.mongodb.com/manual/reference/command/collStats/
stats = db.command("collstats", "artworks")
s0 = stats.get('size')/10**6

print("""
Namespace: {}

Document Count: {}

Size: {}

""".format(stats.get('ns'), stats.get('count'), s0), flush=True)


Namespace: moma.artworks

Document Count: 138415

Size: 116.386556




## Cleaning

In [7]:
# get key names
l = []
for i in d.keys():
    try:
        for b in d.get(str(i)).keys():
            l.append(str(i) + '.' + str(b))
    except:
        l.append(i)

In [8]:
# unset NaN fields
for i in l:
    update = artworks.update_many({str(i):np.nan},{"$unset": {str(i):""}})
    print("""
    Key: {}
    Matched: {}
    Modified: {}
    ------------
    """.format(i, update.matched_count, update.modified_count), flush=True)


    Key: _id
    Matched: 0
    Modified: 0
    ------------
    

    Key: Title
    Matched: 39
    Modified: 39
    ------------
    

    Key: Date
    Matched: 2228
    Modified: 2228
    ------------
    

    Key: Artist.Name
    Matched: 1312
    Modified: 1312
    ------------
    

    Key: Artist.Bio
    Matched: 0
    Modified: 0
    ------------
    

    Key: Artist.Nationality
    Matched: 0
    Modified: 0
    ------------
    

    Key: Artist.Birth
    Matched: 0
    Modified: 0
    ------------
    

    Key: Artist.Death
    Matched: 0
    Modified: 0
    ------------
    

    Key: Artist.Gender
    Matched: 0
    Modified: 0
    ------------
    

    Key: Characteristics.Medium
    Matched: 10247
    Modified: 10247
    ------------
    

    Key: Characteristics.Dimensions
    Matched: 10168
    Modified: 10168
    ------------
    

    Key: Characteristics.Circumference
    Matched: 138405
    Modified: 138405
    ------------
    

    Key: Characteristics.D

In [9]:
# for further reference https://docs.mongodb.com/manual/reference/command/collStats/
stats = db.command("collstats", "artworks")
s1 = stats.get('size')/10**6

print("""
Namespace: {}

Document Count: {}

Size: {}

Var. Size: {}

""".format(stats.get('ns'), stats.get('count'), s1, round(s0-s1, 2)), flush=True)


Namespace: moma.artworks

Document Count: 138415

Size: 95.856922

Var. Size: 20.53




## Further Cleaning

In [10]:
# change data type
update = artworks.update_many({"Date":{"$regex": '^[0-9]*$'}}, [{ "$set": { "Date": { "$toInt": "$Date" } } }])

print("""
    Key: {}
    Matched: {}
    Modified: {}
    ------------
    """.format("Date", update.matched_count, update.modified_count), flush=True)


    Key: Date
    Matched: 88769
    Modified: 88769
    ------------
    


In [11]:
# create an array field to store ranges
for i in artworks.find({"Date":{"$regex": '^[0-9]{4}-[0-9]{4}$'}}):
    date = i.get('Date').split('-')
    a = int(date[0])
    b = int(date[1])
    id = i.get('_id')
    update = artworks.update_one({"_id": str(id)},{"$set": {"Date": [a, b]}})
    print(update.matched_count, update.modified_count)
    
for i in artworks.find({"Date":{"$regex": '^[0-9]{4}–[0-9]{4}$'}}):
    date = i.get('Date').split('–')
    a = int(date[0])
    b = int(date[1])
    id = i.get('_id')
    update = artworks.update_one({"_id": str(id)},{"$set": {"Date": [a, b]}})
    print(update.matched_count, update.modified_count)

for i in artworks.find({"Date": {"$regex": '^[0-9]{4}-[0-9]{2}$'}}, {"Date": 1}):
    date = i.get('Date').split('-')
    a = int(date[0])
    b = int(date[0][0] + date[0][1] + date[1])
    id = i.get('_id')
    update = artworks.update_one({"_id": str(id)},{"$set": {"Date": [a, b]}})
    print(update.matched_count, update.modified_count)

for i in artworks.find({"Date": {"$regex": '^[0-9]{4}–[0-9]{2}$'}}, {"Date": 1}):
    date = i.get('Date').split('–')
    a = int(date[0])
    b = int(date[0][0] + date[0][1]+ date[1])
    id = i.get('_id')
    update = artworks.update_one({"_id": str(id)},{"$set": {"Date": [a, b]}})
    print(update.matched_count, update.modified_count)

# perform some further cleaning
for i in artworks.find({"Date":{"$regex": '^c. [0-9]{4}$'}}):
    date = i.get('Date').split(' ')
    b = int(date[1])
    id = i.get('_id')
    update = artworks.update_one({"_id": str(id)},{"$set": {"Date": b}})
    print(update.matched_count, update.modified_count)

1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1


In [12]:
# remove Unknown or n.d.
update = artworks.update_many({"Date": {"$in": ["n.d.", "Unknown", "unknown"]}}, {"$unset": {"Date": ""}})
print("""
    Matched: {}
    Modified: {}
    """.format(update.matched_count, update.modified_count), flush=True)


    Matched: 948
    Modified: 948
    


In [13]:
for i in artworks.find({"Date": {"$type": "string"}}, {"Date":1}):
    print(i)

{'_id': 'Y224', 'Date': 'c. 1989-91'}
{'_id': 'Y244', 'Date': 'c. 1915-17'}
{'_id': 'Y245', 'Date': 'c. 1915-17'}
{'_id': 'Y246', 'Date': 'c. 1915-17'}
{'_id': 'Y249', 'Date': 'c. 1915-17'}
{'_id': 'Y259', 'Date': 'c.1985'}
{'_id': 'Y270', 'Date': '1985.'}
{'_id': 'Y273', 'Date': '1986.'}
{'_id': 'Y275', 'Date': '1985.'}
{'_id': 'N297', 'Date': '.1-3 1987; .4 1990'}
{'_id': 'Y306', 'Date': 'c. 1929-30'}
{'_id': 'Y318', 'Date': 'c. 1918-20'}
{'_id': 'Y319', 'Date': 'c.1918-1920'}
{'_id': 'Y324', 'Date': 'c.1976'}
{'_id': 'Y326', 'Date': 'c.1976'}
{'_id': 'Y328', 'Date': 'c.1976'}
{'_id': 'Y345', 'Date': '1989.'}
{'_id': 'Y454', 'Date': ' 1961'}
{'_id': 'Y490', 'Date': 'c. 1978-84'}
{'_id': 'Y495', 'Date': 'c. 1978-84'}
{'_id': 'Y500', 'Date': 'c. 1978-84'}
{'_id': 'Y515', 'Date': 'before 1933'}
{'_id': 'Y564', 'Date': 'c. 1960-62'}
{'_id': 'Y651', 'Date': 'after 1938'}
{'_id': 'Y653', 'Date': 'after 1938'}
{'_id': 'Y715', 'Date': 'after 1938'}
{'_id': 'Y723', 'Date': 'after 1938'}
{'_id

{'_id': 'Y3793', 'Date': 'After 1933'}
{'_id': 'Y3794', 'Date': 'After 1933'}
{'_id': 'Y3798', 'Date': 'After 1933'}
{'_id': 'Y3799', 'Date': 'After 1933'}
{'_id': 'Y3800', 'Date': 'After 1933'}
{'_id': 'Y3801', 'Date': 'After 1933'}
{'_id': 'Y3802', 'Date': 'After 1933'}
{'_id': 'Y3815', 'Date': 'c. 1927-35'}
{'_id': 'Y3925', 'Date': 'c. 1949-53'}
{'_id': 'Y3935', 'Date': '1960s'}
{'_id': 'Y3942', 'Date': '1940s'}
{'_id': 'Y3980', 'Date': 'Before 1956'}
{'_id': 'Y3983', 'Date': 'Before 1956'}
{'_id': 'Y3987', 'Date': 'c. 1932-66'}
{'_id': 'Y3992', 'Date': 'c. 1932-66'}
{'_id': 'Y3993', 'Date': 'c. 1932-66'}
{'_id': 'Y3994', 'Date': 'c. 1932-66'}
{'_id': 'Y3997', 'Date': 'c. 1932-66'}
{'_id': 'Y4000', 'Date': 'c. 1932-66'}
{'_id': 'Y4002', 'Date': 'c. 1932-66'}
{'_id': 'Y4003', 'Date': 'c. 1932-66'}
{'_id': 'Y4004', 'Date': 'c. 1932-66'}
{'_id': 'Y4006', 'Date': 'c. 1932-66'}
{'_id': 'Y4008', 'Date': 'c. 1932-66'}
{'_id': 'Y4010', 'Date': 'c. 1932-66'}
{'_id': 'Y4011', 'Date': 'c. 1932

{'_id': 'N18610', 'Date': '(Print executed 1964)'}
{'_id': 'N18611', 'Date': '(Print executed 1964)'}
{'_id': 'N18612', 'Date': '(Print executed 1964)'}
{'_id': 'N18613', 'Date': '(Print executed 1964)'}
{'_id': 'N18614', 'Date': '(Print executed 1964)'}
{'_id': 'N18615', 'Date': '(Print executed 1964)'}
{'_id': 'N18616', 'Date': '(Print executed 1964)'}
{'_id': 'N18617', 'Date': '(Print executed 1964)'}
{'_id': 'N18618', 'Date': '(Print executed 1964)'}
{'_id': 'N18619', 'Date': '(Print executed 1964)'}
{'_id': 'N18640', 'Date': '(Print executed 1948)'}
{'_id': 'N18641', 'Date': '1961.  (Prints executed 1948-1960).'}
{'_id': 'N18642', 'Date': '(Prints executed 1951-1952)'}
{'_id': 'N18643', 'Date': '(Prints executed 1952)'}
{'_id': 'N18644', 'Date': '(Prints executed 1952)'}
{'_id': 'N18645', 'Date': '(Prints executed 1952)'}
{'_id': 'N18646', 'Date': '(Print executed 1952)'}
{'_id': 'N18647', 'Date': '(Prints executed 1952)'}
{'_id': 'N18648', 'Date': '(Print executed 1952)'}
{'_id':

{'_id': 'Y30101', 'Date': '1896, published 1938'}
{'_id': 'Y30102', 'Date': '1896, published 1938'}
{'_id': 'Y30103', 'Date': '1896, published 1938'}
{'_id': 'Y30104', 'Date': '1896, published 1938'}
{'_id': 'Y30105', 'Date': '1896, published 1938'}
{'_id': 'Y30106', 'Date': 'c. 1910, published 1938'}
{'_id': 'Y30107', 'Date': 'c. 1910, published 1938'}
{'_id': 'Y30108', 'Date': 'c. 1910, published 1938'}
{'_id': 'Y30109', 'Date': '1896, published 1938'}
{'_id': 'Y30110', 'Date': '1896, published 1938'}
{'_id': 'Y30111', 'Date': '1896, published 1938'}
{'_id': 'Y30112', 'Date': '1896, published 1938'}
{'_id': 'Y30113', 'Date': '1896, published 1938'}
{'_id': 'Y30114', 'Date': '1896, published 1938'}
{'_id': 'Y30115', 'Date': 'c. 1910, published 1938'}
{'_id': 'Y30116', 'Date': 'c. 1910, published 1938'}
{'_id': 'Y30117', 'Date': '1896, published 1938'}
{'_id': 'Y30118', 'Date': 'c. 1910, published 1938'}
{'_id': 'Y30119', 'Date': '1896, published 1938'}
{'_id': 'Y30120', 'Date': 'c. 19

{'_id': 'Y35849', 'Date': '(c. 1926)'}
{'_id': 'Y35854', 'Date': '(c. 1865-68)'}
{'_id': 'Y35856', 'Date': '(1943)'}
{'_id': 'Y35859', 'Date': '(1945)'}
{'_id': 'Y35861', 'Date': '(c. 1926)'}
{'_id': 'Y35864', 'Date': '(1967)'}
{'_id': 'Y35866', 'Date': '(c. 1902)'}
{'_id': 'Y35867', 'Date': 'Paris, December 1912'}
{'_id': 'Y35869', 'Date': '(c. 1905)'}
{'_id': 'Y35871', 'Date': '(c. 1926)'}
{'_id': 'Y35876', 'Date': '(c. 1875)'}
{'_id': 'Y35880', 'Date': '(c. 1926)'}
{'_id': 'Y35883', 'Date': '(1973)'}
{'_id': 'Y35888', 'Date': 'c.1930–48'}
{'_id': 'N35889', 'Date': '(1954-55)'}
{'_id': 'Y35891', 'Date': '(c. 1926)'}
{'_id': 'Y35894', 'Date': '(1973)'}
{'_id': 'Y35895', 'Date': '(1977)'}
{'_id': 'Y35904', 'Date': '(1942)'}
{'_id': 'N35908', 'Date': '(c. 1953)'}
{'_id': 'Y35909', 'Date': '(1955)'}
{'_id': 'Y35937', 'Date': 'April 1972'}
{'_id': 'Y35939', 'Date': '(c. 1875)'}
{'_id': 'N35942', 'Date': '(c. 1953)'}
{'_id': 'Y35943', 'Date': '(1955)'}
{'_id': 'Y35949', 'Date': '(1933)'}
{

{'_id': 'Y48673', 'Date': 'November 1972'}
{'_id': 'Y48674', 'Date': 'July 1973'}
{'_id': 'Y48692', 'Date': 'July 1973'}
{'_id': 'N48818', 'Date': '1960s'}
{'_id': 'Y48828', 'Date': 'January 22, 1974'}
{'_id': 'N48874', 'Date': '1961 or 1962'}
{'_id': 'Y48914', 'Date': '1860 or 1861'}
{'_id': 'N48938', 'Date': 'October 28, 1972'}
{'_id': 'N48970', 'Date': 'November 1, 1972'}
{'_id': 'Y49022', 'Date': '1860 or 1861'}
{'_id': 'N49204', 'Date': 'August 1989'}
{'_id': 'N49243', 'Date': 'November 1990'}
{'_id': 'Y49246', 'Date': 'June 20, 1941'}
{'_id': 'Y49258', 'Date': '1840s'}
{'_id': 'N49261', 'Date': 'March 1989'}
{'_id': 'N49276', 'Date': '1840s'}
{'_id': 'N49279', 'Date': 'August 1989'}
{'_id': 'N49282', 'Date': 'October 15, 1950'}
{'_id': 'Y49346', 'Date': 'c. 1852-55'}
{'_id': 'N49410', 'Date': 'May - June 1991'}
{'_id': 'Y49425', 'Date': 'May - June 1991'}
{'_id': 'N49440', 'Date': 'May - June 1991'}
{'_id': 'N49455', 'Date': 'May - June 1991'}
{'_id': 'N49469', 'Date': 'May - Jun

{'_id': 'N62622', 'Date': '(c. 1955)'}
{'_id': 'N62623', 'Date': '(c. 1955)'}
{'_id': 'N62624', 'Date': '(c. 1955)'}
{'_id': 'N62625', 'Date': '(c. 1955)'}
{'_id': 'N62626', 'Date': '(c. 1955)'}
{'_id': 'N62627', 'Date': '(c. 1955)'}
{'_id': 'N62628', 'Date': '(c. 1955)'}
{'_id': 'N62629', 'Date': '(c. 1955)'}
{'_id': 'Y62632', 'Date': '(1969)'}
{'_id': 'Y62730', 'Date': '1984; reprinted 1993'}
{'_id': 'N62737', 'Date': '(1921) Published 1922'}
{'_id': 'N62766', 'Date': '(1991-92)'}
{'_id': 'Y62767', 'Date': '(1991-92)'}
{'_id': 'N62768', 'Date': '(1991-92)'}
{'_id': 'N62769', 'Date': '(1991-92)'}
{'_id': 'N62770', 'Date': '(1991-92)'}
{'_id': 'N62771', 'Date': '(1991-92)'}
{'_id': 'N62772', 'Date': '(1991-92)'}
{'_id': 'N62773', 'Date': '(1991-92)'}
{'_id': 'N62774', 'Date': '(1991-92)'}
{'_id': 'N62775', 'Date': '(1991-92)'}
{'_id': 'N62776', 'Date': '(1991-92)'}
{'_id': 'N62777', 'Date': '(1991-92)'}
{'_id': 'N62778', 'Date': '(1991-92)'}
{'_id': 'Y62780', 'Date': '(c. 1892)'}
{'_id

{'_id': 'N67507', 'Date': 'published March 1898'}
{'_id': 'N67508', 'Date': 'published March 1898'}
{'_id': 'N67511', 'Date': '(April 6-7) 1970'}
{'_id': 'Y67515', 'Date': '1979, printed 1981, published 1983'}
{'_id': 'Y67522', 'Date': '1916, published 1920'}
{'_id': 'Y67524', 'Date': '(1916, published 1917)'}
{'_id': 'N67525', 'Date': 'published April 1898'}
{'_id': 'N67526', 'Date': 'published April 1898'}
{'_id': 'N67527', 'Date': 'published April 1898'}
{'_id': 'N67528', 'Date': 'published April 1898'}
{'_id': 'N67529', 'Date': 'published April 1898'}
{'_id': 'N67532', 'Date': '(April 8-16) 1970'}
{'_id': 'Y67536', 'Date': '1979, printed 1981, published 1983'}
{'_id': 'Y67537', 'Date': '(1911)'}
{'_id': 'Y67541', 'Date': '(May 4, 1945)'}
{'_id': 'N67542', 'Date': '(1948)'}
{'_id': 'Y67543', 'Date': '(1926)'}
{'_id': 'Y67546', 'Date': '(1916, published 1917)'}
{'_id': 'N67547', 'Date': 'published May 1898'}
{'_id': 'N67548', 'Date': 'published May 1898'}
{'_id': 'N67549', 'Date': 'p

{'_id': 'N74680', 'Date': '(August 21-24) 1961'}
{'_id': 'N74684', 'Date': '(August 29) 1961'}
{'_id': 'Y74686', 'Date': '1967, published 1968'}
{'_id': 'N74688', 'Date': '(August 29-31) 1961'}
{'_id': 'N74692', 'Date': '(August 31-September 5) 1961'}
{'_id': 'Y74704', 'Date': '(September 19-27) 1961'}
{'_id': 'Y74706', 'Date': '(1965)'}
{'_id': 'Y74708', 'Date': '(September 29) 1961'}
{'_id': 'Y74710', 'Date': '1962, published 1963'}
{'_id': 'Y74712', 'Date': '(September 26) 1961'}
{'_id': 'Y74714', 'Date': '1962, published 1963'}
{'_id': 'Y74716', 'Date': '(September 25-29) 1961'}
{'_id': 'Y74720', 'Date': '(September 28-October 2) 1961'}
{'_id': 'Y74724', 'Date': '(October 1-6) 1961'}
{'_id': 'Y74728', 'Date': '(October 1-5) 1961'}
{'_id': 'Y74732', 'Date': '(October 6-12) 1961'}
{'_id': 'Y74736', 'Date': '(October 11-13) 1961'}
{'_id': 'Y74740', 'Date': '(October 23-25) 1961'}
{'_id': 'Y74748', 'Date': '(January 2-15) 1962'}
{'_id': 'Y74752', 'Date': '(January 17-18) 1962'}
{'_id':

{'_id': 'Y82278', 'Date': '(2000)'}
{'_id': 'Y82279', 'Date': '(2000)'}
{'_id': 'Y82280', 'Date': '(2000)'}
{'_id': 'Y82281', 'Date': '(2000)'}
{'_id': 'Y82282', 'Date': '(2000)'}
{'_id': 'Y82295', 'Date': '(1991)'}
{'_id': 'Y82298', 'Date': '(1920)'}
{'_id': 'Y82300', 'Date': '(c. 1920)'}
{'_id': 'Y82317', 'Date': '(1989)'}
{'_id': 'Y82319', 'Date': '(1986-87)'}
{'_id': 'Y82322', 'Date': '(1989)'}
{'_id': 'Y82324', 'Date': '(1962)'}
{'_id': 'Y82325', 'Date': '(1996)'}
{'_id': 'N82333', 'Date': '(2000)'}
{'_id': 'Y82369', 'Date': '1926–1928 (model 1970)'}
{'_id': 'Y82378', 'Date': 'August 1978'}
{'_id': 'Y82379', 'Date': 'December 4, 1978'}
{'_id': 'Y82380', 'Date': 'March 1979'}
{'_id': 'Y82381', 'Date': 'April 1979'}
{'_id': 'Y82383', 'Date': 'April 1979'}
{'_id': 'Y82384', 'Date': 'May 1979'}
{'_id': 'Y82386', 'Date': 'May 1979'}
{'_id': 'Y82388', 'Date': 'June 1979'}
{'_id': 'Y82395', 'Date': 'June 1979'}
{'_id': 'Y82396', 'Date': 'June 1979'}
{'_id': 'Y82407', 'Date': 'June 1979'}

{'_id': 'Y95888', 'Date': '(2002)'}
{'_id': 'Y95889', 'Date': '(2002)'}
{'_id': 'Y95890', 'Date': '(2002)'}
{'_id': 'Y95891', 'Date': '(2002)'}
{'_id': 'Y95892', 'Date': '(2002)'}
{'_id': 'Y95893', 'Date': '(2002)'}
{'_id': 'Y95894', 'Date': '(2002)'}
{'_id': 'N95897', 'Date': '(1998)'}
{'_id': 'Y95898', 'Date': '(1998)'}
{'_id': 'N95899', 'Date': '(1998)'}
{'_id': 'N95900', 'Date': '(1998)'}
{'_id': 'Y95930', 'Date': '(1976)'}
{'_id': 'Y95937', 'Date': '(2003)'}
{'_id': 'Y95938', 'Date': '(2002)'}
{'_id': 'Y95939', 'Date': '(2004)'}
{'_id': 'Y95946', 'Date': '(1999)'}
{'_id': 'Y95947', 'Date': '(1996)'}
{'_id': 'Y95948', 'Date': '(1998)'}
{'_id': 'Y95949', 'Date': '(1999)'}
{'_id': 'Y95950', 'Date': '(2001)'}
{'_id': 'Y95952', 'Date': '(2003)'}
{'_id': 'Y95954', 'Date': '(2003)'}
{'_id': 'Y95955', 'Date': '(2003)'}
{'_id': 'Y95956', 'Date': '(2003)'}
{'_id': 'Y95960', 'Date': '(2003)'}
{'_id': 'Y95961', 'Date': '(2003)'}
{'_id': 'Y95967', 'Date': '(2003)'}
{'_id': 'Y95971', 'Date': '(

{'_id': 'Y111979', 'Date': '(1995)'}
{'_id': 'N111980', 'Date': '(1974)'}
{'_id': 'Y112273', 'Date': '(1964)'}
{'_id': 'Y112876', 'Date': '(1967)'}
{'_id': 'Y112877', 'Date': '(1991)'}
{'_id': 'Y112885', 'Date': '(1990)'}
{'_id': 'Y113206', 'Date': 'c. 1971–72'}
{'_id': 'Y113207', 'Date': 'c. 1970–71'}
{'_id': 'Y113211', 'Date': 'c. 1958–61'}
{'_id': 'Y113346', 'Date': 'December 2006'}
{'_id': 'Y113630', 'Date': 'ca. 1901'}
{'_id': 'Y113631', 'Date': 'ca. 1903'}
{'_id': 'Y113870', 'Date': 'January 8, 1986'}
{'_id': 'Y113871', 'Date': 'January 8, 1987'}
{'_id': 'Y113872', 'Date': 'January 8, 1987'}
{'_id': 'Y114224', 'Date': '(c. 1970s)'}
{'_id': 'Y114226', 'Date': '(c. 1970s)'}
{'_id': 'Y114296', 'Date': 'c. 1957-58'}
{'_id': 'N114297', 'Date': 'c. 1957-58'}
{'_id': 'Y114438', 'Date': 'c. 1942-46'}
{'_id': 'Y114769', 'Date': 'c. 1963–68'}
{'_id': 'Y114771', 'Date': 'c. 1970–71'}
{'_id': 'N114794', 'Date': 'c. 1951-52'}
{'_id': 'Y114858', 'Date': '1921 (executed 1920)'}
{'_id': 'Y114859

{'_id': 'Y148100', 'Date': 'c. late 1960s, stamped 1958'}
{'_id': 'Y148159', 'Date': '(1974)'}
{'_id': 'N148193', 'Date': 'c.1936-38'}
{'_id': 'N148195', 'Date': 'c.1936-38'}
{'_id': 'Y148208', 'Date': '1888–1904, published 1904'}
{'_id': 'Y148675', 'Date': '1968/2012'}
{'_id': 'Y148938', 'Date': '1984 construction after 1964-65 photograph'}
{'_id': 'N149000', 'Date': 'c.1968'}
{'_id': 'N149002', 'Date': 'c.1968'}
{'_id': 'N149003', 'Date': 'c.1968'}
{'_id': 'N149004', 'Date': '1964, assembled c.1968'}
{'_id': 'N149005', 'Date': 'c.1968'}
{'_id': 'N149006', 'Date': 'c.1968'}
{'_id': 'N149008', 'Date': 'c.1968'}
{'_id': 'N149009', 'Date': '1965, published 1967'}
{'_id': 'N149010', 'Date': '1966, published 1967'}
{'_id': 'N149015', 'Date': '1962–63/c. 1968'}
{'_id': 'N149016', 'Date': '1962–63/c. 1968'}
{'_id': 'N149215', 'Date': 'c.1968'}
{'_id': 'Y149218', 'Date': '2002 (originally published 1704)'}
{'_id': 'N149295', 'Date': '(1921)'}
{'_id': 'N149296', 'Date': '(1921)'}
{'_id': 'N149

{'_id': 'Y187318', 'Date': 'February 6, 1931'}
{'_id': 'Y187381', 'Date': 'January 1957'}
{'_id': 'Y187383', 'Date': 'February 1947'}
{'_id': 'N187492', 'Date': 'c.1953'}
{'_id': 'Y187640', 'Date': '1966 (printed 2014)'}
{'_id': 'N187917', 'Date': 'c.1937-1938'}
{'_id': 'N187918', 'Date': 'c. 1951-52'}
{'_id': 'Y188137', 'Date': 'Vallauris, 1947'}
{'_id': 'Y188450', 'Date': '2007/2015'}
{'_id': 'Y188775', 'Date': '1961, printed 1979'}
{'_id': 'N188810', 'Date': 'November 1962'}
{'_id': 'Y188986', 'Date': 'c.1937'}
{'_id': 'Y189087', 'Date': 'c.2010'}
{'_id': 'Y189145', 'Date': 'c.1981'}
{'_id': 'Y189146', 'Date': 'c.1981'}
{'_id': 'Y189173', 'Date': 'c.1980'}
{'_id': 'Y189523', 'Date': '1975 (mailed 1976)'}
{'_id': 'Y189524', 'Date': '1975 (mailed 1976)'}
{'_id': 'Y189525', 'Date': '1975 (mailed 1976)'}
{'_id': 'Y189526', 'Date': '1975 (mailed 1976)'}
{'_id': 'Y189527', 'Date': '1975 (mailed 1976)'}
{'_id': 'Y189528', 'Date': '1975 (mailed 1976)'}
{'_id': 'Y189529', 'Date': '1975 (mail

{'_id': 'Y277416', 'Date': 'c. 1938–60'}
{'_id': 'Y277417', 'Date': 'c. 1938–60'}
{'_id': 'Y277418', 'Date': 'c. 1938–60'}
{'_id': 'Y277419', 'Date': 'c. 1938–60'}
{'_id': 'Y277420', 'Date': 'c. 1938–60'}
{'_id': 'Y277421', 'Date': 'c. 1938–60'}
{'_id': 'Y277422', 'Date': 'c. 1938–60'}
{'_id': 'Y277423', 'Date': 'c. 1938–60'}
{'_id': 'Y277424', 'Date': 'c. 1938–60'}
{'_id': 'Y277425', 'Date': 'c. 1938–60'}
{'_id': 'Y277426', 'Date': 'c. 1938–60'}
{'_id': 'Y277427', 'Date': 'c. 1938–60'}
{'_id': 'Y277428', 'Date': 'c. 1938–60'}
{'_id': 'Y277429', 'Date': 'c. 1938–60'}
{'_id': 'Y277430', 'Date': 'c. 1938–60'}
{'_id': 'Y277431', 'Date': 'c. 1938–60'}
{'_id': 'Y277432', 'Date': 'c. 1938–60'}
{'_id': 'Y277433', 'Date': 'c. 1938–60'}
{'_id': 'Y277434', 'Date': 'c. 1938–60'}
{'_id': 'Y277435', 'Date': 'c. 1938–60'}
{'_id': 'Y277436', 'Date': 'c. 1938–60'}
{'_id': 'Y277437', 'Date': 'c. 1938–60'}
{'_id': 'Y277438', 'Date': 'c. 1938–60'}
{'_id': 'Y277439', 'Date': 'c. 1938–60'}
{'_id': 'Y277440

# Aggregation and loading

In [14]:
# collection
artw = db.artw

# print connection
print("""
Database
==========
{}

Collection
==========
{}
""".format(db, artw), flush=True
)


Database
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'moma')

Collection
Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'moma'), 'artw')



In [15]:
# df to dict
df.rename(columns={'Duration (sec.)': 'Duration (sec)'}, inplace=True)
dd = df.to_dict('records')

dd[0]

{'Title': 'Ferdinandsbrücke Project, Vienna, Austria (Elevation, preliminary version)',
 'Artist': 'Otto Wagner',
 'ConstituentID': '6210',
 'ArtistBio': '(Austrian, 1841–1918)',
 'Nationality': '(Austrian)',
 'BeginDate': '(1841)',
 'EndDate': '(1918)',
 'Gender': '(Male)',
 'Date': '1896',
 'Medium': 'Ink and cut-and-pasted painted pages on paper',
 'Dimensions': '19 1/8 x 66 1/2" (48.6 x 168.9 cm)',
 'CreditLine': 'Fractional and promised gift of Jo Carole and Ronald S. Lauder',
 'AccessionNumber': '885.1996',
 'Classification': 'Architecture',
 'Department': 'Architecture & Design',
 'DateAcquired': '1996-04-09',
 'Cataloged': 'Y',
 'ObjectID': 2,
 'URL': 'http://www.moma.org/collection/works/2',
 'ThumbnailURL': 'http://www.moma.org/media/W1siZiIsIjU5NDA1Il0sWyJwIiwiY29udmVydCIsIi1yZXNpemUgMzAweDMwMFx1MDAzZSJdXQ.jpg?sha=137b8455b1ec6167',
 'Circumference (cm)': nan,
 'Depth (cm)': nan,
 'Diameter (cm)': nan,
 'Height (cm)': 48.6,
 'Length (cm)': nan,
 'Weight (kg)': nan,
 'Width (

In [16]:
# insert array
insert = artw.insert_many(dd)

# define the pipeline
pipeline = [
    {"$project": 
     {
         "_id": {"$concat": ["$Cataloged", {"$toString": "$ObjectID"}]},
         "Title": "$Title",
         "Date": "$Date",
         "Artist": {
             "Name": "$Artist", 
             'Bio': "$ArtistBio",
             'Nationality': "$Nationality",
             "Birth": "$BeginDate",
             "Death": "$EndDate", 
             "Gender": "$Gender",
         },
        "Characteristics":{
            "Medium": '$Medium', 
            "Dimensions": '$Dimensions',
            "Circumference": '$Circumference (cm)', 
            "Depth": '$Depth (cm)', 
            "Diameter": '$Diameter (cm)', 
            "Height": '$Height (cm)',
            "Length": '$Length (cm)', 
            "Weight": '$Weight (kg)', 
            "Width": '$Width (cm)', 
            "Seat Height": '$Seat Height (cm)',
            "Duration": '$Duration (sec)'
        },
        "Acquisition": {
            "Date": "$DateAcquired",
            "CreditLine": "$CreditLine",
            "Number": "$AccessionNumber"
        },
        "Classification": "$Classification",
        "Department": "$Department",
        "URL": "$URL", 
        "ThumbnailURL": "$ThumbnailURL"
     }
    },
    { "$out" : "artw" }
]

# perform the aggregation
agr = artw.aggregate(pipeline)

In [17]:
# unset field with null values
[artw.update_many({str(i):np.nan},{"$unset": {str(i):""}}) for i in l]

[<pymongo.results.UpdateResult at 0x10515a550>,
 <pymongo.results.UpdateResult at 0x10515a280>,
 <pymongo.results.UpdateResult at 0x10515a640>,
 <pymongo.results.UpdateResult at 0x10515a410>,
 <pymongo.results.UpdateResult at 0x10515a780>,
 <pymongo.results.UpdateResult at 0x10515a0a0>,
 <pymongo.results.UpdateResult at 0x10515a5f0>,
 <pymongo.results.UpdateResult at 0x10e2a8f00>,
 <pymongo.results.UpdateResult at 0x10515a1e0>,
 <pymongo.results.UpdateResult at 0x10515a0f0>,
 <pymongo.results.UpdateResult at 0x112578af0>,
 <pymongo.results.UpdateResult at 0x1125782d0>,
 <pymongo.results.UpdateResult at 0x1125780f0>,
 <pymongo.results.UpdateResult at 0x110636320>,
 <pymongo.results.UpdateResult at 0x112578c30>,
 <pymongo.results.UpdateResult at 0x112578140>,
 <pymongo.results.UpdateResult at 0x1125787d0>,
 <pymongo.results.UpdateResult at 0x1125783c0>,
 <pymongo.results.UpdateResult at 0x112578d20>,
 <pymongo.results.UpdateResult at 0x112578230>,
 <pymongo.results.UpdateResult at 0x1125