In [62]:
import pandas as pd

In [63]:
# Load cmoa_no_null.csv into a DataFrame and preview its first five rows.
df_nn = pd.read_csv("cmoa_no_null.csv")
df_nn.head()

Unnamed: 0,title,creation_date,creation_date_earliest,creation_date_latest,medium,accession_number,id,credit_line,date_acquired,department,...,party_type,full_name,cited_name,role,nationality,birth_date,death_date,birth_place,death_place,small_img_url
0,Keith Haring,1984,1984-01-01,1984-01-01,gelatin silver print,2002.17,cmoa:things/692a68c5-af1e-4124-80f1-cbf38be51abe,Milton Fine Fund,2002-06-06,Contemporary Art,...,Person,Robert Mapplethorpe,"Mapplethorpe, Robert",,American,1947-01-01,1989-01-01,"New York City (New York state, United States)","New York City (New York state, United States)",https://cmoa-collection-images.s3.amazonaws.co...
1,Untitled,1964-1965,1964-01-01,1965-01-01,oil on canvas,2002.19,cmoa:things/871505af-a072-4a84-ab09-3abf932951b8,Joseph Soffer Family Trust Fund,2002-06-06,Contemporary Art,...,Person,Jo Baer,"Baer, Jo",,American,1929-01-01,,"Seattle (King County, Washington, United States)",,https://cmoa-collection-images.s3.amazonaws.co...
2,Snuff bottle: Carved Glass,late 18th century,1770-01-01,1799-01-01,glass,79.H,cmoa:things/fd7a0764-d061-4de5-9820-7076510037aa,Gift of Howard Heinz,1938-12-28,Decorative Arts and Design,...,Person,unknown Chinese,unknown Chinese,,Chinese,,,,,https://cmoa-collection-images.s3.amazonaws.co...
3,Snuff bottle: Carved Glass,late 19th century,1875-01-01,1899-01-01,glass,77.H,cmoa:things/25d8d23b-03a7-480f-ab1d-d36b5d01f543,Gift of Howard Heinz,1938-12-28,Decorative Arts and Design,...,Person,unknown Chinese,unknown Chinese,,Chinese,,,,,https://cmoa-collection-images.s3.amazonaws.co...
4,Snuff bottle: Carved Glass,18th century,1700-01-01,1799-01-01,glass,80.H,cmoa:things/4d8941d0-b89f-4750-a538-71e0037417d8,Gift of Howard Heinz,1938-12-28,Decorative Arts and Design,...,Person,unknown Chinese,unknown Chinese,,Chinese,,,,,https://cmoa-collection-images.s3.amazonaws.co...


In [64]:
import re

In [65]:
# Scratch check of the century-ordinal regex: the sample text below contains no
# "<digits>th" token, so findall is expected to return an empty list.
# The pattern is a raw string so that \d reaches the regex engine directly
# instead of relying on Python passing an invalid string escape through.
pattern = re.compile(r'(\d?\dth)')
s = """
... someline abc
... someother line
... some more lines"""
pattern.findall(s)

[]

In [66]:
# Value returned when no year can be extracted from the input.
_UNKNOWN_DATE = 999999

# One- or two-digit ordinal such as "1st", "2nd", "3rd", "18th"; the group
# captures only the digits.
_CENTURY_ORDINAL_RE = re.compile(r"(\d{1,2})(?:st|nd|rd|th)")

# Any run of four consecutive digits, treated as a year.
_FOUR_DIGIT_YEAR_RE = re.compile(r"\d{4}")


def transform_date(date_str: str) -> int:
    """Reduce a free-text creation date to a single integer year.

    The input is converted with ``str()`` first, so non-string values such as
    NaN are accepted and end up as the unknown-date sentinel.

    Rules, in order:

    1. Century form: the text contains the word "century" and at least one
       ordinal ("18th", "21st", ...). The Nth century spans the years
       (N-1)*100 .. (N-1)*100 + 99, so:
         * contains "late"  -> last year of the earliest century mentioned
         * contains "early" -> first year of the latest century mentioned
         * otherwise        -> mid-point (+50) of the first century mentioned
    2. Year form: the smallest four-digit number found in the text
       (e.g. "1964-1965" -> 1964).
    3. Anything else -> 999999.

    Args:
        date_str: The raw creation-date value.

    Returns:
        The derived year, or 999999 when no date could be recognised.
    """
    date_str = str(date_str)

    centuries = [int(digits) for digits in _CENTURY_ORDINAL_RE.findall(date_str)]
    if centuries and "century" in date_str:
        # "late" is tested before "early" so that a range such as
        # "late 18th-early 19th century" resolves to the end of the earlier
        # century rather than the start of the later one.
        if "late" in date_str:
            return (min(centuries) - 1) * 100 + 99
        if "early" in date_str:
            return (max(centuries) - 1) * 100
        return (centuries[0] - 1) * 100 + 50

    years = [int(year) for year in _FOUR_DIGIT_YEAR_RE.findall(date_str)]
    if years:
        return min(years)

    # No recognisable date.
    return _UNKNOWN_DATE

def transform_date_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a ``date_transformed`` column added.

    The new column is produced by applying ``transform_date`` to every value
    of the ``creation_date`` column. ``DataFrame.assign`` is used so that a
    new frame is always returned and the caller's frame is never modified,
    regardless of whether a full-slice ``.loc`` yields a view, a shallow copy
    or the same object in the installed pandas version.

    Args:
        df: Frame containing a ``creation_date`` column.

    Returns:
        A new DataFrame with all original columns plus ``date_transformed``.

    Raises:
        KeyError: If ``df`` has no ``creation_date`` column.
    """
    return df.assign(date_transformed=df["creation_date"].map(transform_date))


In [67]:
# Rebind df_nn to the frame returned by transform_date_df, which carries the
# additional "date_transformed" column.
df_nn = transform_date_df(df_nn)

In [68]:
# List the column labels; "date_transformed" should now be the last entry.
df_nn.columns

Index(['title', 'creation_date', 'creation_date_earliest',
       'creation_date_latest', 'medium', 'accession_number', 'id',
       'credit_line', 'date_acquired', 'department', 'physical_location',
       'item_width', 'item_height', 'item_depth', 'item_diameter', 'web_url',
       'provenance_text', 'classification', 'image_url', 'artist_id',
       'party_type', 'full_name', 'cited_name', 'role', 'nationality',
       'birth_date', 'death_date', 'birth_place', 'death_place',
       'small_img_url', 'date_transformed'],
      dtype='object')

In [69]:
# Count works per artist name, then drop every name containing "unknown"
# (e.g. "unknown Chinese") so only identified artists remain.
df_names = (
    df_nn["full_name"]
    .value_counts()
    .loc[lambda counts: ~counts.index.str.contains("unknown")]
)
df_names

full_name
Charles "Teenie" Harris    466
Samuel Rosenberg           225
Hiroshige Andō             212
W. Eugene Smith            160
Clyde Hare                 135
                          ... 
Thomas J. Doyle              1
Jean René Bazaine            1
Hans Albert Falk             1
David Levine                 1
George Inness                1
Name: count, Length: 3528, dtype: int64

In [70]:
# Number of leading entries of df_names to keep. The counts are listed in
# descending order, so these are the most frequent names.
amount = 200
df_names[:amount]

full_name
Charles "Teenie" Harris                         466
Samuel Rosenberg                                225
Hiroshige Andō                                  212
W. Eugene Smith                                 160
Clyde Hare                                      135
                                               ... 
Lorcan O'Herlihy|Lorcan O'Herlihy Architects     10
Giacomo Manzù                                    10
Joseph M. Margulies                              10
Christopher James                                10
Azechi Umetaro                                   10
Name: count, Length: 200, dtype: int64

In [71]:
# Keep only the rows whose artist is among the first `amount` names in df_names.
df_filtered = df_nn.loc[df_nn["full_name"].isin(df_names.index[:amount])]
df_filtered.head()

Unnamed: 0,title,creation_date,creation_date_earliest,creation_date_latest,medium,accession_number,id,credit_line,date_acquired,department,...,full_name,cited_name,role,nationality,birth_date,death_date,birth_place,death_place,small_img_url,date_transformed
14,Hôtel de la Duchesse de Villars--rue des Saint...,1901-1902,1901-01-01,1902-01-01,albumen print,2002.24.2.3,cmoa:things/bff5fb1b-1e33-479c-a9e5-acd61d07c526,Second Century Acquisition Fund,2002-06-06,Photography,...,Eugène Atget,"Atget, Eugène",,French,1857-01-01,1927-01-01,"Libourne (Gironde, Aquitaine, France)",Paris,https://cmoa-collection-images.s3.amazonaws.co...,1901
15,Ancien Monastère des Bénédictins anglais--269 ...,1905,1905-01-01,1905-01-01,albumen print,2002.24.2.5,cmoa:things/95c2efac-cbeb-4886-8fe0-dd97ad675a2b,Second Century Acquisition Fund,2002-06-06,Photography,...,Eugène Atget,"Atget, Eugène",,French,1857-01-01,1927-01-01,"Libourne (Gironde, Aquitaine, France)",Paris,https://cmoa-collection-images.s3.amazonaws.co...,1905
20,"Fontaine du Montreuil, faubourg Saint-Antoine ...",1904-1905,1904-01-01,1905-01-01,albumen print,2002.24.2.4,cmoa:things/f46e92ad-427a-4262-9bfa-ad06af9da8ef,Second Century Acquisition Fund,2002-06-06,Photography,...,Eugène Atget,"Atget, Eugène",,French,1857-01-01,1927-01-01,"Libourne (Gironde, Aquitaine, France)",Paris,https://cmoa-collection-images.s3.amazonaws.co...,1904
25,"Rue (du Chevalier) de la Barre-Montmarte, 18e ...",1899-1900,1899-01-01,1900-01-01,albumen print,2002.24.2.1,cmoa:things/5575373f-33e5-43ee-8214-a77383762d93,Second Century Acquisition Fund,2002-06-06,Photography,...,Eugène Atget,"Atget, Eugène",,French,1857-01-01,1927-01-01,"Libourne (Gironde, Aquitaine, France)",Paris,https://cmoa-collection-images.s3.amazonaws.co...,1899
26,"Cabaret de l'Épée de Bois - ou fut assassiné, ...",1901,1901-01-01,1901-01-01,albumen print,2002.24.2.2,cmoa:things/50a2526c-08a2-4b35-97f6-3b0f4aa9e43d,Second Century Acquisition Fund,2002-06-06,Photography,...,Eugène Atget,"Atget, Eugène",,French,1857-01-01,1927-01-01,"Libourne (Gironde, Aquitaine, France)",Paris,https://cmoa-collection-images.s3.amazonaws.co...,1901


In [61]:
# Write the filtered rows to cmoa_vis2.csv. The row index is written as the
# first column because to_csv defaults to index=True.
# NOTE(review): this cell's execution count (61) is lower than that of the
# cells above it, so the file on disk may predate the current df_filtered;
# re-run the notebook top to bottom before relying on the export.
df_filtered.to_csv("cmoa_vis2.csv")

In [39]:
# Inspect a single artist's rows. In the rows displayed, creation_date is
# empty, so date_transformed holds the 999999 unknown-date value.
df_filtered[df_filtered["full_name"]=="Alfred Bendiner"]

Unnamed: 0,title,creation_date,creation_date_earliest,creation_date_latest,medium,accession_number,id,credit_line,date_acquired,department,...,full_name,cited_name,role,nationality,birth_date,death_date,birth_place,death_place,small_img_url,date_transformed
10254,Mexico City,,1919-01-01,1964-01-01,ink on laminated linen,82.80.10,cmoa:things/8f438fd6-09d4-4ea5-9cfa-29880cf8eeb9,Gift of Mrs. Alfred Bendiner,1982-12-02,Contemporary Art,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
10255,Jerusalem,,1919-01-01,1964-01-01,lithograph on paper,82.80.6,cmoa:things/2c5494c9-1d68-4f2a-8c78-408ca713c202,Gift of Mrs. Alfred Bendiner,1982-12-02,Fine Arts,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
10256,Matador,,1919-01-01,1964-01-01,lithograph on paper,82.80.8,cmoa:things/98e5dc56-e44a-46f1-8474-3d3742c224a9,Gift of Mrs. Alfred Bendiner,1982-12-02,Fine Arts,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
14132,Gargoyle,,1919-01-01,1964-01-01,pencil on paper,94.99.10,cmoa:things/3962c90f-3bf1-49dd-a4ab-0b1912300111,Gift of the Alfred and Elizabeth Bendiner Foun...,1994-06-09,Heinz Architectural Center,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
14133,Chair; [sketch],,1919-01-01,1964-01-01,ink on paper,94.99.11,cmoa:things/c96fe518-5e52-4a06-b688-ab2bd21086d3,Gift of the Alfred and Elizabeth Bendiner Foun...,1994-06-09,Heinz Architectural Center,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14188,Voice of the Turtle,,1919-01-01,1964-01-01,lithograph with crayon on paper,94.99.62,cmoa:things/86bf2766-f166-4d3a-a761-0553e54c8794,Gift of the Alfred and Elizabeth Bendiner Foun...,1994-06-09,Heinz Architectural Center,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
14189,Pencil sketch with paper scraps,,1919-01-01,1964-01-01,pencil on paper,94.99.6,cmoa:things/95404fd3-e1a3-4719-bf81-4b6532e9058c,Gift of the Alfred and Elizabeth Bendiner Foun...,1994-06-09,Heinz Architectural Center,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
14190,Francescatti,,1919-01-01,1964-01-01,lithograph on paper,94.99.7,cmoa:things/625d36ca-de59-4494-8c25-b90f43483764,Gift of the Alfred and Elizabeth Bendiner Foun...,1994-06-09,Heinz Architectural Center,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
14191,At the Opera,,1919-01-01,1964-01-01,ink on paper,94.99.8,cmoa:things/8d245111-d221-4b81-a17b-a2ddfa889db3,Gift of the Alfred and Elizabeth Bendiner Foun...,1994-06-09,Heinz Architectural Center,...,Alfred Bendiner,"Bendiner, Alfred",,American,1899-01-01,1964-01-01,"Pittsburgh, Pennsylvania","Philadelphia, Pennsylvania",https://cmoa-collection-images.s3.amazonaws.co...,999999
