In [1]:
# dependencies
import pandas as pd

In [2]:
# pull in csv Files
moma_artists_file = "../00_raw_data_files/moma-Artists.csv"
moma_art_file = "../00_raw_data_files/moma-Artworks.csv"

## Get Artists csv to only inlcude columns needed for transforming

In [3]:
# artists df
moma_artists = pd.read_csv(moma_artists_file)
moma_artists = pd.DataFrame(moma_artists)
list(moma_artists.columns)

['ConstituentID',
 'DisplayName',
 'ArtistBio',
 'Nationality',
 'Gender',
 'BeginDate',
 'EndDate',
 'Wiki QID',
 'ULAN']

In [4]:
moma_artists.head()

Unnamed: 0,ConstituentID,DisplayName,ArtistBio,Nationality,Gender,BeginDate,EndDate,Wiki QID,ULAN
0,1,Robert Arneson,"American, 1930–1992",American,Male,1930,1992,,
1,2,Doroteo Arnaiz,"Spanish, born 1936",Spanish,Male,1936,0,,
2,3,Bill Arnold,"American, born 1941",American,Male,1941,0,,
3,4,Charles Arnoldi,"American, born 1946",American,Male,1946,0,Q1063584,500027998.0
4,5,Per Arnoldi,"Danish, born 1941",Danish,Male,1941,0,,


In [5]:
# rename columns
moma_artists = moma_artists.rename(columns={'ConstituentID':'donor_id','DisplayName':'artist_name','ArtistBio':'bio'})
moma_artists.head()

Unnamed: 0,donor_id,artist_name,bio,Nationality,Gender,BeginDate,EndDate,Wiki QID,ULAN
0,1,Robert Arneson,"American, 1930–1992",American,Male,1930,1992,,
1,2,Doroteo Arnaiz,"Spanish, born 1936",Spanish,Male,1936,0,,
2,3,Bill Arnold,"American, born 1941",American,Male,1941,0,,
3,4,Charles Arnoldi,"American, born 1946",American,Male,1946,0,Q1063584,500027998.0
4,5,Per Arnoldi,"Danish, born 1941",Danish,Male,1941,0,,


In [6]:
moma_artists = moma_artists.rename(columns={'Nationality':'nationality','Gender':'gender','BeginDate':'birth_year','EndDate':'death_year','Wiki QID':'wiki','ULAN':'ulan'})
moma_artists.head()

Unnamed: 0,donor_id,artist_name,bio,nationality,gender,birth_year,death_year,wiki,ulan
0,1,Robert Arneson,"American, 1930–1992",American,Male,1930,1992,,
1,2,Doroteo Arnaiz,"Spanish, born 1936",Spanish,Male,1936,0,,
2,3,Bill Arnold,"American, born 1941",American,Male,1941,0,,
3,4,Charles Arnoldi,"American, born 1946",American,Male,1946,0,Q1063584,500027998.0
4,5,Per Arnoldi,"Danish, born 1941",Danish,Male,1941,0,,


In [7]:
# remove columns based on group decision donor_id(unique to museum), bio (repeats other columns):
moma_artists = moma_artists.drop(["donor_id", "bio"], axis =1)
moma_artists.head(2)

Unnamed: 0,artist_name,nationality,gender,birth_year,death_year,wiki,ulan
0,Robert Arneson,American,Male,1930,1992,,
1,Doroteo Arnaiz,Spanish,Male,1936,0,,


In [8]:
# add columns necessary so all csv's have same columns
moma_artists['artist_role'] = " "
moma_artists['birth_place'] = " "
moma_artists['death_place'] = " "
moma_artists['artist_id'] = " "
moma_artists.head(2)

Unnamed: 0,artist_name,nationality,gender,birth_year,death_year,wiki,ulan,artist_role,birth_place,death_place,artist_id
0,Robert Arneson,American,Male,1930,1992,,,,,,
1,Doroteo Arnaiz,Spanish,Male,1936,0,,,,,,


In [9]:
# reorder columns so df match
moma_artists = moma_artists[['artist_id',
                            'artist_name',
                            'artist_role',
                            'nationality',
                            'gender',
                            'birth_year',
                            'birth_place',
                            'death_year',
                            'death_place',
                            'wiki',
                            'ulan']]
moma_artists.head()

Unnamed: 0,artist_id,artist_name,artist_role,nationality,gender,birth_year,birth_place,death_year,death_place,wiki,ulan
0,,Robert Arneson,,American,Male,1930,,1992,,,
1,,Doroteo Arnaiz,,Spanish,Male,1936,,0,,,
2,,Bill Arnold,,American,Male,1941,,0,,,
3,,Charles Arnoldi,,American,Male,1946,,0,,Q1063584,500027998.0
4,,Per Arnoldi,,Danish,Male,1941,,0,,,


In [10]:
# export to csv
moma_artists.to_csv('../01_extracted_cleaned_data/moma_artists_transformed.csv', header=True, index=False) 

## Get artwork csv to only inlcude columns needed for transforming

In [11]:
# artwork df
moma_artwork = pd.read_csv(moma_art_file)
moma_artwork = pd.DataFrame(moma_artwork)


In [12]:
moma_artwork.head(2)

Unnamed: 0,Title,Artist,ConstituentID,ArtistBio,Nationality,BeginDate,EndDate,Gender,Date,Medium,...,ThumbnailURL,Circumference (cm),Depth (cm),Diameter (cm),Height (cm),Length (cm),Weight (kg),Width (cm),Seat Height (cm),Duration (sec.)
0,"Ferdinandsbrücke Project, Vienna, Austria (Ele...",Otto Wagner,6210,"(Austrian, 1841–1918)",(Austrian),(1841),(1918),(Male),1896,Ink and cut-and-pasted painted pages on paper,...,http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...,,,,48.6,,,168.9,,
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,"(French, born 1944)",(French),(1944),(0),(Male),1987,Paint and colored pencil on print,...,http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...,,,,40.6401,,,29.8451,,


In [13]:
# Remove columns the group decided we did not need
moma_artwork = moma_artwork.drop(['ArtistBio','Nationality','BeginDate','EndDate', 'Gender', 'ObjectID','Circumference (cm)','Depth (cm)','Diameter (cm)','Weight (kg)', 'Height (cm)','Length (cm)','Seat Height (cm)','Duration (sec.)','Width (cm)','Cataloged','Dimensions'], axis = 1)
moma_artwork.head()

Unnamed: 0,Title,Artist,ConstituentID,Date,Medium,CreditLine,AccessionNumber,Classification,Department,DateAcquired,URL,ThumbnailURL
0,"Ferdinandsbrücke Project, Vienna, Austria (Ele...",Otto Wagner,6210,1896,Ink and cut-and-pasted painted pages on paper,Fractional and promised gift of Jo Carole and ...,885.1996,Architecture,Architecture & Design,1996-04-09,http://www.moma.org/collection/works/2,http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,1987,Paint and colored pencil on print,Gift of the architect in honor of Lily Auchinc...,1.1995,Architecture,Architecture & Design,1995-01-17,http://www.moma.org/collection/works/3,http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...
2,"Villa near Vienna Project, Outside Vienna, Aus...",Emil Hoppe,7605,1903,"Graphite, pen, color pencil, ink, and gouache ...",Gift of Jo Carole and Ronald S. Lauder,1.1997,Architecture,Architecture & Design,1997-01-15,http://www.moma.org/collection/works/4,http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Photographic reproduction with colored synthet...,Purchase and partial gift of the architect in ...,2.1995,Architecture,Architecture & Design,1995-01-17,http://www.moma.org/collection/works/5,http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...
4,"Villa, project, outside Vienna, Austria, Exter...",Emil Hoppe,7605,1903,"Graphite, color pencil, ink, and gouache on tr...",Gift of Jo Carole and Ronald S. Lauder,2.1997,Architecture,Architecture & Design,1997-01-15,http://www.moma.org/collection/works/6,http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...


In [14]:
list(moma_artwork.columns)

['Title',
 'Artist',
 'ConstituentID',
 'Date',
 'Medium',
 'CreditLine',
 'AccessionNumber',
 'Classification',
 'Department',
 'DateAcquired',
 'URL',
 'ThumbnailURL']

In [15]:
# rename columns in artwork df
moma_artwork = moma_artwork.rename(columns={
    'Title':'title', 
    'Artist':'artist_name',
    'ConstituentID': 'donor_id',
    'Date' : 'create_year',
    'Medium': 'medium',
    'CreditLine': 'acq',
    'AccessionNumber' : 'acc_num',
    'Classification': 'classification',
    'Department': 'dept',
    'DateAcquired': 'acq_date',
    'URL': 'web_url',
    'ThumbnailURL' : 'thumb_url'
})
moma_artwork.head()

Unnamed: 0,title,artist_name,donor_id,create_year,medium,acq,acc_num,classification,dept,acq_date,web_url,thumb_url
0,"Ferdinandsbrücke Project, Vienna, Austria (Ele...",Otto Wagner,6210,1896,Ink and cut-and-pasted painted pages on paper,Fractional and promised gift of Jo Carole and ...,885.1996,Architecture,Architecture & Design,1996-04-09,http://www.moma.org/collection/works/2,http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,1987,Paint and colored pencil on print,Gift of the architect in honor of Lily Auchinc...,1.1995,Architecture,Architecture & Design,1995-01-17,http://www.moma.org/collection/works/3,http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...
2,"Villa near Vienna Project, Outside Vienna, Aus...",Emil Hoppe,7605,1903,"Graphite, pen, color pencil, ink, and gouache ...",Gift of Jo Carole and Ronald S. Lauder,1.1997,Architecture,Architecture & Design,1997-01-15,http://www.moma.org/collection/works/4,http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Photographic reproduction with colored synthet...,Purchase and partial gift of the architect in ...,2.1995,Architecture,Architecture & Design,1995-01-17,http://www.moma.org/collection/works/5,http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...
4,"Villa, project, outside Vienna, Austria, Exter...",Emil Hoppe,7605,1903,"Graphite, color pencil, ink, and gouache on tr...",Gift of Jo Carole and Ronald S. Lauder,2.1997,Architecture,Architecture & Design,1997-01-15,http://www.moma.org/collection/works/6,http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...


In [16]:
# Add museum name
moma_artwork['museum_code'] = 2
moma_artwork['artwork_id']= ' '
moma_artwork['artist_id']= ' ' 
moma_artwork['ulan'] = ' '
moma_artwork['donor_name']= ' '
moma_artwork['donor_type']= ' '
moma_artwork.head()

Unnamed: 0,title,artist_name,donor_id,create_year,medium,acq,acc_num,classification,dept,acq_date,web_url,thumb_url,museum_code,artwork_id,artist_id,ulan,donor_name,donor_type
0,"Ferdinandsbrücke Project, Vienna, Austria (Ele...",Otto Wagner,6210,1896,Ink and cut-and-pasted painted pages on paper,Fractional and promised gift of Jo Carole and ...,885.1996,Architecture,Architecture & Design,1996-04-09,http://www.moma.org/collection/works/2,http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...,2,,,,,
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,1987,Paint and colored pencil on print,Gift of the architect in honor of Lily Auchinc...,1.1995,Architecture,Architecture & Design,1995-01-17,http://www.moma.org/collection/works/3,http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...,2,,,,,
2,"Villa near Vienna Project, Outside Vienna, Aus...",Emil Hoppe,7605,1903,"Graphite, pen, color pencil, ink, and gouache ...",Gift of Jo Carole and Ronald S. Lauder,1.1997,Architecture,Architecture & Design,1997-01-15,http://www.moma.org/collection/works/4,http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...,2,,,,,
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Photographic reproduction with colored synthet...,Purchase and partial gift of the architect in ...,2.1995,Architecture,Architecture & Design,1995-01-17,http://www.moma.org/collection/works/5,http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...,2,,,,,
4,"Villa, project, outside Vienna, Austria, Exter...",Emil Hoppe,7605,1903,"Graphite, color pencil, ink, and gouache on tr...",Gift of Jo Carole and Ronald S. Lauder,2.1997,Architecture,Architecture & Design,1997-01-15,http://www.moma.org/collection/works/6,http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...,2,,,,,


In [17]:
list(moma_artwork.columns)


['title',
 'artist_name',
 'donor_id',
 'create_year',
 'medium',
 'acq',
 'acc_num',
 'classification',
 'dept',
 'acq_date',
 'web_url',
 'thumb_url',
 'museum_code',
 'artwork_id',
 'artist_id',
 'ulan',
 'donor_name',
 'donor_type']

In [18]:
# export to csv
moma_artwork.to_csv('../01_extracted_cleaned_data/moma_artwork_transformed.csv', header=True, index=False) 