In [4]:
# Date: January 19, 2017
# Author: Oliver Plunket
#
# Description: a script to be used on CSV files containing artist rosters and organization verifications for burden-api
# galleries.  The script outputs two CSVs ready to be imported into burden by way of the ImportArtistRostersService
# and the ImportOrganizationVerificationsService.  This script requires the following headers:
#
#      Burden ID, Name, Domain, Type of Organization, Artists are..., Artists
#
# The Artist Rosters step also looks up a 'Notes' header and reads every column after it as an artist.
#

In [5]:
import pandas as pd

# import csv exported from G Sheets
# 'Burden ID' is forced to text: a purely numeric id column would otherwise be
# parsed as a number, which breaks the `.str` filter below and can turn ids
# into floats (e.g. 123 -> 123.0) when blank rows are present.
df = pd.read_csv('<path to csv>', encoding='utf-8', dtype={'Burden ID': str})

# clean headers
df = df.rename(columns={'Burden ID': 'organization_id',
                        'Name': 'name',
                        'Type of Organization': 'organization_type',
                        'Artists are...': 'artist_type'})

# only data rows: drop rows with no id, or whose id contains a letter.
# na=False keeps the mask strictly boolean when the id is missing.
id_has_letters = df['organization_id'].str.contains('[a-zA-Z]', regex=True, na=False)
# .copy() so later cells modify an independent frame, not a view of the raw data
df = df.loc[~(df['organization_id'].isnull() | id_has_letters)].copy()

***Organization Verifications***

In [7]:
# Build the organization-verification table: one row per organization with a
# free-text note and a numeric score derived from the organization type.

# columns
cols = ['organization_id', 'organization_type']
org_vers = df[cols].fillna('')
org_vers['notes'] = ''

# turn answers into scores: 'Gallery' scores 2, every other answer (blank included) scores 1
is_gallery = org_vers['organization_type'] == 'Gallery'
org_vers['score'] = is_gallery.map({True: 2, False: 1})

# Types that mention neither "Gallery" nor "Other" are kept, lower-cased, as the note.
# The group is non-capturing so pandas does not warn about unused match groups.
needs_note = ~org_vers['organization_type'].str.contains('(?:Gallery|Other)', regex=True)
# Single .loc[rows, column] write; chained `frame.col.loc[mask] = ...` may silently
# update a temporary copy instead of the frame.
org_vers.loc[needs_note, 'notes'] = org_vers.loc[needs_note, 'organization_type'].str.lower()

org_vers = org_vers.drop('organization_type', axis=1)
org_vers = org_vers.sort_values(by='organization_id').reset_index(drop=True)

In [None]:
# Write the organization verifications as a UTF-8 CSV, leaving out the index column.
org_vers.to_csv(
    '<desired file path for download>',
    index=False,
    encoding='utf-8',
)

***Artist Rosters***

In [19]:
# Reshape the wide sheet (one row per organization, one column per artist slot)
# into a long table with one row per (organization, artist) pair.

# Blank out missing cells so empty artist slots compare equal to ''.
df = df.fillna('')

headers = list(df.columns.values)
missing_headers = [h for h in ('Notes', 'artist_type') if h not in headers]
if missing_headers:
    raise ValueError('missing required column(s): ' + ', '.join(missing_headers))

# Every column to the right of 'Notes' holds an artist name; the columns up to
# and including 'artist_type' describe the organization.
artist_headers = headers[headers.index('Notes')+1:]
rosters_headers = headers[:headers.index('artist_type')+1]

# Collect plain dicts and build the frame once: growing a DataFrame row by row
# is quadratic, and DataFrame.append / .ix are gone from current pandas.
roster_records = []
for record in df.to_dict(orient='records'):
    org_fields = {header: record[header] for header in rosters_headers}
    for header in artist_headers:
        artist = record[header]
        if hasattr(artist, 'strip'):
            # whitespace-only cells are empty slots, not artists
            artist = artist.strip()
        if artist != '':
            roster_records.append(dict(org_fields, artist=artist))

# Explicit columns keep the expected schema even when no artist was found.
rosters = pd.DataFrame(roster_records, columns=rosters_headers + ['artist'])

In [13]:
# cleanup
def _strip_text(column):
    """Return `column` with surrounding whitespace removed from every text cell.

    Cells without a `strip` method (numbers, etc.) are passed through unchanged,
    so this is safe on columns of any dtype.
    """
    return column.map(lambda value: value.strip() if hasattr(value, 'strip') else value)


# Trim the source frame as before (kept in case later cells read df) ...
for x in df.columns.values:
    df[x] = _strip_text(df[x])

# ... and trim the roster rows themselves: they were already copied out of df,
# so trimming df alone never reaches the exported data.
for x in rosters.columns.values:
    rosters[x] = _strip_text(rosters[x])

# A name that was only whitespace is not an artist.
rosters = rosters.loc[rosters['artist'] != ''].copy()

# Single .loc[rows, column] write instead of chained assignment.
rosters.loc[rosters['artist_type'] == 'Unknown', 'artist_type'] = ''
rosters['artist'] = rosters['artist'].str.title()

cols = ['organization_id', 'artist', 'artist_type']
# .copy() so adding 'notes' below writes to an independent frame
rosters = rosters[cols].copy()
# current project isn't collecting notes
rosters['notes'] = ''

# Stable sort keeps each organization's artists in their original column order.
rosters = rosters.sort_values(by='organization_id', kind='mergesort').reset_index(drop=True)

In [None]:
# Write the artist rosters as a UTF-8 CSV, leaving out the index column.
rosters.to_csv(
    '<desired file path for download>',
    index=False,
    encoding='utf-8',
)