In [5]:
from datetime import datetime
from os.path import exists as file_exists
import pandas as pd
from pprint import pprint as pp
import re
import requests
from time import sleep
import xmltodict
import Credentials      # Get API keys, etc.

# --- Alma API connection settings ---
# The key itself comes from the local Credentials module imported above.
apikey = Credentials.prod_api
baseurl = 'https://api-na.hosted.exlibrisgroup.com'
# URL template for a single item; the placeholders are filled in with str.format()
item_query = (
    '/almaws/v1/bibs/{mms_id}'
    '/holdings/{holding_id}'
    '/items/{item_pid}'
    '?apikey={apikey}'
)

# --- Working files ---
exported_csv = "FullItemList.csv"     # item list that gets read (and later rewritten)
filled_csv = "FilledEnumChron.csv"    # running log of items that were updated
err_log_txt = "log.txt"               # API errors are appended here

### Optional: run this cell ONLY if you want to use the Limited-list CSV.
### It replaces exported_csv, so the later cells read from LimitedList.csv
### and the purge step rewrites that file instead of the full item list.
exported_csv = "LimitedList.csv"

#### Create original df

In [6]:
# Depending on how the file was exported, column names may have either spaces
# or underscores, so the str converter is registered under both spellings.
# This makes the three ID columns load as strings.
df = pd.read_csv(
    exported_csv,
    converters=dict.fromkeys(
        ('Item_ID', 'Holdings_ID', 'MMS_ID', 'Item ID', 'Holdings ID', 'MMS ID'),
        str,
    ),
)

#### Clean up, tweak & format df

In [7]:
# Remove spaces from column names
df.columns = [name.replace(' ', '_') for name in df.columns]
# Rename certain columns
df = df.rename(columns={'Permanent_Location': 'Location', 'Item_Policy': 'Policy', 'Material_Type': 'Material'})
# Strip leading/trailing space from Description, then collapse runs of spaces
# inside it. The result is assigned back to the column explicitly: calling
# replace(..., inplace=True) on `df.Description` acts on an intermediate Series,
# which pandas warns about and which does not update df under Copy-on-Write.
df['Description'] = (
    df['Description']
    .str.strip()
    .replace(' +', ' ', regex=True)
)

In [8]:
# Enum/Chron columns start out empty; the regex passes below fill them in
EC_fields = ['Enum_A', 'Enum_B', 'Chron_I', 'Chron_J']
for ec_column in EC_fields:
    df[ec_column] = None

#### The function for getting info from Description to Enum/Chron fields:

In [9]:
def fill_and_extract(regex, these_fields, frame=None):
    """Copy regex capture groups from ``Description`` into Enum/Chron columns.

    The n-th capture group of ``regex`` is written to the n-th column named in
    ``these_fields``. Rows whose Description does not match keep whatever value
    the column already had, so the function can be called repeatedly with
    different patterns.

    Parameters
    ----------
    regex : str
        Pattern with at least ``len(these_fields)`` capture groups.
    these_fields : list of str
        Existing column names to fill, in capture-group order.
    frame : pandas.DataFrame, optional
        Frame to update in place. Defaults to the notebook-level ``df``.

    Raises
    ------
    ValueError
        If the pattern has fewer capture groups than there are target fields
        (checked before anything is modified).
    """
    if frame is None:
        frame = df  # fall back to the notebook's global DataFrame
    exp = re.compile(regex)
    if exp.groups < len(these_fields):
        raise ValueError(
            'regex has {} capture group(s) but {} field(s) were requested'.format(
                exp.groups, len(these_fields)))
    # Run the extraction once; each capture group is one column (0, 1, ...)
    extracted = frame['Description'].str.extract(exp, expand=True)
    for i, f in enumerate(these_fields):
        # Non-matching rows come back as NaN and fall through to the old value
        frame[f] = extracted[i].fillna(frame[f])

#### Call it repeatedly for each regex that you come up with:
**[Test your Regex here](https://regex101.com/)**

In [23]:
# Just volume & nothing else, e.g. "v.12" or "v. 12".
# The quantifier sits INSIDE the group -- (\d+), not (\d)+ -- because a repeated
# group only keeps its last repetition, which would store "2" for "v. 12".
fill_and_extract(r'^v\. ?(\d+)$', ['Enum_A'])

In [24]:
# Just a year or a range of years (1800-2099) & nothing else.
# Matches e.g. "1998", "1998-1999" or "1998-99": the end of a range follows a
# hyphen and may omit its century digits. The whole match is stored in Chron_I.
fill_and_extract(r'^((?:1[89]|20)\d{2}(?:-(?:1[89]|20)?\d{2})?)$', ['Chron_I'])

In [25]:
# Volume + year (or range of years), e.g. "v.12 1998" or "v. 12 1998/99".
# Group 1 (the full volume number) goes to Enum_A, group 2 (the year or range,
# separated by "-" or "/") goes to Chron_I.
# The quantifier sits INSIDE the first group -- (\d+), not (\d)+ -- because a
# repeated group only keeps its last repetition (i.e. just the final digit).
fill_and_extract(r'^v\. ?(\d+) ((?:1[89]|20)\d{2}(?:[\-\/](?:1[89]|20)?\d{2})?)$', ['Enum_A', 'Chron_I'])

#### Then once all those replacements are done, pull filled-in records out to a new dataframe

In [26]:
# Build a dataframe holding JUST the records that got filled: keep every row
# that now has at least one non-empty Enum/Chron field (thresh=1 over EC_fields).
filled = df.dropna(subset=EC_fields, thresh=1)

#### Then apply the changes via the API and log filled items to the Filled CSV

In [27]:
# Progress bookkeeping for the API run
c = 0                    # rows handled so far
records = len(filled)    # total rows to handle
# The Filled CSV only needs a header row if the file does not exist yet
needs_header = False if file_exists(filled_csv) else True

In [None]:
### Run this bit to see what got "filled" before hitting the API
# The row numbers come from enumerate() rather than from `c`, so previewing does
# not advance the counter that the API cell uses for its progress report.
for n, (index, row) in enumerate(filled.fillna('').iterrows(), start=1):
    print(n, ' / '.join((row['MMS_ID'], row['Holdings_ID'], row['Item_ID'])), str(row['Description']), sep="\t")


In [None]:
def _alma_error_message(response):
    """Return the error text of a failed API response.

    Reads web_service_result/errorList/error/errorMessage from the XML body.
    Several <error> elements are joined with '; '. If the body is not XML or
    does not have that structure, the raw response text is returned instead so
    the caller can still log something useful.
    """
    from xml.parsers.expat import ExpatError  # only needed on this error path
    try:
        errors = xmltodict.parse(response.content)['web_service_result']['errorList']['error']
        if not isinstance(errors, list):
            errors = [errors]
        return '; '.join(str(err['errorMessage']) for err in errors)
    except (ExpatError, KeyError, TypeError):
        return response.text


# For every row in `filled`: fetch the item from Alma, merge in the derived
# Enum/Chron values, PUT the record back, and append the row to the Filled CSV.
# Rows that fail to fetch or update are logged and removed from `filled`, so
# that the purge step afterwards leaves them in the original CSV.
with open(err_log_txt, 'a') as err_log:
    # Start counting from zero here so an earlier run of this cell (or of the
    # preview cell) cannot skew the percentage and the throttle below.
    c = 0
    records = len(filled)
    for index, row in filled.fillna('').iterrows():
        c += 1
        print(c, ' / '.join((row['MMS_ID'], row['Holdings_ID'], row['Item_ID'])), str(row['Description']), sep="\t")

        # One URL serves both the GET and the PUT for this item
        item_url = baseurl + item_query.format(mms_id=str(row['MMS_ID']),
                                               holding_id=str(row['Holdings_ID']),
                                               item_pid=str(row['Item_ID']),
                                               apikey=apikey)
        r = requests.get(item_url)
        if r.status_code != 200:
            # Log the error
            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ' Error FETCHING item ', row['Item_ID'], ': (', r.status_code, ') ',
                  _alma_error_message(r),
                  sep='', file=err_log)
            # Remove this item from the "filled" df
            filled = filled.drop([index])
            continue
        # Parse only after the status check: an error body is not an item record
        rdict = xmltodict.parse(r.text)

        # Roughly once per percent of progress: report, then pause for 5 seconds.
        # (With fewer than 100 records this fires on every row.)
        if (c % (records/100) < 1):
            print(int(100*c/records), '% complete', sep='')
            sleep(5)

        # Merge derived values into the retrieved data (rdict)
        # NOTE(review): fields with no derived value are written as '' and so
        # overwrite whatever Alma currently holds -- confirm that is intended.
        item_data = rdict['item']['item_data']
        for alma_field, column in (('enumeration_a', 'Enum_A'),
                                   ('enumeration_b', 'Enum_B'),
                                   ('chronology_i', 'Chron_I'),
                                   ('chronology_j', 'Chron_J')):
            item_data[alma_field] = str(row[column])

        # Set an internal note in the first empty slot, unless the note is
        # already present somewhere in item_data
        if 'Enum/Chron derived from Description' not in item_data.values():
            for note_field in ('internal_note_1', 'internal_note_2', 'internal_note_3'):
                # .get(): a note element missing from the record counts as empty
                if not item_data.get(note_field):
                    item_data[note_field] = 'Enum/Chron derived from Description'
                    break
            else:  # Nbd, just log it
                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ' No internal note available for item MMS ID ',
                      str(row['MMS_ID']), sep="", file=err_log)

        # Push the altered record back into Alma
        pxml = xmltodict.unparse(rdict)
        p = requests.put(item_url,
                         data=pxml.encode('utf-8'), headers={'Content-Type': 'application/xml'})
        if p.status_code != 200:
            # Log the error
            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ' Error UPDATING item ', row['Item_ID'], ': (', p.status_code, ') ',
                  _alma_error_message(p),
                  sep='', file=err_log)
            # Remove this item from the "filled" df
            filled = filled.drop([index])
            continue

        # Log it to the CSV
        #    Btw, the 'to_frame().T' transposes it, so it all goes in as a single comma-separated row
        row.to_frame().T.to_csv(filled_csv, mode='a', index=False, header=needs_header)
        needs_header = False  # Henceforth

#### Purge filled rows from the original CSV

In [53]:
# Keep only the rows whose Item_ID is NOT among the filled records
df = df[~df['Item_ID'].isin(filled['Item_ID'])]

# Write what is left back over the original CSV (to_csv overwrites by default)
df.to_csv(path_or_buf=exported_csv, index=False)

# View df & filled

In [None]:
# Show the remaining (unfilled) records, then the filled ones
display(df, filled)

# Undo changes to some records!

In [4]:
def _undo_error_text(response):
    """Return the error text of a failed API response.

    Reads web_service_result/errorList/error/errorMessage from the XML body.
    Several <error> elements are joined with '; '. If the body is not XML or
    does not have that structure, the raw response text is returned instead.
    """
    from xml.parsers.expat import ExpatError  # only needed on this error path
    try:
        errors = xmltodict.parse(response.content)['web_service_result']['errorList']['error']
        if not isinstance(errors, list):
            errors = [errors]
        return '; '.join(str(err['errorMessage']) for err in errors)
    except (ExpatError, KeyError, TypeError):
        return response.text


# Undo: for every item listed in the Undo CSV, blank the four Enum/Chron fields
# in Alma and remove the 'Enum/Chron derived from Description' internal note.
undo_csv="Undo.csv"
df = pd.read_csv(undo_csv, converters={'Item_ID': str, 'Holdings_ID': str, 'MMS_ID': str, 'Item ID': str, 'Holdings ID': str, 'MMS ID': str, 'Enum_A': str, 'Enum_B': str, 'Chron_I': str, 'Chron_J': str})
# Remove spaces from column names
df.columns = [name.replace(' ', '_') for name in df.columns]
# Rename certain columns
df = df.rename(columns={'Permanent_Location': 'Location', 'Item_Policy': 'Policy', 'Material_Type': 'Material'})
records = len(df)
c = 0

for index, row in df.fillna('').iterrows():
    c += 1

    # Get the current record
    print(c, ' / '.join((row['MMS_ID'], row['Holdings_ID'], row['Item_ID'])), str(row['Description']), sep="\t")
    # One URL serves both the GET and the PUT for this item
    item_url = baseurl + item_query.format(mms_id=str(row['MMS_ID']),
                                           holding_id=str(row['Holdings_ID']),
                                           item_pid=str(row['Item_ID']),
                                           apikey=apikey)
    r = requests.get(item_url)
    if r.status_code != 200:
        # Output the error
        print('Error FETCHING item ', row['Item_ID'], ': (', r.status_code, ') ',
              _undo_error_text(r),
              sep='')
        continue
    # Parse only after the status check: an error body is not an item record
    rdict = xmltodict.parse(r.text)

    # Roughly once per percent of progress: report, then pause for 5 seconds.
    # (With fewer than 100 records this fires on every row.)
    if (c % (records/100) < 1):
        print(int(100*c/records), '% complete', sep='')
        sleep(5)

    # Blank out all four Enum/Chron fields in the retrieved data (rdict)
    item_data = rdict['item']['item_data']
    for alma_field in ('enumeration_a', 'enumeration_b', 'chronology_i', 'chronology_j'):
        item_data[alma_field] = None
    # Clear any internal note that holds the "derived" marker text
    for note_field in ('internal_note_1', 'internal_note_2', 'internal_note_3'):
        # .get(): a note element missing from the record is simply skipped
        if item_data.get(note_field) == 'Enum/Chron derived from Description':
            item_data[note_field] = None

    # Push the altered record back into Alma
    pxml = xmltodict.unparse(rdict)
    p = requests.put(item_url,
                     data=pxml.encode('utf-8'), headers={'Content-Type': 'application/xml'})
    if p.status_code != 200:
        # Output the error
        print('Error UPDATING item ', row['Item_ID'], ': (', p.status_code, ') ',
              _undo_error_text(p),
              sep='')


1	991001602639706388 / 2276187460006388 / 2376187390006388	1900
2	991001602639706388 / 2276187460006388 / 2376187420006388	1898
3	991001602639706388 / 2276187460006388 / 2376187450006388	1896
4	991001602639706388 / 2276187460006388 / 2376187390006388	1900
5	991001602639706388 / 2276187460006388 / 2376187420006388	1898
6	991001602639706388 / 2276187460006388 / 2376187450006388	1896
7	991001602639706388 / 2276187460006388 / 2376187390006388	1900
8	991001602639706388 / 2276187460006388 / 2376187420006388	1898
1% complete
9	991001602639706388 / 2276187460006388 / 2376187450006388	1896
10	991004817089706388 / 2275601950006388 / 2375601940006388	1832-May 1860
11	991004813829706388 / 2275656640006388 / 2375656620006388	1966-1970 Index
12	991004817189706388 / 2275656160006388 / 2375656140006388	1986-1990 Index
13	991004813659706388 / 2275658520006388 / 2375658500006388	1971-1975 Index
14	991000973549706388 / 2263132630006388 / 2363132580006388	1981-1985 Index
15	991000973549706388 / 2263132630

118	991004195479706388 / 2261801640006388 / 2361800870006388	2003/40
119	991004195479706388 / 2261801640006388 / 2361800880006388	2003/39
120	991004195479706388 / 2261801640006388 / 2361800890006388	2003/38
121	991004195479706388 / 2261801640006388 / 2361800900006388	2003/37
122	991004195479706388 / 2261801640006388 / 2361800910006388	2003/36
16% complete
123	991004195479706388 / 2261801640006388 / 2361800930006388	2003/34
124	991004195479706388 / 2261801640006388 / 2361800950006388	2003/33
125	991004195479706388 / 2261801640006388 / 2361800970006388	2003/32
126	991004195479706388 / 2261801640006388 / 2361800980006388	2003/31
127	991004195479706388 / 2261801640006388 / 2361801010006388	2003/30
128	991004195479706388 / 2261801640006388 / 2361801020006388	2003/29
129	991004195479706388 / 2261801640006388 / 2361801030006388	2003/28
17% complete
130	991004195479706388 / 2261801640006388 / 2361801040006388	2003/27
131	991004195479706388 / 2261801640006388 / 2361801060006388	2003/26
132	9910

233	991004195479706388 / 2261801640006388 / 2361815040006388	2005/91
234	991004195479706388 / 2261801640006388 / 2361815070006388	2005/90
235	991004195479706388 / 2261801640006388 / 2361815080006388	2005/89
31% complete
236	991004195479706388 / 2261801640006388 / 2361815090006388	2005/88
237	991004195479706388 / 2261801640006388 / 2361815100006388	2005/87
238	991004195479706388 / 2261801640006388 / 2361815120006388	2005/86
239	991004195479706388 / 2261801640006388 / 2361815130006388	2005/85
240	991004195479706388 / 2261801640006388 / 2361815160006388	2005/84
241	991004195479706388 / 2261801640006388 / 2361815170006388	2005/83
242	991004195479706388 / 2261801640006388 / 2361815180006388	2005/82
243	991004195479706388 / 2261801640006388 / 2361815200006388	2005/81
32% complete
244	991004195479706388 / 2261801640006388 / 2361815210006388	2005/80
245	991004195479706388 / 2261801640006388 / 2361815230006388	2005/79
246	991004195479706388 / 2261801640006388 / 2361815250006388	2005/78
247	9910

349	991004195479706388 / 2261801640006388 / 2361816510006388	2004/89
46% complete
350	991004195479706388 / 2261801640006388 / 2361816520006388	2004/88
351	991004195479706388 / 2261801640006388 / 2361816530006388	2004/87
352	991004195479706388 / 2261801640006388 / 2361816540006388	2004/86
353	991004195479706388 / 2261801640006388 / 2361816550006388	2004/85
354	991004195479706388 / 2261801640006388 / 2361816570006388	2004/84
355	991004195479706388 / 2261801640006388 / 2361816620006388	2004/83
356	991004195479706388 / 2261801640006388 / 2361816630006388	2004/82
47% complete
357	991004195479706388 / 2261801640006388 / 2361816660006388	2004/81
358	991004195479706388 / 2261801640006388 / 2361816670006388	2004/80
359	991004195479706388 / 2261801640006388 / 2361816680006388	2004/79
360	991004195479706388 / 2261801640006388 / 2361816690006388	2004/78
361	991004195479706388 / 2261801640006388 / 2361816720006388	2004/77
362	991004195479706388 / 2261801640006388 / 2361816730006388	2004/76
363	9910

466	991001686759706388 / 2270991960006388 / 2370991940006388	2000
467	991001686759706388 / 2270991960006388 / 2370991950006388	2002
468	991001852679706388 / 2262486730006388 / 2362486720006388	2003
469	991001594209706388 / 2268522200006388 / 2368522190006388	2001
470	991001202879706388 / 2263095150006388 / 2363095140006388	2003
62% complete
471	991001228799706388 / 2267481020006388 / 2367480990006388	2001
472	991001228799706388 / 2267481020006388 / 2367481000006388	2000
473	991000987479706388 / 2263321850006388 / 2363321830006388	2001
474	991000987479706388 / 2263321850006388 / 2363321840006388	2000
475	991001202849706388 / 2263096620006388 / 2363096580006388	2003
476	991001202849706388 / 2263096620006388 / 2363096610006388	2001
477	991000740419706388 / 2270505270006388 / 2370505260006388	2001
63% complete
478	991000845899706388 / 2274240850006388 / 2374240820006388	2001
479	991001225399706388 / 2266826260006388 / 2366826220006388	2002
480	991001225399706388 / 2266826260006388 / 236682

586	991002494869706388 / 2261195640006388 / 2361195610006388	2003-2004
587	991002494869706388 / 2261195640006388 / 2361195620006388	2005-2006
588	991002778939706388 / 2275996680006388 / 2375996660006388	2000-2007
589	991003957129706388 / 2273553750006388 / 2373553740006388	2015
590	991001228729706388 / 2267497790006388 / 2367497780006388	2002
591	991000759589706388 / 2265891050006388 / 2365891040006388	2001
78% complete
592	991004014689706388 / 2261129610006388 / 2361129590006388	2014
593	991000736769706388 / 2263241870006388 / 2363241860006388	2002
594	991004628649706388 / 2273475040006388 / 2373475010006388	2001
595	991000953919706388 / 2264876360006388 / 2364876350006388	2003
596	991003010269706388 / 2263434410006388 / 2363434390006388	2001
597	991003010269706388 / 2263434410006388 / 2363434400006388	2000
598	991001852559706388 / 2262412320006388 / 2362412240006388	2001
599	991001817329706388 / 2276690070006388 / 2376690060006388	2003
79% complete
600	991001216439706388 / 2260931230

93% complete
706	991004456389706388 / 2266122450006388 / 2366121230006388	1938
707	991004456389706388 / 2266122450006388 / 2366121240006388	1937
708	991004456389706388 / 2266122450006388 / 2366121250006388	1936
709	991004456389706388 / 2266122450006388 / 2366121260006388	1935
710	991004456389706388 / 2266122450006388 / 2366121270006388	1934
711	991004456389706388 / 2266122450006388 / 2366121280006388	1933
712	991004456389706388 / 2266122450006388 / 2366121290006388	1932
94% complete
713	991004456389706388 / 2266122450006388 / 2366121300006388	1931
714	991004456389706388 / 2266122450006388 / 2366121310006388	1930
715	991004456389706388 / 2266122450006388 / 2366121320006388	1929
716	991004456389706388 / 2266122450006388 / 2366121330006388	1928
717	991004456389706388 / 2266122450006388 / 2366121340006388	1927
718	991004456389706388 / 2266122450006388 / 2366121350006388	1926
719	991004456389706388 / 2266122450006388 / 2366121360006388	1925
720	991004456389706388 / 2266122450006388 / 236612

# Testing stuff