In [1]:
import Credentials      # Get API keys, etc.
import pandas as pd
from pprint import pprint as pp
import re

# Ex Libris hosted API: host name, then the item-update path template with
# {mms_id}, {holding_id}, {item_pid} and {apikey} placeholders
baseurl = 'https://api-na.hosted.exlibrisgroup.com'
queryUpdateItem = '/almaws/v1/bibs/{mms_id}/holdings/{holding_id}/items/{item_pid}?apikey={apikey}'

# The API key is read from the Credentials module rather than written here
apikey = Credentials.apikey

In [2]:
# Load the item spreadsheet (MMS/Holdings/Item IDs, descriptions, locations).
# The three ID columns are run through str so they are kept as text.
df = pd.read_csv(
    "FullItemList.csv",
    converters=dict.fromkeys(['Item ID', 'Holdings ID', 'MMS ID'], str),
)

In [3]:
# Keep only the columns needed downstream. The explicit .copy() makes the
# result an independent frame, so the assignments below cannot trigger
# SettingWithCopy warnings against the frame that was read from disk.
df = df[['MMS ID', 'Holdings ID', 'Item ID', 'Description', 'Permanent Location']].copy()

# Strip leading/trailing space from Description, then collapse runs of spaces
# inside it. The result is assigned back explicitly: calling an inplace method
# on `df.Description` is chained assignment, which newer pandas warns about and
# which stops updating `df` under Copy-on-Write.
df['Description'] = df['Description'].str.strip().replace(' +', ' ', regex=True)

# Remove spaces from column names
df.columns = [name.replace(' ', '') for name in df.columns]

In [4]:
# Add the fields to be filled: one new column per Enum/Chron field, each
# starting out as None so that "not yet filled" shows up as a null value
fields = ['EnumA', 'EnumB', 'ChronI', 'ChronJ']
df[fields] = None

### General steps for replacement:
- Create a regex which captures info from the Description
- Fill Enum/Chron field with captured info from Description
- Any records with (newly) filled Enum/Chron field go into the **filled** dataframe
- Purge those same records from the **df** dataframe

#### Next step should really be to get this into a function, though:
- Arguments to pass:
  - Regex with capture groups
  - list of fields to fill
- Then just a for loop to fill each of the fields

In [5]:
def fillAndExtract(regex, fields):
    """Fill columns of the module-level ``df`` with regex captures from ``Description``.

    The i-th capture group of ``regex`` is written to the column named by
    ``fields[i]``. Each listed column is overwritten in full: rows whose
    Description does not match the pattern end up null in that column.

    Parameters
    ----------
    regex : str
        Pattern with (unnamed) capture groups, one per entry in ``fields``.
    fields : list of str
        Column names to fill, in capture-group order.

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    exp = re.compile(regex)
    if not fields:
        return
    # Run the extraction once for all groups; it does not depend on the field
    # being filled, so there is no need to repeat it on every iteration.
    captures = df['Description'].str.extract(exp, expand=True)
    for position, column in enumerate(fields):
        df[column] = captures[position]

In [6]:
# Descriptions of the form "v.<number>": capture the literal "v" and the whole
# number. The + quantifier sits inside the group; placed outside it, only the
# last digit is captured, so "v.12" would be recorded as "2".
fillAndExtract(r'^(v)\.(\d+)$', ['EnumA', 'EnumB'])

In [14]:
# NOTE(review): this writes the literal string 'Whatever' into ChronI for the
# row labelled 0, which then qualifies that row for `filled` below. It looks
# like a manual test value; confirm and remove before using the results.
df.loc[0, 'ChronI'] = 'Whatever'

In [15]:
# Collect JUST the records that now carry Enum/Chron info: thresh=1 keeps a
# row when at least one of the `fields` columns is non-null. dropna returns a
# new frame, so no empty DataFrame needs to be created beforehand.
filled = df.dropna(subset=fields, thresh=1)

In [17]:
# Inspect the filled records (the notebook shows a truncated view of the frame)
filled

Unnamed: 0,MMSID,HoldingsID,ItemID,Description,PermanentLocation,EnumA,EnumB,ChronI,ChronJ
0,991003093289706388,2276563440006388,23128946300006388,87/40,2L Gov Docs,,,Whatever,
956,991004970839806388,22115559890006388,23115559870006388,v.1,LL Graphic Novels,v,1,,
969,991004967739806388,22115169990006388,23115169970006388,v.2,Special Collection,v,2,,
988,991004966838606388,22114949620006388,23114949600006388,v.1,2L Stacks,v,1,,
1012,991004964339106388,22114419490006388,23114419470006388,v.1,LL Graphic Novels,v,1,,
...,...,...,...,...,...,...,...,...,...
99993,991003010709706388,2264534640006388,2364534630006388,v.1,LL Curriculum Coll,v,1,,
99994,991003010709706388,2264534640006388,2364534620006388,v.2,LL Curriculum Coll,v,2,,
99995,991003010709706388,2264534640006388,2364534610006388,v.3,LL Curriculum Coll,v,3,,
99996,991002691069706388,2264534410006388,2364534400006388,v.1,2L Stacks,v,1,,


In [26]:
# Purge from df every record whose ItemID already appears in `filled`, so that
# only the still-unfilled records remain
df = df[~df['ItemID'].isin(filled['ItemID'])]

Unnamed: 0,MMSID,HoldingsID,ItemID,Description,PermanentLocation,EnumA,EnumB,ChronI,ChronJ
1,991003093289706388,2276563440006388,23128946290006388,87/39,2L Gov Docs,,,,
2,991003093289706388,2276563440006388,23128946280006388,87/38,2L Gov Docs,,,,
3,991003093289706388,2276563440006388,23128946270006388,87/37,2L Gov Docs,,,,
4,991003093289706388,2276563440006388,23128946260006388,87/36,2L Gov Docs,,,,
5,991003093289706388,2276563440006388,23128946250006388,87/35,2L Gov Docs,,,,
...,...,...,...,...,...,...,...,...,...
99986,991000870229706388,2264536950006388,2364536620006388,1924,2L Stacks,,,,
99991,991001149869706388,2264534800006388,2364534790006388,v.1-2,LL Curriculum Meeting Room,,,,
99992,991001149869706388,2264534800006388,2364534780006388,v.3-6,LL Curriculum Meeting Room,,,,
99998,991001149919706388,2264533650006388,2364533640006388,v.1-2,LL Curriculum Meeting Room,,,,
