In [1]:
import pandas as pd

# Input file for this notebook; it is loaded with pd.read_csv as tab-delimited text.
# The path is relative, so it resolves against the kernel's working directory.
fh = "../files/picklist/google-candidates-clean.txt"

In [2]:
# Load the candidates file as tab-delimited UTF-8 text. Column names are supplied
# explicitly via `names`, so the first line of the file is read as data, not as a header.
df = pd.read_csv(
    fh,
    delimiter="\t",
    encoding="utf-8",
    engine="python",
    names=[
        "iNumber", "iCode2", "iType", "iLocation", "iStatus", "iMessage", "iOpacMessage",
        "iAgency", "iBarcode", "iUnit", "iCallNum", "iVolume", "iIntNote", "Message",
        "DateType", "Date1", "Date2", "title", "pubInfo", "bCallNum", "bStaffCallNum"],
    # dtype keys are case-sensitive and must match `names` exactly. The previous
    # "itype" key matched no column, so the int dtype for "iType" was never applied.
    # Identifier-like columns (iNumber, iBarcode, ...) are kept as strings.
    dtype={"iNumber": str, "iIntNote": str, "iCode2": str, "iType": int, "iBarcode": str})

In [3]:
# List every distinct location code in sorted order; the code prefixes drive the
# building assignment and the do-not-send rule defined later in the notebook.
print(sorted(df["iLocation"].unique()))

['bur51', 'lsxx2', 'mab82', 'mab88', 'mab92', 'mab98', 'mabb2', 'mabm2', 'mac82', 'macc2', 'mae82', 'maee2', 'maf82', 'maf88', 'maf92', 'maf98', 'maff3', 'mag  ', 'mag82', 'mag92', 'mag98', 'magg1', 'magg2', 'mai32', 'mai82', 'mak32', 'mal  ', 'mal72', 'mal82', 'mal92', 'mall1', 'malm2', 'malv2', 'maor2', 'map  ', 'map82', 'map92', 'map98', 'map99', 'mapp1', 'mapp2', 'mapp3', 'mapp8', 'mapp9', 'marr2', 'mas82', 'mauu2', 'pad22', 'pad32', 'pad38', 'pah32', 'pam11', 'pam32', 'pam38', 'pat11', 'pat28', 'pat32', 'pat38', 'pat42', 'rc2cf', 'rc2ma', 'rccd2', 'rccd9', 'rccf2', 'rcma2', 'rcmb2', 'rcmb8', 'rcmf2', 'rcmf8', 'rcmg2', 'rcmg8', 'rcmg9', 'rcmi2', 'rcml2', 'rcmo2', 'rcmp2', 'rcmr2', 'rcpd2', 'rcph2', 'rcph9', 'rcpm2', 'rcpm8', 'rcpt2', 'rcx28', 'rcxx2', 'sccc2', 'scdd2', 'scff1', 'scff2', 'scff3']


In [4]:
# List every distinct item-type code in sorted order, for comparison against the
# excluded item types used by the do-not-send rule.
print(sorted(df["iType"].unique()))

[0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 15, 16, 18, 20, 21, 22, 23, 25, 26, 27, 28, 29, 32, 33, 34, 35, 37, 41, 42, 51, 52, 55, 57, 60, 61, 65, 66]


In [6]:
def determine_building(row):
    """Return the building label for a row, based on its location-code prefix.

    ``row["iLocation"]`` is matched against these prefixes, in order:
    "rc" -> "ReCAP", "ma" -> "SASB", "sc" -> "SCH", "pa" -> "LPA".

    Returns None when the location code starts with none of them.
    """
    location = row["iLocation"]
    prefix_to_building = (
        ("rc", "ReCAP"),
        ("ma", "SASB"),
        ("sc", "SCH"),
        ("pa", "LPA"),
    )
    for prefix, building in prefix_to_building:
        if location.startswith(prefix):
            return building
    return None

In [7]:
def do_not_sent(row):
    """Return "TRUE" if the row must be excluded, otherwise "FALSE" (as strings).

    A row is excluded when any of the following holds:
      * ``row["iLocation"]`` does not start with "rc", "ma", "pa" or "sc";
      * ``int(row["iType"])`` is greater than 101;
      * ``int(row["iType"])`` is one of the excluded item-type codes below.
    """
    allowed_prefixes = ("rc", "ma", "pa", "sc")
    excluded_types = {
        6, 9,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 26, 27, 28,
        30, 31, 32, 33, 34, 35, 36, 37, 38,
        41, 42, 43,
        51, 52, 53,
        66, 68, 70, 79,
    }

    # The location is tested first, so iType is only read for allowed locations.
    if not row["iLocation"].startswith(allowed_prefixes):
        return "TRUE"

    item_type = int(row["iType"])
    # NOTE: a further rule testing for "Evaluated for Google Books" in
    # row["iIntNote"] was commented out by the author and remains disabled.
    if item_type > 101 or item_type in excluded_types:
        return "TRUE"
    return "FALSE"
    

In [8]:
# Derive the building label row by row; determine_building returns None for
# location codes whose prefix it does not recognise.
df["building"] = df.apply(determine_building, axis=1)

In [9]:
# Flag each row with the string "TRUE" or "FALSE" as returned by do_not_sent.
df["do_not_sent"] = df.apply(do_not_sent, axis=1)

In [10]:
# Preview the first rows, including the derived "building" and "do_not_sent" columns.
# NOTE(review): the saved output shows True/False in do_not_sent, while do_not_sent()
# returns the strings "TRUE"/"FALSE" — the output looks stale; re-run to confirm.
df.head()

Unnamed: 0,iNumber,iCode2,iType,iLocation,iStatus,iMessage,iOpacMessage,iAgency,iBarcode,iUnit,...,Message,DateType,Date1,Date2,title,pubInfo,bCallNum,bStaffCallNum,building,do_not_sent
0,i105072515,-,33,rc2ma,-,-,2,214,33433011077942,,...,,u,1888,9999,Report of meeting.,"Sydney : [The Association], 1888-",*EC.A939,*EC.A939,ReCAP,True
1,i105116038,-,3,rc2ma,-,-,2,214,33433008942330,,...,,u,18uu,9999,Annual report.,"Newcastle-on-Tyne, 18",TDRA (Shipconstructors' and Shipwrights' Assoc...,TDRA (Shipconstructors' and Shipwrights' Assoc...,ReCAP,False
2,i105135902,-,33,rc2ma,-,-,2,214,33433010763328,,...,,u,19uu,9999,Stenografisch verslag van de algemeene vergade...,"Dalfsen[, 19",TDA (Koninklijke Nederlandsche Middenstandsbon...,TDA (Koninklijke Nederlandsche Middenstandsbon...,ReCAP,True
3,i106436284,-,32,rc2ma,-,-,2,214,33433007280369,,...,,m,1861,1862,London labour and the London poor; a cyclopædi...,"London, Griffin, Bohn, and Company, 1861-1862.","TDI (Mayhew, H. London labour and the London p...","TDI (Mayhew, H. London labour and the London p...",ReCAP,True
4,i107088150,-,32,rc2ma,-,-,2,218,33433007581329,,...,,m,1875,1878,Ch. Darwin's Gesammelte Werke. Aus dem Englisc...,"Stuttgart, E. Schweizerbart'sche Verlagshandlu...","PQF (Darwin, C. R. Ch. Darwin's gesammelte Werke)","PQF (Darwin, C. R. Ch. Darwin's gesammelte Werke)",ReCAP,True


In [11]:
# SASB picklist: rows labelled "SASB" whose do_not_sent flag is the string "FALSE".
# The file is written tab-separated (despite its .csv extension), without the index.
df_sasb = df.loc[df["building"].eq("SASB") & df["do_not_sent"].eq("FALSE")]
df_sasb.to_csv(
    "../files/picklist/sasb.csv",
    sep="\t",
    index=False,
    columns=[
        "iNumber",
        "iLocation",
        "iBarcode",
        "title",
        "pubInfo",
        "bCallNum",
        "bStaffCallNum",
    ],
)

In [12]:
# ReCAP picklist: rows labelled "ReCAP" whose do_not_sent flag is the string "FALSE".
# Only the barcode column is written, tab-separated and without the index.
df_recap = df.loc[df["building"].eq("ReCAP") & df["do_not_sent"].eq("FALSE")]
df_recap.to_csv(
    "../files/picklist/recap.csv",
    sep="\t",
    index=False,
    columns=["iBarcode"],
)

In [13]:
# Excluded items: every row flagged "TRUE" by do_not_sent, regardless of building.
# Only the barcode column is written, tab-separated and without the index.
df_do_not_sent = df.loc[df["do_not_sent"].eq("TRUE")]
df_do_not_sent.to_csv(
    "../files/picklist/do_not_sent.csv",
    sep="\t",
    index=False,
    columns=["iBarcode"],
)

In [14]:
# Show the last rows of the frame, including the derived columns.
df.tail()

Unnamed: 0,iNumber,iCode2,iType,iLocation,iStatus,iMessage,iOpacMessage,iAgency,iBarcode,iUnit,...,Message,DateType,Date1,Date2,title,pubInfo,bCallNum,bStaffCallNum,building,do_not_sent
199978,i288195838,-,3,rc2ma,-,-,2,214,33433099884953,barco/jm,...,,c,1908,9999,Dartmouth alumni magazine.,"Hanover, N.H., Dartmouth Secretaries Association.",STG (Dartmouth alumni magazine),STG (Dartmouth alumni magazine),ReCAP,False
199979,i28819598x,-,3,rc2ma,-,-,2,214,33433099884714,barco/jm,...,,c,1908,9999,Dartmouth alumni magazine.,"Hanover, N.H., Dartmouth Secretaries Association.",STG (Dartmouth alumni magazine),STG (Dartmouth alumni magazine),ReCAP,False
199980,i288196077,-,3,rc2ma,-,-,2,214,33433099884730,barco/jm,...,,c,1908,9999,Dartmouth alumni magazine.,"Hanover, N.H., Dartmouth Secretaries Association.",STG (Dartmouth alumni magazine),STG (Dartmouth alumni magazine),ReCAP,False
199981,i288196120,-,3,rc2ma,-,-,2,214,33433099884755,barco/jm,...,,c,1908,9999,Dartmouth alumni magazine.,"Hanover, N.H., Dartmouth Secretaries Association.",STG (Dartmouth alumni magazine),STG (Dartmouth alumni magazine),ReCAP,False
199982,i288196181,-,3,rc2ma,-,-,2,214,33433099884771,barco/jm,...,,c,1908,9999,Dartmouth alumni magazine.,"Hanover, N.H., Dartmouth Secretaries Association.",STG (Dartmouth alumni magazine),STG (Dartmouth alumni magazine),ReCAP,False
