In [1]:
import pandas as pd

In [20]:
from datetime import datetime, timedelta

In [3]:
# Load the incident export; "created" and "resolved" are parsed as datetimes.
incident_csv = "src_data/u_bkop_incident_230603.csv"
date_columns = ["created", "resolved"]
df = pd.read_csv(incident_csv, parse_dates=date_columns, low_memory=False)

In [4]:
# Number of rows in the full incident export.
len(df)

121479

In [5]:
# Show the column labels available in the loaded incident table.
df.columns

Index(['number', 'created', 'caller', 'state', 'category', 'subcategory',
       'assignment_group', 'bpl_location', 'nyp_location', 'resolved',
       'assigned_to', 'system', 'mat_source', 'close_code', 'url',
       'reassignment_count', 'updates', 'resolved_by'],
      dtype='object')

In [6]:
# Distinct values of the "category" column (missing values are reported as nan).
df["category"].unique()

array(['Cataloging', 'Acquisitions', 'Selection', 'Logistics',
       'Offsite - ReCAP', 'Collections Processing', nan], dtype=object)

### Originated as CAT or resolved by CAT

In [7]:
# Keep tickets that are categorised as Cataloging or assigned to the BKOPS CAT group.
is_cat_category = df["category"] == "Cataloging"
is_cat_group = df["assignment_group"] == "BKOPS CAT"
cdf = df[is_cat_category | is_cat_group]

In [8]:
# Number of CAT tickets selected by the filter.
len(cdf)

21250

## CAT ticket volume by year

In [9]:
# Group the CAT tickets by the calendar year of their "created" timestamp.
# The vectorised .dt accessor replaces a per-row Python lambda; rows with a
# missing timestamp still fall out of the grouping.
gyear_cdf = cdf.groupby(cdf["created"].dt.year)

In [10]:
# Start from an empty table with the two columns of the per-year summary.
ycdf = pd.DataFrame(columns=["year", "tickets"])

In [11]:
# Build the per-year ticket counts in a single step: one row per group of
# gyear_cdf, with the group key in "year" and the group size in "tickets".
# Rebuilding the frame (rather than appending to the existing one row by row)
# keeps this cell safe to re-run without duplicating rows and avoids copying
# the frame on every iteration.
ycdf = gyear_cdf.size().rename_axis("year").reset_index(name="tickets")

In [12]:
# Display the per-year CAT ticket counts.
ycdf

Unnamed: 0,year,tickets
0,2013,1311
1,2014,2140
2,2015,2739
3,2016,2554
4,2017,2233
5,2018,2112
6,2019,1785
7,2020,955
8,2021,1964
9,2022,2169


In [13]:
# Publish the per-year totals; the row index is not written.
cat_by_year_csv = "public_data/cat-by-year.csv"
ycdf.to_csv(cat_by_year_csv, index=False)

In [14]:
# Three empty per-system tables that share one column layout.
system_year_columns = ["system", "year", "tickets"]
ycdf_rl, ycdf_bl, ycdf_bp = (
    pd.DataFrame(columns=system_year_columns) for _ in range(3)
)

In [15]:
def _tickets_by_year(system, label):
    """Count tickets per year for one library system.

    Parameters
    ----------
    system : str
        Value of the "system" column to count within each yearly group.
    label : str
        Value written to the "system" column of the returned table.

    Returns
    -------
    pandas.DataFrame
        One row per group of ``gyear_cdf`` with columns "system", "year" and
        "tickets". Years without matching tickets are kept with a count of 0.
    """
    rows = [
        {"system": label, "year": year, "tickets": int((d["system"] == system).sum())}
        for year, d in gyear_cdf
    ]
    return pd.DataFrame(rows, columns=["system", "year", "tickets"])


# Each table is rebuilt from scratch, so re-running this cell does not append
# duplicate rows, and no frame is copied once per year.
ycdf_rl = _tickets_by_year("NYPL Research", "NYPL RL")
ycdf_bl = _tickets_by_year("NYPL Circulating", "NYPL BL")
ycdf_bp = _tickets_by_year("BPL Circulating", "BPL")

In [16]:
# Write each per-system yearly table to its own public CSV (no row index).
for table, csv_path in (
    (ycdf_rl, "public_data/rl-by-year.csv"),
    (ycdf_bl, "public_data/bl-by-year.csv"),
    (ycdf_bp, "public_data/bp-by-year.csv"),
):
    table.to_csv(csv_path, index=False)

## Most frequent requesting locations in the last year

In [17]:
# treat the three systems separately

In [21]:
# Anchor the "last year" window to the moment this analysis was originally run
# instead of datetime.today(): with a wall-clock reference, re-running the
# notebook later silently shifts the window away from the data snapshot and
# changes every count below.
# NOTE(review): the source file name suggests a 2023-06-03 export; update
# analysis_date whenever a newer export is loaded.
analysis_date = datetime(2023, 6, 4, 17, 46, 58, 956717)
start_date = analysis_date - timedelta(days=365)
print(start_date)

2022-06-04 17:46:58.956717


In [57]:
# Column layout shared by the per-location summary tables below.
loc_columns = ["location", "tickets"]

#### NYPL Research

In [22]:
# NYPL Research tickets created on or after start_date.
is_research = cdf["system"] == "NYPL Research"
research_in_window = cdf["created"] >= start_date
loc_rl = cdf[is_research & research_in_window]

In [23]:
# Number of NYPL Research tickets in the window.
len(loc_rl)

251

In [34]:
# Empty per-location table for NYPL Research, using the shared column layout.
gloc_rl = pd.DataFrame(columns=loc_columns)

In [35]:
# Count NYPL Research tickets per "nyp_location" in one pass. Rebuilding the
# table (rather than appending to it row by row) keeps the cell safe to re-run
# without duplicating rows and avoids copying the frame on every iteration.
rl_counts = loc_rl.groupby("nyp_location").size()
gloc_rl = rl_counts.rename_axis("location").reset_index(name="tickets")
# Echo each location with its ticket count.
for loc, tickets in rl_counts.items():
    print(loc, tickets)

Library Services Center 8
Library for the Performing Arts 13
Schomburg Center for Research in Black Culture 35
Stavros Niarchos Foundation Library 5
Stephen A. Schwarzman Building 190


In [41]:
# Order locations from most to fewest tickets.
gloc_rl = gloc_rl.sort_values(by="tickets", ascending=False)

In [43]:
# Publish the NYPL Research location ranking; the row index is not written.
freq_loc_rl_csv = "public_data/freq-loc-rl.csv"
gloc_rl.to_csv(freq_loc_rl_csv, index=False)

#### NYPL Circ

In [45]:
# NYPL Circulating tickets created on or after start_date.
is_nypl_circ = cdf["system"] == "NYPL Circulating"
nypl_circ_in_window = cdf["created"] >= start_date
loc_bl = cdf[is_nypl_circ & nypl_circ_in_window]

In [46]:
# Number of NYPL Circulating tickets in the window.
len(loc_bl)

1340

In [48]:
# Empty per-location table for NYPL Circulating, using the shared column layout.
gloc_bl = pd.DataFrame(columns=loc_columns)

In [50]:
# Count NYPL Circulating tickets per "nyp_location" in one pass. Rebuilding the
# table (rather than appending to it row by row) keeps the cell safe to re-run
# without duplicating rows and avoids copying the frame on every iteration.
gloc_bl = (
    loc_bl.groupby("nyp_location")
    .size()
    .rename_axis("location")
    .reset_index(name="tickets")
)

In [51]:
# Order locations from most to fewest tickets.
gloc_bl = gloc_bl.sort_values(by="tickets", ascending=False)

In [52]:
# Preview the first rows of the sorted NYPL Circulating location table.
gloc_bl.head()

Unnamed: 0,location,tickets
14,Bronx Library Center,140
66,Stavros Niarchos Foundation Library,113
41,Library for the Performing Arts,93
67,Stephen A. Schwarzman Building,69
59,Seward Park Library,61


In [53]:
# Publish the NYPL Circulating location ranking; the row index is not written.
freq_loc_bl_csv = "public_data/freq-loc-bl.csv"
gloc_bl.to_csv(freq_loc_bl_csv, index=False)

#### BPL

In [54]:
# BPL Circulating tickets created on or after start_date.
is_bpl_circ = cdf["system"] == "BPL Circulating"
bpl_circ_in_window = cdf["created"] >= start_date
loc_bp = cdf[is_bpl_circ & bpl_circ_in_window]

In [55]:
# Number of BPL Circulating tickets in the window.
len(loc_bp)

1034

In [58]:
# Empty per-location table for BPL, using the shared column layout.
gloc_bp = pd.DataFrame(columns=loc_columns)

In [59]:
# Count BPL tickets per "bpl_location" in one pass. Rebuilding the table
# (rather than appending to it row by row) keeps the cell safe to re-run
# without duplicating rows and avoids copying the frame on every iteration.
gloc_bp = (
    loc_bp.groupby("bpl_location")
    .size()
    .rename_axis("location")
    .reset_index(name="tickets")
)

In [60]:
# Order locations from most to fewest tickets.
gloc_bp = gloc_bp.sort_values(by="tickets", ascending=False)

In [61]:
# Preview the first rows of the sorted BPL location table.
gloc_bp.head()

Unnamed: 0,location,tickets
17,Central Literature & Languages - 14,149
6,Brooklyn Heights - 50,128
20,Central SST - 16,95
62,Windsor Terrace - 77,55
15,Central HBR (Hist/Biog/Rel) - 13,54


In [62]:
# Publish the BPL location ranking; the row index is not written.
freq_loc_bp_csv = "public_data/freq-loc-bp.csv"
gloc_bp.to_csv(freq_loc_bp_csv, index=False)