In [1]:
import pandas as pd

In [2]:
from datetime import datetime, timedelta

In [3]:
# Load the incident export. "created" and "resolved" are parsed as datetimes
# because later cells compare them against datetime bounds and group by year.
df = pd.read_csv(
    "src_data/u_bkop_incident_230603.csv",
    parse_dates=["created", "resolved"],
    low_memory=False  # parse the file in one pass rather than chunk-by-chunk
)

In [4]:
df.shape[0]

121479

In [5]:
df.columns

Index(['number', 'created', 'caller', 'state', 'category', 'subcategory',
       'assignment_group', 'bpl_location', 'nyp_location', 'resolved',
       'assigned_to', 'system', 'mat_source', 'close_code', 'url',
       'reassignment_count', 'updates', 'resolved_by'],
      dtype='object')

In [6]:
df["category"].unique()

array(['Cataloging', 'Acquisitions', 'Selection', 'Logistics',
       'Offsite - ReCAP', 'Collections Processing', nan], dtype=object)

### Originated as CAT or resolved by CAT

In [7]:
# Keep tickets that are either filed under the "Cataloging" category or
# assigned to the "BKOPS CAT" group.
is_cataloging = df["category"] == "Cataloging"
is_cat_group = df["assignment_group"] == "BKOPS CAT"
cdf = df[is_cataloging | is_cat_group]

In [8]:
cdf.shape[0]

21250

## CAT ticket volume by year

In [9]:
# Group the CAT tickets by the calendar year of their creation timestamp.
# The vectorised .dt accessor replaces a per-row Python lambda.
gyear_cdf = cdf.groupby(cdf["created"].dt.year)

In [10]:
ycdf = pd.DataFrame(columns=["year", "tickets"])

In [11]:
# One row per creation year with the number of CAT tickets opened in it.
# The rows are collected first and the frame is built once: growing a frame
# with pd.concat inside the loop re-copies it on every pass and concatenates
# onto an empty frame, which newer pandas versions warn about.
ycdf = pd.DataFrame(
    [{"year": year, "tickets": len(d)} for year, d in gyear_cdf],
    columns=["year", "tickets"],
)

In [12]:
ycdf

Unnamed: 0,year,tickets
0,2013,1311
1,2014,2140
2,2015,2739
3,2016,2554
4,2017,2233
5,2018,2112
6,2019,1785
7,2020,955
8,2021,1964
9,2022,2169


In [13]:
ycdf.to_csv("public_data/cat-by-year.csv", index=False)

In [14]:
# Three independent, empty system/year/tickets frames (RL, BL and BP series).
ycdf_rl, ycdf_bl, ycdf_bp = (
    pd.DataFrame(columns=["system", "year", "tickets"]) for _ in range(3)
)

In [15]:
# Yearly CAT ticket counts split by library system.
# Each entry pairs the value stored in the "system" column with the short
# label written to the output and the list that collects that system's rows.
rl_rows, bl_rows, bp_rows = [], [], []
system_targets = (
    ("NYPL Research", "NYPL RL", rl_rows),
    ("NYPL Circulating", "NYPL BL", bl_rows),
    ("BPL Circulating", "BPL", bp_rows),
)
for year, d in gyear_cdf:
    for system_name, label, rows in system_targets:
        # Number of this year's tickets that belong to the given system.
        ticket_count = int((d["system"] == system_name).sum())
        rows.append({"system": label, "year": year, "tickets": ticket_count})

# Build each frame once from its collected rows instead of concatenating a
# one-row frame per year.
system_year_columns = ["system", "year", "tickets"]
ycdf_rl = pd.DataFrame(rl_rows, columns=system_year_columns)
ycdf_bl = pd.DataFrame(bl_rows, columns=system_year_columns)
ycdf_bp = pd.DataFrame(bp_rows, columns=system_year_columns)

In [16]:
ycdf_rl.to_csv("public_data/rl-by-year.csv", index=False)
ycdf_bl.to_csv("public_data/bl-by-year.csv", index=False)
ycdf_bp.to_csv("public_data/bp-by-year.csv", index=False)

## Last year most frequent requesting locations

In [17]:
# treat the three systems separately

In [18]:
# Start of the trailing one-year window: 365 days before 2023-06-04.
window_end = datetime(2023, 6, 4)
start_date = window_end - timedelta(days=365)
print(start_date)

2022-06-04 00:00:00


In [19]:
loc_columns = ["location", "tickets"]

#### NYPL Research

In [20]:
# NYPL Research CAT tickets created on or after start_date.
loc_rl = cdf.loc[
    (cdf["created"] >= start_date) & (cdf["system"] == "NYPL Research")
]

In [21]:
loc_rl.shape[0]

251

In [22]:
gloc_rl = pd.DataFrame(columns=loc_columns)

In [23]:
# Ticket counts per requesting NYPL location for the Research subset, echoed
# as they are computed. Rows are gathered in a list and the frame is built
# once rather than concatenating a one-row frame per location.
rl_location_rows = []
for loc, d in loc_rl.groupby("nyp_location"):
    rl_location_rows.append({"location": loc, "tickets": len(d)})
    print(loc, len(d))
gloc_rl = pd.DataFrame(rl_location_rows, columns=["location", "tickets"])

Library Services Center 8
Library for the Performing Arts 13
Schomburg Center for Research in Black Culture 35
Stavros Niarchos Foundation Library 5
Stephen A. Schwarzman Building 190


In [24]:
gloc_rl = gloc_rl.sort_values("tickets", ascending=False)

In [25]:
gloc_rl.to_csv("public_data/freq-loc-rl.csv", index=False)

#### NYPL Circ

In [26]:
# NYPL Circulating CAT tickets created on or after start_date.
loc_bl = cdf.loc[
    (cdf["created"] >= start_date) & (cdf["system"] == "NYPL Circulating")
]

In [27]:
loc_bl.shape[0]

1342

In [28]:
gloc_bl = pd.DataFrame(columns=loc_columns)

In [29]:
# Ticket counts per requesting NYPL location for the Circulating subset.
# The frame is built once from the collected rows instead of being grown
# with pd.concat on every loop iteration.
gloc_bl = pd.DataFrame(
    [
        {"location": loc, "tickets": len(d)}
        for loc, d in loc_bl.groupby("nyp_location")
    ],
    columns=["location", "tickets"],
)

In [30]:
gloc_bl = gloc_bl.sort_values("tickets", ascending=False)

In [31]:
gloc_bl.head()

Unnamed: 0,location,tickets
14,Bronx Library Center,140
66,Stavros Niarchos Foundation Library,113
41,Library for the Performing Arts,93
67,Stephen A. Schwarzman Building,69
59,Seward Park Library,61


In [32]:
gloc_bl.to_csv("public_data/freq-loc-bl.csv", index=False)

#### BPL

In [33]:
# BPL Circulating CAT tickets created on or after start_date.
loc_bp = cdf.loc[
    (cdf["created"] >= start_date) & (cdf["system"] == "BPL Circulating")
]

In [34]:
loc_bp.shape[0]

1035

In [35]:
gloc_bp = pd.DataFrame(columns=loc_columns)

In [36]:
# Ticket counts per requesting BPL location (grouped on "bpl_location").
# The frame is built once from the collected rows instead of being grown
# with pd.concat on every loop iteration.
gloc_bp = pd.DataFrame(
    [
        {"location": loc, "tickets": len(d)}
        for loc, d in loc_bp.groupby("bpl_location")
    ],
    columns=["location", "tickets"],
)

In [37]:
gloc_bp = gloc_bp.sort_values("tickets", ascending=False)

In [38]:
gloc_bp.head()

Unnamed: 0,location,tickets
17,Central Literature & Languages - 14,149
6,Brooklyn Heights - 50,128
20,Central SST - 16,96
62,Windsor Terrace - 77,55
15,Central HBR (Hist/Biog/Rel) - 13,54


In [39]:
gloc_bp.to_csv("public_data/freq-loc-bp.csv", index=False)

## Ticket Categories

In [40]:
# last year only!

In [41]:
columns = ["category", "tickets"]

In [42]:
cat_rl = pd.DataFrame(columns=columns)

In [43]:
# Ticket counts per subcategory for the NYPL Research subset.
# Built in one step from the grouped counts rather than by concatenating a
# one-row frame per subcategory.
cat_rl = pd.DataFrame(
    [
        {"category": cat, "tickets": len(d)}
        for cat, d in loc_rl.groupby("subcategory")
    ],
    columns=["category", "tickets"],
)

In [44]:
cat_rl.to_csv("public_data/cat-pie-rl.csv", index=False)

In [45]:
cat_bl = pd.DataFrame(columns=columns)

In [46]:
# Ticket counts per subcategory for the NYPL Circulating subset.
# Built in one step from the grouped counts rather than by concatenating a
# one-row frame per subcategory.
cat_bl = pd.DataFrame(
    [
        {"category": cat, "tickets": len(d)}
        for cat, d in loc_bl.groupby("subcategory")
    ],
    columns=["category", "tickets"],
)

In [47]:
cat_bl.to_csv("public_data/cat-pie-bl.csv", index=False)

In [48]:
cat_bp = pd.DataFrame(columns=columns)

In [49]:
# Ticket counts per subcategory for the BPL subset.
# Built in one step from the grouped counts rather than by concatenating a
# one-row frame per subcategory.
cat_bp = pd.DataFrame(
    [
        {"category": cat, "tickets": len(d)}
        for cat, d in loc_bp.groupby("subcategory")
    ],
    columns=["category", "tickets"],
)

In [50]:
cat_bp.to_csv("public_data/cat-pie-bp.csv", index=False)

### Average agent workload

In [51]:
# must be split between systems
# agents per system: RL - 5, BL - 6, BP - 6

In [58]:
# Average RL tickets per agent per week: the RL ticket count for the window,
# spread over 52.1 weeks and then over the 5 RL agents.
weeks_in_year = 52.1
rl_agent_count = 5
ave_rl_tickets_per_agent = len(loc_rl) / weeks_in_year / rl_agent_count

In [59]:
print(ave_rl_tickets_per_agent)

0.963531669865643


In [60]:
# Average BL tickets per agent per week: the BL ticket count for the window,
# spread over 52.1 weeks and then over the 6 BL agents.
weeks_in_year = 52.1
bl_agent_count = 6
ave_bl_tickets_per_agent_per_week = len(loc_bl) / weeks_in_year / bl_agent_count

In [61]:
print(ave_bl_tickets_per_agent_per_week)

4.293026231605886


In [62]:
# Average BP tickets per agent per week: the BP ticket count for the window,
# spread over 52.1 weeks and then over the 6 BP agents.
weeks_in_year = 52.1
bp_agent_count = 6
ave_bp_tickets_per_agent_per_week = len(loc_bp) / weeks_in_year / bp_agent_count

In [63]:
print(ave_bp_tickets_per_agent_per_week)

3.310940499040307


In [65]:
# Combined circulating workload: BL plus BP tickets for the window, averaged
# over 52.1 weeks and then over the 12 circulating agents (6 + 6).
weeks_in_year = 52.1
circ_agent_count = 12
circ_ticket_total = len(loc_bl) + len(loc_bp)
ave_circ_tickets_per_agent_per_week = (
    circ_ticket_total / weeks_in_year / circ_agent_count
)

In [66]:
print(ave_circ_tickets_per_agent_per_week)

3.8019833653230966


In [None]:
# before and after Jan 19, 2023

In [113]:
# Split point for the before/after comparison, and the start of the 365-day
# "before" window that leads up to it.
# NOTE: this rebinds start_date, which earlier cells use for the window that
# starts on 2022-06-04; re-running those cells after this one would silently
# pick up this value instead.
point_date = datetime(2023, 1, 19)
start_date = point_date - timedelta(days=365)

In [114]:
print(start_date, point_date)

2022-01-19 00:00:00 2023-01-19 00:00:00


In [115]:
# Non-Research CAT tickets resolved in the window leading up to point_date
# (start_date inclusive, point_date exclusive).
# NOTE(review): "!=" also keeps rows whose system is missing — confirm that
# this is intended.
not_research = cdf["system"] != "NYPL Research"
resolved_in_window = (cdf["resolved"] >= start_date) & (cdf["resolved"] < point_date)
before_df = cdf[not_research & resolved_in_window]

In [116]:
before_df.shape[0]

1859

In [117]:
# Non-Research CAT tickets resolved on or after point_date (no upper bound).
# NOTE(review): "!=" also keeps rows whose system is missing — confirm that
# this is intended.
after_df = cdf.loc[
    (cdf["resolved"] >= point_date) & (cdf["system"] != "NYPL Research")
]

In [118]:
after_df.shape[0]

1825

In [172]:
# Empty agent/tickets frames for the "before" period (weekly rate and count).
# A frame created from column names alone already has zero rows.
agent_columns = ["agent", "tickets"]
agents_before_weekly = pd.DataFrame(columns=agent_columns)
agents_before = pd.DataFrame(columns=agent_columns)

In [173]:
# Tickets resolved per agent in the "before" period: a raw count and a weekly
# average over the 365-day window (52.1 weeks).
# Both frames are built once from the grouped counts; concatenating a one-row
# frame per agent re-copies the frame on every loop iteration.
before_counts = [
    (agent, len(d)) for agent, d in before_df.groupby("resolved_by")
]
agents_before = pd.DataFrame(before_counts, columns=["agent", "tickets"])
agents_before_weekly = pd.DataFrame(
    [(agent, count / 52.1) for agent, count in before_counts],
    columns=["agent", "tickets"],
)

In [174]:
agents_before = agents_before.sort_values("tickets", ascending=False)
agents_before_weekly = agents_before_weekly.sort_values("tickets", ascending=False)

In [175]:
agents_before.to_csv("private_data/agents-circ-before.csv", index=False)
agents_before_weekly.to_csv("private_data/agents-circ-before-weekly.csv", index=False)

In [176]:
# Empty agent/tickets frames for the "after" period (count and weekly rate).
# A frame created from column names alone already has zero rows.
agent_columns = ["agent", "tickets"]
agents_after = pd.DataFrame(columns=agent_columns)
agents_after_weekly = pd.DataFrame(columns=agent_columns)

In [177]:
# Tickets resolved per agent in the "after" period: a raw count and a weekly
# average.
# The "after" window runs from point_date to the end of the export, not a full
# year, so dividing by 52.1 would understate the weekly rate. The window length
# is measured up to 2023-06-04, the same cutoff the trailing one-year window is
# computed from.
after_weeks = (datetime(2023, 6, 4) - point_date) / timedelta(weeks=1)

# Both frames are built once from the grouped counts; concatenating a one-row
# frame per agent re-copies the frame on every loop iteration.
after_counts = [
    (agent, len(d)) for agent, d in after_df.groupby("resolved_by")
]
agents_after = pd.DataFrame(after_counts, columns=["agent", "tickets"])
agents_after_weekly = pd.DataFrame(
    [(agent, count / after_weeks) for agent, count in after_counts],
    columns=["agent", "tickets"],
)

In [178]:
agents_after = agents_after.sort_values("tickets", ascending=False)
agents_after_weekly = agents_after_weekly.sort_values("tickets", ascending=False)

In [179]:
agents_after.to_csv("private_data/agents-circ-after.csv", index=False)

In [180]:
agents_after_weekly.to_csv("private_data/agents-circ-after-weekly.csv", index=False)