In [1]:
import pandas as pd

In [2]:
# Relative path to the tickets CSV. Despite the name, this is a path string,
# not an open file handle; it is passed straight to pd.read_csv below.
fh = '../files/tickets-gen-all.csv'

In [3]:
# Load the tickets: the first CSV column becomes the index and the three
# listed timestamp columns are parsed into datetimes.
df = pd.read_csv(
    fh,
    index_col=0,
    parse_dates=['created', 'opened_at', 'updated_on'],
)

In [4]:
# Size check of the loaded frame: (rows, columns).
df.shape

(75724, 22)

In [5]:
# List the available columns.
# NOTE(review): the saved output shows a column spelled 'nyp_locaiton' (sic);
# that exact spelling must be used when selecting it.
df.columns

Index(['created', 'caller', 'opened_at', 'opened_by', 'priority', 'state',
       'category', 'subcategory', 'assignment_group', 'bpl_location',
       'nyp_locaiton', 'resolved', 'assigned_to', 'system', 'closed_at',
       'closed_by', 'mat_source', 'reassignment_count', 'reopen_count',
       'resolved_by', 'updated_on', 'updated_by'],
      dtype='object')

In [6]:
# Summarise who files tickets: non-null count, distinct callers, top caller.
# NOTE(review): the saved output names an individual; consider clearing this
# output before sharing the notebook.
df['caller'].describe()

count          75707
unique          1931
top       Diane Chin
freq             971
Name: caller, dtype: object

In [7]:
# Check which columns were parsed as datetimes or integers and which were
# left as generic object columns.
df.dtypes

created               datetime64[ns]
caller                        object
opened_at             datetime64[ns]
opened_by                     object
priority                      object
state                         object
category                      object
subcategory                   object
assignment_group              object
bpl_location                  object
nyp_locaiton                  object
resolved                      object
assigned_to                   object
system                        object
closed_at                     object
closed_by                     object
mat_source                    object
reassignment_count             int64
reopen_count                   int64
resolved_by                   object
updated_on            datetime64[ns]
updated_by                    object
dtype: object

In [8]:
# Enumerate the distinct priority labels present in the data.
df['priority'].unique()

array(['3 - Moderate', '1 - Critical', '5 - Planning', '4 - Low', nan,
       '2 - High'], dtype=object)

In [9]:
# Enumerate the distinct ticket states; later cells filter and group on
# this column.
df['state'].unique()

array(['Closed', 'Active', 'Awaiting User Info', 'Awaiting Vendor', 'New',
       'Resolved'], dtype=object)

In [10]:
# Enumerate the distinct categories; the per-department yearly counts in
# later cells group on this column.
df['category'].unique()

array(['Cataloging', 'Acquisitions', 'Selection', 'Logistics',
       'Offsite - ReCAP', 'Collections Processing', nan], dtype=object)

In [11]:
# Enumerate the distinct subcategories present in the data.
df['subcategory'].unique()

array(['Item attached to wrong bib record', 'Item not linked', 'Other',
       'Call number error', 'Cataloging error', 'Packing list error',
       'Materials not processed with labels, etc.',
       'Holds fulfillment--NYPL only', 'Item record problem',
       'Volume record needed', 'Duplicate records',
       'Supplies: Circ bins, discard bins, bin ties, tubs, etc.',
       'Call number missing', 'Location code error',
       'Shipment delivery error', 'Status of request',
       'Pickup or delivery request', 'Spine labels', 'Damaged material',
       'Invoice payment', 'Materials received without paperwork',
       'Author and submission inquiries',
       'Barcodes for circulating materials', 'Collection HQ',
       'Online Catalog', 'Gift books or Donations--NYPL only',
       'Barcoding', 'OCLC Holdings', 'Delivery issue or error',
       'New Item/Title Request', 'Electronic resource',
       'Damaged material / missing pieces', 'BiblioCommons',
       'Leased items', 'Book ja

In [12]:
# Distribution of how many times a ticket was reassigned.
df['reassignment_count'].describe()

count    75724.000000
mean         0.110916
std          0.364769
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          8.000000
Name: reassignment_count, dtype: float64

In [13]:
# Empty frame defining the columns of the per-department yearly ticket counts.
adf_out = pd.DataFrame(columns=['dept', 'year', 'tickets'])

In [14]:
# Reporting window. A later cell also reads this name, so it must remain
# `years`.
years = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]

# Tickets per category ("dept") for every year in the window, with an
# explicit 0 for years in which a category had no tickets.
# The rows are collected in a list and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. Building the frame from scratch also makes this cell safe to
# re-run without duplicating rows.
dept_year_records = []
for dept, dept_tickets in df.groupby('category'):
    # Rows without a creation timestamp cannot be attributed to a year.
    tickets_by_year = dept_tickets['created'].dropna().dt.year.value_counts()
    dept_year_records.extend(
        {'dept': dept, 'year': year, 'tickets': int(tickets_by_year.get(year, 0))}
        for year in years
    )

adf_out = pd.DataFrame(dept_year_records, columns=['dept', 'year', 'tickets'])

In [15]:
# Preview the first rows of the per-department yearly counts.
adf_out.head()

Unnamed: 0,dept,year,tickets
0,Acquisitions,2013,110
1,Acquisitions,2014,139
2,Acquisitions,2015,185
3,Acquisitions,2016,290
4,Acquisitions,2017,325


In [16]:
# Write the per-department yearly counts to CSV; index=False leaves the
# row index out of the file.
adf_out.to_csv('../data-display/tickets-per-dept-yearly.csv', index=False)

In [17]:
# Unresolved tickets: every row whose state is neither 'Closed' nor 'Resolved'
# (i.e. still open or awaiting something).
odf = df.loc[~df['state'].isin(['Closed', 'Resolved'])]

In [18]:
# Number of unresolved tickets: (rows, columns).
odf.shape

(3409, 22)

In [19]:
# Reporting window. A later cell also reads this name, so it must remain
# `years`.
years = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]

# Unresolved tickets per category ("dept") for every year in the window,
# with an explicit 0 for years in which a category had none.
# The rows are collected in a list and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
open_records = []
for dept, dept_tickets in odf.groupby('category'):
    # Rows without a creation timestamp cannot be attributed to a year.
    tickets_by_year = dept_tickets['created'].dropna().dt.year.value_counts()
    open_records.extend(
        {'dept': dept, 'year': year, 'tickets': int(tickets_by_year.get(year, 0))}
        for year in years
    )

odf_out = pd.DataFrame(open_records, columns=['dept', 'year', 'tickets'])

In [20]:
# Preview the first rows of the unresolved-ticket counts.
odf_out.head()

Unnamed: 0,dept,year,tickets
0,Acquisitions,2013,13
1,Acquisitions,2014,20
2,Acquisitions,2015,32
3,Acquisitions,2016,53
4,Acquisitions,2017,65


In [21]:
# Write the unresolved-ticket counts to CSV; index=False leaves the row
# index out of the file.
odf_out.to_csv('../data-display/unresolved-per-dept-yearly.csv', index=False)

In [22]:
# Keep only the tickets that ended up in cataloging, i.e. those assigned to
# the 'BKOPS CAT' group, whatever category they were filed under.
# The previous condition was `(category == 'Cataloging' & group == 'BKOPS CAT')
# | group == 'BKOPS CAT'`, which is logically the same as the group test
# alone, so the category clause has been dropped.
cdf = df[df['assignment_group'] == 'BKOPS CAT']

In [23]:
# Number of tickets kept by the cataloging filter: (rows, columns).
cdf.shape

(14470, 22)

In [24]:
# Confirm the filter left only the 'BKOPS CAT' assignment group.
cdf['assignment_group'].unique()

array(['BKOPS CAT'], dtype=object)

In [35]:
# Ticket states in display order: a state's position in this list is written
# out as its 'stack_order' value by the per-state yearly count cell.
# (A bare `cdf['category'].unique()` used to precede this line; it was not the
# cell's last expression, so its value was never displayed, and it was removed.)
states = ['Closed', 'Resolved', 'Awaiting User Info', 'Awaiting Vendor', 'Active', 'New']

In [36]:
# Cataloging tickets per state for every year in `years`, with an explicit 0
# for years in which a state had no tickets. 'stack_order' is the state's
# position in `states`.
# The rows are collected in a list and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
state_year_records = []
for state, state_tickets in cdf.groupby('state'):
    # Raises ValueError if the data contains a state missing from `states`.
    stack_order = states.index(state)
    # Rows without a creation timestamp cannot be attributed to a year.
    tickets_by_year = state_tickets['created'].dropna().dt.year.value_counts()
    state_year_records.extend(
        {
            'year': year,
            'state': state,
            'tickets': int(tickets_by_year.get(year, 0)),
            'stack_order': stack_order,
        }
        for year in years
    )

sdf_out = pd.DataFrame(
    state_year_records,
    columns=['year', 'state', 'tickets', 'stack_order'],
)

In [37]:
# Expect one row per state found in cdf for each year in `years`.
sdf_out.shape

(48, 4)

In [38]:
# Write the per-state yearly cataloging counts to CSV; index=False leaves
# the row index out of the file.
sdf_out.to_csv('../data-display/cat-tickets-per-state-yearly.csv', index=False)

In [181]:
# Print the ticket count of every category and, for 'Cataloging', how many of
# its tickets are assigned to a group other than 'BKOPS CAT'.
# The loop variable is deliberately NOT named `cdf`: that name holds the
# cataloging-only frame built earlier, and rebinding it here would leave it
# pointing at the last category group once the loop finished.
for cgroup, group_df in df.groupby('category'):
    print(f'{cgroup}={len(group_df)}')
    if cgroup == 'Cataloging':
        miscat_count = int((group_df['assignment_group'] != 'BKOPS CAT').sum())
        print(f'miscategorized tickets in cataloging: {miscat_count}')

Acquisitions=1604
Cataloging=14620
miscategorized tickets in cataloging: 600
Collections Processing=16333
Logistics=18744
Offsite - ReCAP=128
Selection=24291
