# Library Collection Data

In [None]:
import pandas as pd

# Load the raw export, shorten the two column names used below, and discard
# the library-name column, all in one chain so no step mutates in place.
column_aliases = {
    ' COUNT(Physical Item Id)': 'n_titles',
    'Permanent LC Classification Code': 'lcc',
}
collection = (
    pd.read_csv('data/dartmouth-library-collection-lcc.csv')
    .rename(columns=column_aliases)
    .drop(columns=['Library Name (Active)'])
)
collection

In [None]:
# Split every LCC code into its first three characters:
#   main_class  - 1st character (rows without a string code are dropped below)
#   subclass    - 2nd character, '_' when the code is too short
#   subsubclass - 3rd character, '_' when the code is too short
# The vectorised `.str[n]` accessor yields NaN for missing or too-short codes,
# so no per-row lambda is needed and an empty-string code no longer raises
# IndexError.
lcc_codes = collection['lcc']

# The literal code 'Unknown' is not a real classification: keep it as its own
# main class instead of letting it be filed under 'U'.
is_unknown = lcc_codes == 'Unknown'

collection = collection.assign(
    main_class=lcc_codes.str[0].mask(is_unknown, 'Unknown'),
    subclass=lcc_codes.str[1].fillna('_').mask(is_unknown, '_'),
    subsubclass=lcc_codes.str[2].fillna('_').mask(is_unknown, '_'),
).dropna(subset=['main_class', 'subclass'])
collection

In [None]:
import plotly.express as px
# Treemap nested by main class, then subclass, then third character;
# rectangle area is driven by the n_titles column.
hierarchy = ['main_class', 'subclass', 'subsubclass']
fig = px.treemap(collection, path=hierarchy, values='n_titles')
fig.show()

In [None]:
# Sum n_titles per (main class, subclass) pair, spread the subclasses into
# columns, and draw one stacked bar per main class.
subclass_totals = collection.groupby(by=['main_class', 'subclass']).n_titles.sum()
subclass_totals.unstack().plot.bar(stacked=True, legend=False)

In [None]:
import colorsys

import matplotlib.colors

# Base colour (hex) for every main-class letter. The '_' entry doubles as the
# fallback for main classes that have no colour of their own.
base_color = {
    '_': "#000000",
    'A': "#E5F5F9",
    'B': "#1D91C0",
    'C': "#67001F",
    'D': "#F7FCFD",
    'E': "#CB181D",
    'F': "#78C679",
    'G': "#F46D43",
    'H': "#A6CEE3",
    'I': "#FD8D3C",
    'J': "#A6D854",
    'K': "#D4B9DA",
    'L': "#6A51A3",
    'M': "#7F0000",
    'N': "#D9D9D9",
    'O': "#FFF7BC",
    'P': "#000000",
    'Q': "#F0F0F0",
    'R': "#C7EAE5",
    'S': "#003C30",
    'T': "#F16913",
    'U': "#FFF7FB",
    'V': "#8C6BB1",
    'W': "#C7E9B4",
    'X': "#762A83",
    'Y': "#FC9272",
    'Z': "#AE017E"
}

def get_shade(idx):
    """Return the RGB colour for a ``'<main class>-<subclass>'`` key.

    The main class selects a base colour from ``base_color``; the subclass
    character shifts that colour's HSV value (brightness) by 0.03 per step
    away from ``'A'``, clamped to the range 0..1.

    Parameters
    ----------
    idx : str
        Key with exactly one ``'-'`` separating the main class from a
        single-character subclass, e.g. ``'Q-C'``.

    Returns
    -------
    tuple
        ``(r, g, b)`` as produced by ``colorsys.hsv_to_rgb``.

    Raises
    ------
    ValueError
        If ``idx`` does not contain exactly one ``'-'``.
    TypeError
        If the subclass part is not a single character.
    """
    base, shade = idx.split('-')
    # Main classes without their own entry (for example 'Unknown') used to
    # raise KeyError here; they now share the '_' colour.
    hex_color = base_color.get(base, base_color['_'])
    h, s, v = colorsys.rgb_to_hsv(*matplotlib.colors.to_rgb(hex_color))
    # 'A' leaves the base colour untouched; each later letter is 0.03 brighter.
    # NOTE(review): the '_' placeholder sits 30 steps after 'A', so keys ending
    # in '_' are pushed to full brightness - confirm that is intended.
    v += (ord(shade) - ord('A')) * .03
    v = min(1, max(0, v))
    return colorsys.hsv_to_rgb(h, s, v)


# `get_shade` expects keys of the form '<main class>-<subclass>' and the loop
# below unpacks two-level column labels, but the cells above leave `collection`
# in long format (one row per LCC code, flat column names), so on a fresh
# kernel the unpacking raised ValueError. Reshape explicitly instead of
# relying on leftover kernel state.
# NOTE(review): the wide layout (rows = main class, columns = shade key) is
# inferred from how `get_shade` and the bar chart consume it - confirm.
if 'main_class' in collection.columns:  # still long format; skipped when this cell is re-run
    # Main classes with no entry in `base_color` (e.g. 'Unknown') use the '_' colour.
    color_class = collection['main_class'].where(
        collection['main_class'].isin(list(base_color)), '_')
    collection = (
        collection
        .assign(shade_key=color_class + '-' + collection['subclass'])
        .pivot_table(index='main_class', columns='shade_key',
                     values=['n_titles'], aggfunc='sum', fill_value=0)
    )

# One colour per column; the second level of each column label is the shade key.
colors = [get_shade(col) for _, col in collection.columns]

In [None]:
# Stacked bar chart of `collection`, one colour per column taken from `colors`.
collection.plot(kind='bar', stacked=True, color=colors, legend=False)