Initialization: read all borrowings from the "borrowings" CSV file

In [14]:
import pandas as pd
from pathlib import Path

# Relative path to the processed borrowings export (2019-2025).
input_file = Path('../data/processed/borrowings_2019_2025.csv')

# Parser settings for the export: semicolon-separated fields,
# double-quote as quoting character, UTF-8 encoded text.
csv_options = {
    'sep': ';',
    'quotechar': '"',
    'encoding': 'utf-8',
}
data_frame = pd.read_csv(input_file, **csv_options)

# Report (rows, columns), then preview the first rows as the cell output.
print(data_frame.shape)
data_frame.head()

(2407610, 16)


Unnamed: 0,issue_id,Ausleihdatum/Uhrzeit,Rückgabedatum/Uhrzeit,Leihdauer,Anzahl_Verlängerungen,Verspätet,Tage_zu_spät,Sammlungszeichen/CCODE,Medientyp,Barcode,Titel,Autor,ISBN,Interessenkreis,Benutzerkategorie,Benutzer-Systemnummer
0,56563,2019-01-02 00:00:00,2019-01-24 13:23:23,22.0,0,Nein,0.0,,,,,,,,MDA,26.0
1,56564,2019-01-02 00:00:00,2019-01-30 13:40:14,28.0,0,Nein,0.0,,,,,,,,MPA,50.0
2,56565,2019-01-02 00:00:00,2019-03-06 07:09:36,63.0,1,Nein,0.0,esac,Sachbuch,18143906.0,Hamburg - Der besondere Stadtführer,,9783831904525.0,,MPA,50.0
3,56566,2019-01-02 00:00:00,2019-03-06 07:09:38,63.0,1,Nein,0.0,esac,Sachbuch,18184110.0,Hamburg <Dorling Kindersley>,"Bruschke, Gerhard",9783734205736.0,,MPA,50.0
4,56567,2019-01-02 00:00:00,2019-03-06 07:09:31,63.0,1,Nein,0.0,esac,Sachbuch,17202121.0,Hamburg <Reise-Know-How>,"Fründt, Hans-Jürgen",9783831730735.0,,MPA,50.0


Show the number of borrowings per media type.

In [15]:
# Number of borrowings per media type, most frequent first.
# value_counts() leaves out rows whose media type is missing (NaN) by default,
# so these counts do not add up to the total number of rows when such rows exist.
data_frame["Medientyp"].value_counts()

Medientyp
Kinder u. Jugendbuch           663197
Sachbuch                       332011
DVD                            247870
Belletristik                   237871
Kinder u. Jugend-CD            168296
Comic                          124836
Hörbuch                         56948
Musik-CD                        54010
Fremdsprachige Belletristik     24918
Tonie                           19386
Spiele                          18736
Zeitschriften                   12929
Konsolenspiel                    7429
Sprachkurse                      6745
Sonstiges                        6619
Blu-Ray                          3007
Kamishibai                       1671
Bibliothek der Dinge             1151
Lesehaus                          735
Makerboxen                        628
Bestellt über Medienwelten        569
CD-ROM                            419
Schlüssel                         254
Sach-CD                           110
Tablet                              9
Name: count, dtype: int64

In [16]:
# Media types that are treated as books in this analysis.
book_categories = [
    "Sachbuch",
    "Belletristik",
    "Fremdsprachige Belletristik",
    "Kinder u. Jugendbuch",
    "Comic"
]

# Evaluate the membership test once and reuse it for both halves of the split.
is_book = data_frame["Medientyp"].isin(book_categories)

# Borrowings whose media type is one of the book categories.
books_frame = data_frame[is_book].reset_index(drop=True)

# Every other borrowing. isin() is False for a missing media type, so rows
# without a media type end up in this frame as well.
non_books_frame = data_frame[~is_book].reset_index(drop=True)

# Refers to the same object as data_frame; no copy is made here.
all_data_frame = data_frame

print('=== Book Table ===')
print(books_frame.shape)

print('=== Non Book Table ===')
print(non_books_frame.shape)

=== Book Table ===
(1382833, 16)
=== Non Book Table ===
(1024777, 16)


Now we want to examine correlations between media types and borrowings that were returned late. As a first step, we analyze, for each book media type, the percentage of borrowings that were returned late. Rows without a media type are ignored.

In [17]:
# remove rows without media type
books_frame = books_frame.dropna(subset=["Medientyp"])
non_books_frame = non_books_frame.dropna(subset=["Medientyp"])
all_data_frame = all_data_frame.dropna(subset=["Medientyp"])

# A borrowing counts as late only when "Verspätet" is exactly "Ja";
# any other value, including a missing one, counts as not late.
is_late = books_frame["Verspätet"].eq("Ja")

# Per book media type: total borrowings (group size), late borrowings
# (sum of the boolean flag) and the late share in percent.
late_stats = (
    is_late.groupby(books_frame["Medientyp"])
    .agg(Anzahl_Ausleihen="size", Anzahl_verspaetet="sum")
    .reset_index()
    .assign(
        Prozent_verspaetet=lambda stats: (
            stats["Anzahl_verspaetet"] / stats["Anzahl_Ausleihen"] * 100
        )
    )
)

# Kept as a list of per-type records (one dict per media type, in group
# order) in case other cells still read this name.
late_borrowings_per_type = late_stats.to_dict("records")

# Output: media types ordered from highest to lowest late percentage
late_borrowings_per_type_table = (
    late_stats
    .sort_values("Prozent_verspaetet", ascending=False)
    .reset_index(drop=True)
)
late_borrowings_per_type_table

Unnamed: 0,Medientyp,Anzahl_Ausleihen,Anzahl_verspaetet,Prozent_verspaetet
0,Sachbuch,332011,18236,5.492589
1,Fremdsprachige Belletristik,24918,1165,4.675335
2,Kinder u. Jugendbuch,663197,30868,4.654424
3,Comic,124836,3805,3.047999
4,Belletristik,237871,6322,2.657743
