# Counting canceled appointments

In [186]:
import pandas as pd

In [187]:
# Sample data for testing.
# Each record is (PatientId, AppointmentDay, No-show, xxxx); "xxxx" is not used
# by any of the cells shown in this notebook.
sample_rows = [
    ("ala", '20130201 00:00:00', 1, 2),
    ("ala", '20130201 00:00:00', 0, 2),
    ("ola", '20130121 00:00:00', 0, 2),
    ("ola", '20130312 00:00:00', 1, 2),
    ("ala", '20130111 00:00:00', 1, 2),
    ("ola", '20130901 00:00:00', 0, 2),
    ("ola", '20130115 00:00:00', 1, 2),
    ("ola", '20130801 00:00:00', 0, 2),
    ("ala", '20130509 00:00:00', 1, 2),
    ("ola", '20130211 00:00:00', 1, 2),
]

# Dates are turned into pd.Timestamp objects before the frame is built,
# so the AppointmentDay column is datetime-typed from the start.
dataset = pd.DataFrame(
    [
        [patient, pd.Timestamp(day), no_show, filler]
        for patient, day, no_show, filler in sample_rows
    ],
    columns=["PatientId", "AppointmentDay", "No-show", "xxxx"],
)

# Display the sample chronologically (the frame itself keeps its original row order).
dataset.sort_values(by="AppointmentDay")

Unnamed: 0,PatientId,AppointmentDay,No-show,xxxx
4,ala,2013-01-11,1,2
6,ola,2013-01-15,1,2
2,ola,2013-01-21,0,2
0,ala,2013-02-01,1,2
1,ala,2013-02-01,0,2
9,ola,2013-02-11,1,2
3,ola,2013-03-12,1,2
8,ala,2013-05-09,1,2
7,ola,2013-08-01,0,2
5,ola,2013-09-01,0,2


In [188]:
# Running count of canceled appointments per patient, accumulated in
# appointment-date order. The result is assigned back by row label, so the
# original row order of `dataset` is left as it was.
# NOTE(review): cumsum is inclusive, so each row's own "No-show" value is part
# of its count - confirm that this is intended.
visits_by_day = dataset.sort_values(by="AppointmentDay")
no_show_by_patient = visits_by_day.groupby("PatientId")["No-show"]
dataset["numberOfCancellation"] = no_show_by_patient.cumsum()

# Show the result chronologically.
dataset.sort_values(by="AppointmentDay")

Unnamed: 0,PatientId,AppointmentDay,No-show,xxxx,numberOfCancellation
4,ala,2013-01-11,1,2,1
6,ola,2013-01-15,1,2,1
2,ola,2013-01-21,0,2,1
0,ala,2013-02-01,1,2,2
1,ala,2013-02-01,0,2,2
9,ola,2013-02-11,1,2,2
3,ola,2013-03-12,1,2,3
8,ala,2013-05-09,1,2,3
7,ola,2013-08-01,0,2,3
5,ola,2013-09-01,0,2,3


*Jeśli pacjent w tym samym dniu ma więcej niż jedną wizytę, to w związku z tym, że ich kolejność nie jest określona, powyższe rozwiązanie może zwrócić dla takich dni inne wyniki niż rozwiązanie Magdy, gdzie występuje pętla po indeksie. Aby kolejność była taka sama, trzeba by dane posortować po "AppointmentDay" i indeksie jednocześnie, ale biznesowo nie ma to większego sensu.*

### Counting recent canceled appointments for each patient (proposal):

Including the time window:

In [189]:
# Number of canceled visits for a given patient - in the last 90 days, without including the current visit
chronological = dataset.sort_values("AppointmentDay")
for patient_rows in chronological.groupby("PatientId").groups.values():
    # Work on one patient's visits at a time; the row labels are taken from the
    # date-sorted frame, and the time-based window is measured on "AppointmentDay".
    patient_visits = dataset.loc[patient_rows]
    lookback = patient_visits.rolling(
        window='90D', on="AppointmentDay", min_periods=1, center=False, closed="left"
    )
    # A visit with nothing inside its look-back window produces NaN; store 0 instead.
    # NOTE(review): in the recorded output a same-day visit that sorts earlier is
    # counted for the later one - confirm this tie handling on the pandas version in use.
    dataset.loc[patient_rows, "numberOfRecentCancellation"] = lookback["No-show"].sum().fillna(0)

# Show the result chronologically.
dataset.sort_values(by="AppointmentDay")

Unnamed: 0,PatientId,AppointmentDay,No-show,xxxx,numberOfCancellation,numberOfRecentCancellation
4,ala,2013-01-11,1,2,1,0.0
6,ola,2013-01-15,1,2,1,0.0
2,ola,2013-01-21,0,2,1,1.0
0,ala,2013-02-01,1,2,2,1.0
1,ala,2013-02-01,0,2,2,2.0
9,ola,2013-02-11,1,2,2,1.0
3,ola,2013-03-12,1,2,3,2.0
8,ala,2013-05-09,1,2,3,0.0
7,ola,2013-08-01,0,2,3,0.0
5,ola,2013-09-01,0,2,3,0.0


Alternatively, without the time window, but including only the last X visits:

In [190]:
# Number of canceled visits for a given patient - within the last 5 visits, without including the current one
def _cancellations_in_previous_visits(no_shows):
    """Sum "No-show" over the window of up to 5 rows that ends just before each row.

    Rows with an empty window (a patient's first visit) come back as NaN from
    the rolling sum and are reported as 0.
    """
    preceding = no_shows.rolling(window=5, min_periods=1, center=False, closed="left")
    return preceding.sum().fillna(0)


# Apply the helper to each patient's visits in date order; the result is
# assigned back by row label and replaces any existing column of the same name.
chronological = dataset.sort_values("AppointmentDay")
dataset["numberOfRecentCancellation"] = (
    chronological.groupby("PatientId")["No-show"].transform(_cancellations_in_previous_visits)
)

# Show the result chronologically.
dataset.sort_values(by="AppointmentDay")

Unnamed: 0,PatientId,AppointmentDay,No-show,xxxx,numberOfCancellation,numberOfRecentCancellation
4,ala,2013-01-11,1,2,1,0
6,ola,2013-01-15,1,2,1,0
2,ola,2013-01-21,0,2,1,1
0,ala,2013-02-01,1,2,2,1
1,ala,2013-02-01,0,2,2,2
9,ola,2013-02-11,1,2,2,1
3,ola,2013-03-12,1,2,3,2
8,ala,2013-05-09,1,2,3,2
7,ola,2013-08-01,0,2,3,3
5,ola,2013-09-01,0,2,3,3
