**Load the required libraries and data**

In [1]:
# Libraries
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt 
import seaborn as sns 

In [2]:
# Data  
appointment_df = pd.read_csv("../data/Appointments List.csv", 
                             encoding='UTF-16', delimiter="\t")

  appointment_df = pd.read_csv("../data/Appointments List.csv",


# Explanatory Data Analysis
## Data Preparation

In [3]:
appointment_df.head()

Unnamed: 0,Appointment UID,Appt Status Description,Date Of Service,Chartnumber,NurseName,FirstTreatmentApptInd,Appt Type Group,Appt Type,Created By,Provider,Facility City,Room,Column Heading,#,Count of tblAppointments,Patient FID
0,Grand Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,55975,*
1,1,Made,01/01/00,,,N,Default,,system,",",,Other,,1,1,1
2,109,Seen,01/22/18,1,,N,Intake,TMS INTAKE DR. BRENNER,EMILY,"BRENNER,DANIEL A",CAMBRIDGE,Other,DR. BRENNER,2,1,5283
3,239,Seen,01/22/18,2,,N,MH Infusion,KETAMINE MH -INFUSION ONLY,EMILY,"BRENNER,DANIEL A",CAMBRIDGE,Other,2C,3,1,5286
4,240,Made,01/22/18,9,,N,Intake,TMS INTAKE DR. BRENNER,EMILY,"BRENNER,DANIEL A",CAMBRIDGE,Other,DR. BRENNER,4,1,5351


In [4]:
appointment_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55976 entries, 0 to 55975
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Appointment UID           55976 non-null  object
 1   Appt Status Description   55976 non-null  object
 2   Date Of Service           37250 non-null  object
 3   Chartnumber               55975 non-null  object
 4   NurseName                 19321 non-null  object
 5   FirstTreatmentApptInd     54678 non-null  object
 6   Appt Type Group           54678 non-null  object
 7   Appt Type                 54677 non-null  object
 8   Created By                55976 non-null  object
 9   Provider                  55976 non-null  object
 10  Facility City             51738 non-null  object
 11  Room                      55976 non-null  object
 12  Column Heading            54697 non-null  object
 13  #                         55976 non-null  object
 14  Count of tblAppointmen

In [5]:
# Drop the first two rows 
appointment_df.drop(index=[0, 1], inplace=True)

In [6]:
appointment_df["Chartnumber"] = pd.to_numeric(appointment_df["Chartnumber"], errors="coerce")
appointment_df["Date"] = pd.to_datetime(appointment_df["Date Of Service"], errors="coerce")

  appointment_df["Date"] = pd.to_datetime(appointment_df["Date Of Service"], errors="coerce")


In [7]:
# Replace "Made" with "Seen"
appointment_df["Appt Status Description"].replace(to_replace="Made", value="Seen", inplace=True)

In [8]:
appointment_df["Facility City"].isna().sum()

4237

In [9]:
appointment_df["Facility City"] = appointment_df["Facility City"].ffill().backfill()

  appointment_df["Facility City"] = appointment_df["Facility City"].ffill().backfill()


In [10]:
# Confirm if there are still null values
appointment_df["Facility City"].isna().sum()

0

In [11]:
# Select the successfull appointments(visits)
seen_df = appointment_df[appointment_df["Appt Status Description"]=="Seen"]

# subset the data set 
subset_columns = ["Date", "Chartnumber", "Appt Type Group", "Facility City"]
seen_df = seen_df[subset_columns]

# Drop any record with null values 
seen_df.dropna(how="any", inplace=True)

In [12]:
# Calculate the percentage of patients who start with intake 
count = 0
chart_numbers = seen_df["Chartnumber"].unique()

for chart_no in chart_numbers:
    # Filter the data set 
    df_by_chart_no = seen_df[seen_df["Chartnumber"]==chart_no].sort_values(by="Date")
    df_by_chart_no.reset_index(drop=True, inplace=True)
    
    if df_by_chart_no["Appt Type Group"][0]=="Intake":
        count+=1
    else:
        pass
    
# Patients enrolled with an intake    
count/seen_df["Chartnumber"].nunique()

0.8556767158434894

approximately 86% of the patients enrolled with an intake instead of treatment or follow up

Features/Variables to include in the post intake dataset 

* Chart number 
* First Service after enrollment
* First Service after intake
* Days after intake
* Days between MH Infusion and Intake 
* Number of visits
* Number of MH infusions

* Number of MH infusions outside cambridge 
* Number of visits outside cambridge

In [22]:
first_service_after_enrollment = []
first_service_after_intake = []
days_after_intake = []
days_btn_mhinfusion_intake = []
visit_count = []
mhInfusion_count = []

# Additional Modifications
infusion_count_outside_cambridge = []
visit_count_outside_cambridge = []
intake_facility_city = [] # not yet implemented



for chart_no in chart_numbers:
    # Filter the data set by chart number
    df_by_chart_no = seen_df[seen_df["Chartnumber"]==chart_no].sort_values(by="Date")
    df_by_chart_no.reset_index(drop=True, inplace=True)
    
    # Get the required data 
    first_service_after_enrollment.append(df_by_chart_no["Appt Type Group"][0])
    visit_count.append(df_by_chart_no.shape[0])
    mhInfusion_count.append(df_by_chart_no["Appt Type Group"].to_list().count('MH Infusion'))
    
    # Get the additional data 
    ## Filter out Cambridge visits from the data
    outside_cambridge_df = df_by_chart_no[df_by_chart_no["Facility City"]!="CAMBRIDGE"]
    visit_count_outside_cambridge.append(outside_cambridge_df.shape[0])
    
    ## Count the number of infusions outside cambridge
    infusion_count_outside_cambridge.append(outside_cambridge_df["Appt Type Group"].tolist().count('MH Infusion'))
    
    # The below line checks if there is another service before intake
    if "Intake" in df_by_chart_no["Appt Type Group"].tolist():
        print(chart_no)
        print(df_by_chart_no["Appt Type Group"].tolist().index("Intake"))
    
    if df_by_chart_no["Appt Type Group"][0] == "Intake":
        # Check if there exists multiple services
        if df_by_chart_no.shape[0] > 1:
            first_service_after_intake.append(df_by_chart_no["Appt Type Group"][1])

            # Calculate days after intake
            days_1 = (df_by_chart_no["Date"][1] - df_by_chart_no["Date"][0]).days
            days_after_intake.append(days_1)

            # Calculate days between intake and MH infusion
            try:
                mhInfusion_index = df_by_chart_no["Appt Type Group"].tolist().index('MH Infusion')
                days_2 = (df_by_chart_no["Date"][mhInfusion_index] - df_by_chart_no["Date"][0]).days
                days_btn_mhinfusion_intake.append(days_2)
            except ValueError: 
                days_btn_mhinfusion_intake.append(np.nan)
                
        # Add another else if for patients whose had intakes but not as their first service
        
            
        else:
            first_service_after_intake.append(np.nan)
            days_after_intake.append(np.nan)
            days_btn_mhinfusion_intake.append(np.nan)
        
    else: 
        first_service_after_intake.append(np.nan)
        days_after_intake.append(np.nan)
        days_btn_mhinfusion_intake.append(np.nan)

1
0
9
0
6
0
12
0
5
0
3
0
13
0
118
1
24
13
119
0
52
57
116
0
122
0
123
0
125
0
127
0
131
0
132
0
133
0
134
0
135
0
136
0
137
0
128
0
138
0
139
0
141
0
143
0
144
0
149
0
152
0
150
0
157
63
159
0
160
0
162
0
163
0
168
0
170
0
171
0
166
0
174
0
175
0
176
0
179
0
156
0
180
0
185
0
188
0
189
0
191
0
193
0
194
0
192
0
200
0
201
0
204
0
206
0
207
0
210
0
195
0
212
0
211
0
213
0
215
0
216
0
209
0
217
0
218
0
220
0
223
0
224
0
225
0
226
0
178
0
229
0
227
0
231
8
235
0
237
0
238
0
239
0
240
0
241
0
242
0
244
0
245
0
248
0
251
0
252
0
255
0
258
0
250
0
243
0
264
0
268
0
271
0
273
0
276
3
283
0
272
0
286
0
285
0
290
0
291
0
292
0
293
0
294
0
300
0
302
0
287
0
304
0
307
0
314
0
310
0
320
0
325
0
321
0
327
0
274
0
332
0
318
0
333
0
312
0
328
0
336
0
329
0
323
0
339
0
341
0
85
0
340
0
343
0
349
0
344
0
350
0
353
0
359
0
351
0
364
0
315
0
362
0
365
0
374
0
376
0
358
0
375
0
382
0
384
0
383
0
377
0
387
0
386
0
388
0
389
0
390
0
391
0
393
0
394
0
396
0
399
0
405
0
417
0
414
0
411
0
420
0
424
0
422
0
426


2301
0
2304
0
2298
0
2276
0
2306
0
2305
0
2287
0
2314
0
2201
0
2316
0
2319
0
2302
0
2310
0
2325
0
2308
0
2322
0
2327
0
2330
0
2206
0
2340
0
2332
0
2335
0
2318
0
2328
0
2338
0
2209
0
2336
0
2284
0
2331
0
2339
0
2350
0
2347
0
2354
0
2359
0
2352
0
2351
0
2357
0
2361
0
2371
0
2367
0
2370
0
2369
0
2368
0
2377
0
2380
0
2385
0
2383
0
2375
0
2413
0
2428
0
2455
0
2457
0
2482
0
2487
0
2493
0
2503
0
2491
0
2485
0
2516
0
2544
0
2565
0
2548
0
2582
0
2657
0
2669
0
2674
0


In [14]:
stop here

SyntaxError: invalid syntax (4067800170.py, line 1)

In [None]:
seen_df[(seen_df["Facility City"]!="CAMBRIDGE")&(seen_df["Chartnumber"]==1909)]

In [29]:
seen_df[seen_df["Chartnumber"]==24].reset_index().head(20)

Unnamed: 0,index,Date,Chartnumber,Appt Type Group,Facility City
0,131,2018-01-29,24,TMS,CAMBRIDGE
1,136,2018-01-30,24,TMS,CAMBRIDGE
2,137,2018-01-31,24,TMS,CAMBRIDGE
3,138,2018-02-01,24,TMS,CAMBRIDGE
4,304,2018-02-02,24,TMS,CAMBRIDGE
5,305,2018-02-05,24,TMS,CAMBRIDGE
6,371,2018-02-06,24,TMS,CAMBRIDGE
7,372,2018-02-07,24,TMS,CAMBRIDGE
8,373,2018-02-08,24,TMS,CAMBRIDGE
9,374,2018-02-09,24,TMS,CAMBRIDGE


In [None]:
stop here 

In [None]:
# Create a dataframe  
post_intake_df = pd.DataFrame({
    "Chartnumber" : chart_numbers,
    "Visit Count":visit_count,
    "First Service After Enrollment" : first_service_after_enrollment,
    "First Service After Intake":first_service_after_intake,
    "Days After Intake":days_after_intake,
    "Days between MH Infusion and Intake": days_btn_mhinfusion_intake,
    "Number of MH infusions": mhInfusion_count,
    "Number of MH infusions outside Cambridge":infusion_count_outside_cambridge,
    "Number of Visits outside Cambridge":visit_count_outside_cambridge
})

In [None]:
post_intake_df

In [None]:
stop here

In [None]:
seen_df[seen_df["Chartnumber"]==10]["Appt Type Group"].tolist().index('MH Infusion')

In [None]:
appointment_df[appointment_df["Facility City"]!="CAMBRIDGE"]

In [None]:
appointment_df[appointment_df["Chartnumber"]==2194]

## Data Analysis and Visualization

# Conclusion

**Challenges**

- Not all patients in the records started with an intake 
- It is cumbersome to focus this analysis on cambridge since some patients visited cambridge and other facilities for instance chart# 2194. - Fix this by counting the number of visits and infusions outside the Cambridge facility.
- There are some patients that had other services before intake for instance chart#s 24 and 118