**Load the required libraries and data**

In [1]:
# Libraries
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt 
import seaborn as sns 

In [2]:
# Data  
appointment_df = pd.read_csv("../data/Appointments List.csv", 
                             encoding='UTF-16', delimiter="\t")

  appointment_df = pd.read_csv("../data/Appointments List.csv",


# Explanatory Data Analysis
## Data Preparation

In [3]:
appointment_df.head()

Unnamed: 0,Appointment UID,Appt Status Description,Date Of Service,Chartnumber,NurseName,FirstTreatmentApptInd,Appt Type Group,Appt Type,Created By,Provider,Facility City,Room,Column Heading,#,Count of tblAppointments,Patient FID
0,Grand Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total,55975,*
1,1,Made,01/01/00,,,N,Default,,system,",",,Other,,1,1,1
2,109,Seen,01/22/18,1,,N,Intake,TMS INTAKE DR. BRENNER,EMILY,"BRENNER,DANIEL A",CAMBRIDGE,Other,DR. BRENNER,2,1,5283
3,239,Seen,01/22/18,2,,N,MH Infusion,KETAMINE MH -INFUSION ONLY,EMILY,"BRENNER,DANIEL A",CAMBRIDGE,Other,2C,3,1,5286
4,240,Made,01/22/18,9,,N,Intake,TMS INTAKE DR. BRENNER,EMILY,"BRENNER,DANIEL A",CAMBRIDGE,Other,DR. BRENNER,4,1,5351


In [4]:
appointment_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55976 entries, 0 to 55975
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Appointment UID           55976 non-null  object
 1   Appt Status Description   55976 non-null  object
 2   Date Of Service           37250 non-null  object
 3   Chartnumber               55975 non-null  object
 4   NurseName                 19321 non-null  object
 5   FirstTreatmentApptInd     54678 non-null  object
 6   Appt Type Group           54678 non-null  object
 7   Appt Type                 54677 non-null  object
 8   Created By                55976 non-null  object
 9   Provider                  55976 non-null  object
 10  Facility City             51738 non-null  object
 11  Room                      55976 non-null  object
 12  Column Heading            54697 non-null  object
 13  #                         55976 non-null  object
 14  Count of tblAppointmen

In [5]:
# Drop the first two rows 
appointment_df.drop(index=[0, 1], inplace=True)

In [6]:
appointment_df["Chartnumber"] = pd.to_numeric(appointment_df["Chartnumber"], errors="coerce")
appointment_df["Date"] = pd.to_datetime(appointment_df["Date Of Service"], errors="coerce")

  appointment_df["Date"] = pd.to_datetime(appointment_df["Date Of Service"], errors="coerce")


In [7]:
# Replace "Made" with "Seen"
appointment_df["Appt Status Description"].replace(to_replace="Made", value="Seen", inplace=True)

In [8]:
# Select the successfull appointments(visits)
seen_df = appointment_df[appointment_df["Appt Status Description"]=="Seen"]

# subset the data set 
subset_columns = ["Date", "Chartnumber", "Appt Type Group"]
seen_df = seen_df[subset_columns]

# Drop any record with null values 
seen_df.dropna(how="any", inplace=True)

In [9]:
# Calculate the percentage of patients who start with intake 
count = 0
chart_numbers = seen_df["Chartnumber"].unique()

for chart_no in chart_numbers:
    # Filter the data set 
    df_by_chart_no = seen_df[seen_df["Chartnumber"]==chart_no].sort_values(by="Date")
    df_by_chart_no.reset_index(drop=True, inplace=True)
    
    if df_by_chart_no["Appt Type Group"][0]=="Intake":
        count+=1
    else:
        pass
    
# Patients enrolled with an intake    
count/seen_df["Chartnumber"].nunique()

0.8556767158434894

approximately 86% of the patients enrolled with an intake instead of treatment or follow up

Features/Variables to include in the post intake dataset 

* Chart number 
* First Service after enrollment
* First Service after intake
* Days after intake
* Days between MH Infusion and Intake 
* Number of visits
* Number of MH infusions

In [10]:
first_service_after_enrollment = []
first_service_after_intake = []
days_after_intake = []
days_btn_mhinfusion_intake = []
visit_count = []
mhInfusion_count = []


for chart_no in chart_numbers:
    # Filter the data set by chart number
    df_by_chart_no = seen_df[seen_df["Chartnumber"]==chart_no].sort_values(by="Date")
    df_by_chart_no.reset_index(drop=True, inplace=True)
    
    # Get the required data 
    first_service_after_enrollment.append(df_by_chart_no["Appt Type Group"][0])
    visit_count.append(df_by_chart_no.shape[0])
    mhInfusion_count.append(df_by_chart_no["Appt Type Group"].to_list().count('MH Infusion'))
    
    if df_by_chart_no["Appt Type Group"][0] == "Intake":
        # Check if there exists multiple services
        if df_by_chart_no.shape[0] > 1:
            first_service_after_intake.append(df_by_chart_no["Appt Type Group"][1])

            # Calculate days after intake
            days_1 = (df_by_chart_no["Date"][1] - df_by_chart_no["Date"][0]).days
            days_after_intake.append(days_1)

            # Calculate days between intake and MH infusion
            try:
                mhInfusion_index = df_by_chart_no["Appt Type Group"].tolist().index('MH Infusion')
                days_2 = (df_by_chart_no["Date"][mhInfusion_index] - df_by_chart_no["Date"][0]).days
                days_btn_mhinfusion_intake.append(days_2)
            except ValueError: 
                days_btn_mhinfusion_intake.append(np.nan)
            
        else:
            first_service_after_intake.append(np.nan)
            days_after_intake.append(np.nan)
            days_btn_mhinfusion_intake.append(np.nan)
        
    else: 
        first_service_after_intake.append(np.nan)
        days_after_intake.append(np.nan)
        days_btn_mhinfusion_intake.append(np.nan)

In [11]:
# Create a dataframe  
post_intake_df = pd.DataFrame({
    "Chartnumber" : chart_numbers,
    "First Service After Enrollment" : first_service_after_enrollment,
    "First Service After Intake":first_service_after_intake,
    "Days After Intake":days_after_intake,
    "Days between MH Infusion and Intake": days_btn_mhinfusion_intake,
    "Number of MH infusions": mhInfusion_count
})

In [13]:
post_intake_df

Unnamed: 0,Chartnumber,First Service After Enrollment,First Service After Intake,Days After Intake,Days between MH Infusion and Intake,Number of MH infusions
0,1,Intake,TMS,14.0,,0
1,2,MH Infusion,,,,8
2,9,Intake,TMS,9.0,66.0,30
3,6,Intake,MH Infusion,0.0,0.0,13
4,12,Intake,,,,0
...,...,...,...,...,...,...
1554,2647,MH Infusion,,,,5
1555,2661,KAP Infusion,,,,2
1556,2669,Intake,,,,0
1557,2674,Intake,,,,0


In [12]:
stop here

SyntaxError: invalid syntax (4067800170.py, line 1)

In [None]:
seen_df[seen_df["Chartnumber"]==10]["Appt Type Group"].tolist().index('MH Infusion')

## Data Analysis and Visualization

# Conclusion

**Challenges**

- Not all patients in the records started with an intake 
- 