In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [13]:
df = pd.read_csv("combined_df.csv")

In [14]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,intake_method,referer,age,appointment_location,appointment_time,Medication_Management,Talk_Therapy
0,0,Call,Bariatric Doctor,20-30,Virtual,Evening,0,1
1,1,Call,Family/Friend,10-20,Virtual,Afternoon,0,1
2,2,Boom Form,Family/Friend,20-30,Freehold,Afternoon,0,1
3,3,Boom Form,Family/Friend,20-30,Freehold,Afternoon,0,1
4,4,Boom Form,Family/Friend,30-40,Freehold,Afternoon,0,1
5,5,Boom Form,Family/Friend,20-30,Virtual,Afternoon,0,1
6,6,Boom Form,Family/Friend,30-40,Virtual,Afternoon,0,1
7,7,Boom Form,Family/Friend,20-30,Virtual,Afternoon,1,0
8,8,Boom Form,Family/Friend,30-40,Princeton,Afternoon,0,1
9,9,Boom Form,Family/Friend,20-30,Princeton,Afternoon,0,1


In [15]:
df['is_morning'] = (df['appointment_time'].str.lower() == 'morning').astype(int)

In [16]:
X = df[['intake_method', 'referer', 'age', 'appointment_location']]
y = df['is_morning']
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= .2, random_state = 42)

In [17]:
df.drop(columns= ['Unnamed: 0'], inplace= True)
df.head()

Unnamed: 0,intake_method,referer,age,appointment_location,appointment_time,Medication_Management,Talk_Therapy,is_morning
0,Call,Bariatric Doctor,20-30,Virtual,Evening,0,1,0
1,Call,Family/Friend,10-20,Virtual,Afternoon,0,1,0
2,Boom Form,Family/Friend,20-30,Freehold,Afternoon,0,1,0
3,Boom Form,Family/Friend,20-30,Freehold,Afternoon,0,1,0
4,Boom Form,Family/Friend,30-40,Freehold,Afternoon,0,1,0


In [18]:
categorical_features = ['intake_method', 'referer', 'age', 'appointment_location']
numeric_features = ['Medication_Management', 'Talk_Therapy']

In [19]:
preprocessor = ColumnTransformer([('cat', OneHotEncoder(handle_unknown = 'ignore'), categorical_features)], remainder = 'passthrough')

In [20]:
pipeline = Pipeline([('preprocessor', preprocessor), ('classifier', LogisticRegression(max_iter = 1000))])

In [21]:
pipeline.fit(X_train ,y_train)
y_pred = pipeline.predict(X_test)

In [22]:
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n" , confusion_matrix(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.98      0.85       267
           1       0.40      0.04      0.08        89

    accuracy                           0.74       356
   macro avg       0.58      0.51      0.47       356
weighted avg       0.67      0.74      0.66       356

Confusion Matrix:
 [[261   6]
 [ 85   4]]
