In [1]:
import pandas as pd
import dtale
import plotly.express as px 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import re
import time
from datetime import datetime
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests

# Imported Clean CSV

In [2]:
df = pd.read_csv("../data/outcomes.csv")

df.head()

Unnamed: 0.1,Unnamed: 0,CallerNum,Treatment Plan - Is the treatment plan completed?,"Treatment Plan - Was the enrollee able to be restored to function (can chew) and ""social six"" esthetics (top front six teeth are present and disease free)",Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Base-line Oral and Conditions Questions - Are you limited in what you can eat?,Treatment Plan - Was a treatment plan developed?
0,0,3638169,,,1960,Yes,Yes,
1,1,4431194,,,1959,Yes,Yes,
2,2,2941341,No,No,1942,Yes,No,Yes
3,3,3823933,No,No,1960,Yes,Yes,Yes
4,4,3348256,No,No,1955,Yes,Yes,Yes


# Value Counts for two main metrics: Completed Plan and Restorated Function (with duplicate patient ids)

In [3]:
print(df['Treatment Plan - Is the treatment plan completed?'].value_counts())
print(df['Treatment Plan - Was the enrollee able to be restored to function (can chew) and "social six" esthetics (top front six teeth are present and disease free)'].value_counts())

No     19382
Yes     7823
Name: Treatment Plan - Is the treatment plan completed?, dtype: int64
No     12260
Yes     9794
Name: Treatment Plan - Was the enrollee able to be restored to function (can chew) and "social six" esthetics (top front six teeth are present and disease free), dtype: int64


# Rename Columns for counting purposes

In [4]:
new_names = {'CallerNum':'Patient_id',
             'Treatment Plan - Is the treatment plan completed?':'Plan_Completed',
             'Treatment Plan - Was the enrollee able to be restored to function (can chew) and "social six" esthetics (top front six teeth are present and disease free)':'Restored_Function',
             'Treatment Plan - Was a treatment plan developed?':'Plan_developed',
             'Base-line Oral and Conditions Questions - Are you limited in what you can eat?':'Limited'}

In [5]:
df.rename(columns=new_names, inplace=True)
df.head()

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
0,0,3638169,,,1960,Yes,Yes,
1,1,4431194,,,1959,Yes,Yes,
2,2,2941341,No,No,1942,Yes,No,Yes
3,3,3823933,No,No,1960,Yes,Yes,Yes
4,4,3348256,No,No,1955,Yes,Yes,Yes


# Sorted and dropped duplicates of Patient ids

In [6]:
sorted = df.sort_values(['Patient_id',
                         'Plan_Completed',
                         'Restored_Function',
                         'Plan_developed',
                         'Limited']).drop_duplicates(['Patient_id'], keep='first')
sorted.head()

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
47543,47543,-2,,,1953,No,No,Yes
609,609,-1,No,No,1900,,,Yes
34169,34169,2384106,No,No,1944,Yes,Yes,Yes
3701,3701,2384744,No,No,1954,Yes,No,Yes
54215,54215,2384833,Yes,Yes,1942,Yes,No,Yes


In [7]:
print(sorted['Plan_Completed'].value_counts())
print(sorted['Restored_Function'].value_counts())
print(sorted['Plan_developed'].value_counts())
print(sorted['Limited'].value_counts())

No     5522
Yes     883
Name: Plan_Completed, dtype: int64
No     4179
Yes    1411
Name: Restored_Function, dtype: int64
Yes    5745
No      698
Name: Plan_developed, dtype: int64
Yes    5089
No     3946
Name: Limited, dtype: int64


In [8]:
l_no_na = sorted[sorted['Limited'].notna()]
pd_no_na = sorted[sorted['Plan_developed'].notna()]
pc_no_na = sorted[sorted['Plan_Completed'].notna()]
rf_no_na = sorted[sorted['Restored_Function'].notna()]


# Sorted by Submetric: Limited(chew capability), Plan Developed, Plan Completed, and Restored Function.

## Documented that all submetrics were answered as 'Yes'

In [9]:
doc_All = sorted[(sorted['Limited'] == 'Yes') & 
                       (sorted['Plan_developed'] == 'Yes') & 
                       (sorted['Plan_Completed'] == 'Yes') & 
                       (sorted['Restored_Function'] == 'Yes') ]
doc_All.head(5)

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
52758,52758,2386054,Yes,Yes,1956,Yes,Yes,Yes
48392,48392,2386075,Yes,Yes,1928,Yes,Yes,Yes
54439,54439,2387558,Yes,Yes,1956,Yes,Yes,Yes
42557,42557,2391110,Yes,Yes,1946,Yes,Yes,Yes
53674,53674,2392454,Yes,Yes,1949,Yes,Yes,Yes


In [10]:
print(doc_All['Restored_Function'].value_counts())

Yes    238
Name: Restored_Function, dtype: int64


### Percentages

In [11]:
doc_All[doc_All['Restored_Function'] == 'Yes'].describe
doc_All[doc_All['Restored_Function'] == 'Yes'].value_counts(normalize = True)

Unnamed: 0  Patient_id  Plan_Completed  Restored_Function  Birth_Year  Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?  Limited  Plan_developed
95          4541680     Yes             Yes                1957        Yes                                                              Yes      Yes               0.004386
44510       2685730     Yes             Yes                1958        Yes                                                              Yes      Yes               0.004386
44677       2689096     Yes             Yes                1954        Yes                                                              Yes      Yes               0.004386
44679       2689081     Yes             Yes                1953        Yes                                                              Yes      Yes               0.004386
44688       2698319     Yes             Yes                1957        Yes                                                              Yes      Yes    

In [12]:
completed_nc = sorted[(sorted['Limited'] == 'Yes') & 
                      (sorted['Plan_developed'] == 'Yes') & 
                      #(sorted['Plan_Completed'] == 'No') & 
                      (sorted['Restored_Function'] == 'Yes') ]
completed_nc.head(5)

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
52758,52758,2386054,Yes,Yes,1956,Yes,Yes,Yes
48392,48392,2386075,Yes,Yes,1928,Yes,Yes,Yes
54439,54439,2387558,Yes,Yes,1956,Yes,Yes,Yes
49300,49300,2390918,,Yes,1927,Yes,Yes,Yes
42557,42557,2391110,Yes,Yes,1946,Yes,Yes,Yes


In [13]:
print(completed_nc['Restored_Function'].value_counts())


Yes    480
Name: Restored_Function, dtype: int64


In [14]:
completed_np = sorted[(sorted['Limited'] == 'Yes') & 
                      #(sorted['Plan_developed'] == 'No') & 
                      #(sorted['Plan_Completed'] == 'No') & 
                      (sorted['Restored_Function'] == 'Yes') ]
completed_np.head(5)

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
52758,52758,2386054,Yes,Yes,1956,Yes,Yes,Yes
48392,48392,2386075,Yes,Yes,1928,Yes,Yes,Yes
54439,54439,2387558,Yes,Yes,1956,Yes,Yes,Yes
49300,49300,2390918,,Yes,1927,Yes,Yes,Yes
42557,42557,2391110,Yes,Yes,1946,Yes,Yes,Yes


In [15]:
print(completed_np['Restored_Function'].value_counts())

Yes    535
Name: Restored_Function, dtype: int64


In [16]:
completed_rf = sorted[(sorted['Limited'] == 'No') & 
                      (sorted['Plan_developed'] == 'No') & 
                      (sorted['Plan_Completed'] == 'No') & 
                      (sorted['Restored_Function'] == 'Yes') ]
completed_rf.head(5)

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
56321,56321,2452540,No,Yes,1951,Yes,No,No
39602,39602,2486273,No,Yes,1958,Yes,No,No
43947,43947,2581942,No,Yes,1951,Yes,No,No
44329,44329,2623392,No,Yes,1949,Yes,No,No
44124,44124,2813874,No,Yes,1956,Yes,No,No


In [17]:
print(completed_rf['Restored_Function'].value_counts())

Yes    13
Name: Restored_Function, dtype: int64


In [18]:
completed_nl = sorted[#(sorted['Plan_developed'] == 'No') & 
                      (sorted['Plan_Completed'] == 'Yes')& 
                      (sorted['Restored_Function'] == 'Yes')]# &
                     #(sorted['Limited'] == 'No')]
completed_nl.head(5)

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
54215,54215,2384833,Yes,Yes,1942,Yes,No,Yes
27851,27851,2384932,Yes,Yes,1952,Yes,,Yes
53228,53228,2384971,Yes,Yes,1951,Yes,No,Yes
56333,56333,2385168,Yes,Yes,1952,Yes,No,Yes
48129,48129,2385321,Yes,Yes,1951,Yes,,Yes


In [19]:
print(completed_nl['Restored_Function'].value_counts())

Yes    743
Name: Restored_Function, dtype: int64


# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'

duplicates = sorted.duplicated(['Patient_id'], keep='first')
sorted= sorted[~duplicates]
if isinstance(sorted, (pd.DatetimeIndex, pd.MultiIndex)):
    sorted = sorted.to_frame(index=False)

# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required
sorted = sorted.reset_index().drop('index', axis=1, errors='ignore')
sorted.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers

sorted.head()

print(sorted['Plan_Completed'].value_counts())
print(sorted['Restored_Function'].value_counts())

In [20]:
completed_both = sorted[(sorted['Plan_Completed'] == "Yes") 
                        &  (sorted['Restored_Function'] == 'Yes')]
completed_both.head(10)

Unnamed: 0.1,Unnamed: 0,Patient_id,Plan_Completed,Restored_Function,Birth_Year,Presumptive Eligibility - Do they pre-qualify for SMILE ON 60+?,Limited,Plan_developed
54215,54215,2384833,Yes,Yes,1942,Yes,No,Yes
27851,27851,2384932,Yes,Yes,1952,Yes,,Yes
53228,53228,2384971,Yes,Yes,1951,Yes,No,Yes
56333,56333,2385168,Yes,Yes,1952,Yes,No,Yes
48129,48129,2385321,Yes,Yes,1951,Yes,,Yes
52758,52758,2386054,Yes,Yes,1956,Yes,Yes,Yes
48392,48392,2386075,Yes,Yes,1928,Yes,Yes,Yes
50716,50716,2386386,Yes,Yes,1953,Yes,No,Yes
54439,54439,2387558,Yes,Yes,1956,Yes,Yes,Yes
54678,54678,2387627,Yes,Yes,1953,Yes,,Yes


In [21]:
print(completed_both['Plan_Completed'].value_counts())

Yes    743
Name: Plan_Completed, dtype: int64


In [22]:
print(completed_both['Patient_id'].value_counts())

2830336    1
2924853    1
2393764    1
2641164    1
2893859    1
          ..
3089764    1
2560356    1
3056404    1
2872678    1
2721448    1
Name: Patient_id, Length: 743, dtype: int64


In [23]:
#dental_df[dental_df['Are you limited to what you eat?'] == 'Yes']['Age'].describe
#dental_df[dental_df['Are you limited to what you eat?'] == 'Yes']['Age'].value_counts(normalize = True)

In [24]:
d = dtale.show(completed_both)

#open it in a new window in browser

d.open_browser()

Executing shutdown due to inactivity...


2021-11-16 21:18:01,053 - INFO     - Executing shutdown due to inactivity...


Executing shutdown...


2021-11-16 21:18:17,106 - INFO     - Executing shutdown...
