# Plots for Smiles United Project - PRE
This notebook contains all the plots created from the pre-training data.

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats

import seaborn as sns
sns.set_style('darkgrid', {'axes.facecolor': '0.9', "grid.color": ".6", "grid.linestyle": ":"})
sns.set_context("talk")

import folium

import matplotlib.pyplot as plt
from matplotlib import patches as mpatches
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

## Minutes to complete survey

In [None]:
# Load the survey completion-time table; the cells below plot its 'mins' column.
# NOTE(review): unpickling can execute arbitrary code - only load pickles this project produced.
time = pd.read_pickle("../saved_data_frames/time_df.pkl")
time.head()

In [None]:
# Raw distribution of completion times: histogram on the left, boxplot on the right.
fig, (hist_ax, box_ax) = plt.subplots(ncols=2, nrows=1, figsize=(10,5))
fig.set_tight_layout(True)
fig.suptitle("Minutes to complete survey", fontsize=20)

# legend=False avoids creating a legend only to remove it again.
sns.histplot(data=time[['mins']], ax=hist_ax, legend=False)
hist_ax.set(title='Histogram')

sns.boxplot(data=time[['mins']], saturation=0.9, color="tab:blue", ax=box_ax, orient="h")
box_ax.set(title='Boxplot')
# A single horizontal box needs no y tick or label.
box_ax.tick_params(left=False)
box_ax.set(yticklabels=[]);

In [None]:
# Re-plot the completion times after dropping rows whose z-score magnitude is 3 or more.
meanpointprops = dict(marker='D', markeredgecolor='black',
                      markerfacecolor='firebrick')
trimmed = time[(np.abs(stats.zscore(time['mins'])) < 3)][['mins']]

# Derive the number of dropped rows so the title cannot disagree with the data.
n_removed = len(time) - len(trimmed)
outlier_word = "outlier" if n_removed == 1 else "outliers"

# Round to whole seconds *before* splitting into minutes and seconds, so the
# label can never read "...m:60s".
mean_mins = trimmed['mins'].mean()
mean_m, mean_s = divmod(int(round(float(mean_mins) * 60)), 60)

fig, (hist_ax, box_ax) = plt.subplots(ncols=2, nrows=1, figsize=(15,5))
fig.set_tight_layout(True)
fig.suptitle(f"Minutes to complete - with {n_removed} extreme {outlier_word} removed", fontsize=25)

sns.histplot(data=trimmed,
             color="tab:blue",
             ax=hist_ax)
hist_ax.axvline(mean_mins,
                color='firebrick',
                lw=2,
                ls='--',
                label=f"Average time to complete survey: {mean_m}m:{mean_s}s")
hist_ax.legend(facecolor="white")
hist_ax.set(title='Histogram')

sns.boxplot(data=trimmed,
            ax=box_ax,
            orient="h",
            meanprops=meanpointprops,
            showmeans=True)
box_ax.set(title='Boxplot')
# A single horizontal box needs no y tick or label.
box_ax.tick_params(left=False)
box_ax.set(yticklabels=[]);
plt.savefig("../images/pre_intervention/mins_to_complete",bbox_inches='tight')

In [None]:
# Sanity check: compare row counts before and after trimming.
n_original, n_trimmed = len(time), len(trimmed)
difference = n_original - n_trimmed

print(f"original: {n_original}")
print(f"trimmed: {n_trimmed}")
print("-"*13)
print(f"# removed: {difference}")

## Completed Surveys

In [None]:
# Load the response totals; the next cell reads its 'count' column, one row per bar.
# NOTE(review): unpickling can execute arbitrary code - only load pickles this project produced.
totals = pd.read_pickle("../saved_data_frames/totals_df.pkl")
totals

In [None]:
# Bar chart of total vs. completed responses, with the counts spelled out in the legend.
colors = ['C10', 'C12']
legend_handles = [mpatches.Patch(color=shade) for shade in colors]
legend_labels = [f"{name}: {totals.loc[name, 'count']}" for name in totals.index]

plt.figure(figsize=(6,5))
plt.title("Total responses vs. Completed responses", fontsize=20)
bar_ax = sns.barplot(data=totals.T, palette=colors)
bar_ax.set(ylabel='count')

plt.legend(handles=legend_handles,
           labels=legend_labels,
           facecolor="white",
           fontsize=13);
plt.savefig("../images/pre_intervention/total_v_complete", bbox_inches='tight')

## Plotting Function

In [None]:
# make a function for plots
def distributions(data, 
                  title, 
                  rotate_xlabels=False, 
                  figsize=(15,8), 
                  order=None, 
                  title_fontsize=20,
                  legend_outside=False,
                  filepath=None):
    """Draw a bar chart of the value counts of ``data`` and optionally save it.

    Parameters
    ----------
    data : pandas.Series
        Answers to count; every distinct value becomes one bar.
    title : str
        Figure title.
    rotate_xlabels : bool or number, default False
        ``False`` leaves the tick labels alone, ``True`` rotates them by
        20 degrees, any other truthy value is used as the rotation angle.
    figsize : tuple, default (15, 8)
        Figure size passed to ``plt.figure``.
    order : str or None, default None
        Name of a preset that fixes both the bar order and the bar colours
        ('agree', 'percent', 'frequency_brush', 'frequency_floss', 'often',
        'true_false', 'k_brush', 'frequency_visit_dentist').  ``None`` or an
        unrecognised name keeps the ``value_counts`` order and uses the
        default colour cycle.  Preset categories that do not occur in
        ``data`` are drawn with a count of 0; answers outside the preset
        are not drawn.
    title_fontsize : int, default 20
        Font size of the title.
    legend_outside : bool, default False
        Place the legend to the right of the axes instead of inside them.
    filepath : str or None, default None
        Where to save the figure; nothing is written when ``None``.

    Returns
    -------
    None
    """
    # preset name -> (bar colours, category labels in display order)
    presets = {
        'agree': (
            ['#2bdb1f','#c4fa2f','#fae22f','#fa942f','#fa2f2f'],
            ['Strongly Agree',
             'Agree',
             'Neutral',
             'Disagree',
             'Strongly Disagree'],
        ),
        'percent': (
            ['#C97EFE','#AD70D8','#9963BF','#8053A0','#684382'],
            ['None',
             'Less than 25%',
             '25% to 50%',
             '50% to 75%',
             'Greater than 75%'],
        ),
        'frequency_brush': (
            ['#82C5FE','#6DA5D5','#5782A8','#456886'],
            ['Less than 1 time each month',
             '2 to 3 times each week',
             '1 time each day',
             '2 to 3 times each day'],
        ),
        'frequency_floss': (
            ['#8AC386','#73B06F','#5E9C5A','#498745','#3A7536', "#2C6329"],
            ['Less than 1 time each month',
             '1 time each month',
             '1 time each week',
             '2 to 3 times each week',
             '1 time each day',
             '2 to 3 times each day'],
        ),
        'often': (
            ['#FDC384','#FEAC54','#FD982B','#FD8300'],
            ['Never','Not often','Somewhat often','Very often'],
        ),
        'true_false': (
            ['C24', 'C19'],
            ['True','False'],
        ),
        'k_brush': (
            ['#82C5FE','#70AADB','#6193BD','#527CA0','#456886'],
            ['Less than 1 time each month',
             '1 time each week',
             '2 to 3 times each week',
             '1 time each day',
             '2 to 3 times each day'],
        ),
        'frequency_visit_dentist': (
            ['#83FED7','#72DCBA','#61BDA0','#519F86','#44846F', # gradient
             'C0','C1','C3','C4','C5'], # unordered categories
            ['Only when they have pain',
             'Less than once every 12 months',
             'Once every 12 months',
             'Once every 6 months',
             'Once every 3 months',
             'Unknown',
             'Not Specified',
             'N/A, children are too young',
             'Difficult due to non-compliance',
             'Scheduled dental van visits'],
        ),
    }
    default_cycle = ['C0','C1','C2','C3','C4','C5','C6','C7','C8','C9','C10']

    counts = data.value_counts()
    if isinstance(order, str) and order in presets:
        colors, categories = presets[order]
        # reindex (rather than label selection) so an answer nobody gave is
        # drawn as an empty bar instead of raising KeyError.
        counts = counts.reindex(categories, fill_value=0)
    else:
        # One colour per bar, cycling if there are more bars than colours,
        # so the legend below always has an entry for every bar.
        colors = [default_cycle[i % len(default_cycle)] for i in range(len(counts))]

    df = counts.to_frame(name='count')

    plt.figure(figsize=figsize)
    plt.title(title, fontsize=title_fontsize)
    # Transposed frame = one column per category, which seaborn draws as one bar each.
    sns.barplot(data = df.T, palette=colors).set(ylabel='count');

    if rotate_xlabels:
        angle = 20 if rotate_xlabels is True else rotate_xlabels
        plt.xticks(rotation=angle, ha='right', rotation_mode='anchor')

    legend_options = dict(
        handles=[mpatches.Patch(color=shade) for shade in colors],
        labels=[f"{label}: {n}" for label, n in counts.items()],
        facecolor="white",
        fontsize=15,
    )
    if legend_outside:
        legend_options.update(bbox_to_anchor=(1.05, 1),
                              loc='upper left',
                              borderaxespad=0.)
    plt.legend(**legend_options);

    # savefig(None) raises, so only write a file when a path was given.
    if filepath is not None:
        plt.savefig(filepath, bbox_inches='tight')

## DEMOGRAPHICS

In [None]:
# Load the location table; the map cells read its '(lat,long)' and 'count' columns.
# NOTE(review): unpickling can execute arbitrary code - only load pickles this project produced.
lat_long_df = pd.read_pickle("../saved_data_frames/lat_long_df.pkl")
lat_long_df.head()

In [None]:
# Create a map of the area, centred on the first location in the table.
# .iloc[0] takes the first row by position, so this works whatever the index labels are.
first_point = lat_long_df['(lat,long)'].iloc[0]
base_map = folium.Map([first_point[0], first_point[1]], zoom_start=6)
base_map

In [None]:
# Drop one marker per location; clicking it shows that location's count.
for point, n_here in zip(lat_long_df['(lat,long)'], lat_long_df['count']):
    folium.Marker(
        location=[point[0], point[1]],
        popup=folium.Popup(str(n_here), parse_html=True),
    ).add_to(base_map)
base_map

In [None]:
# Write the map, markers included, to an HTML file.
base_map.save('../data/pre_intervention/Smiles_United.html')

In [None]:
# Load the demographics answers plotted in the cells below.
# NOTE(review): unpickling can execute arbitrary code - only load pickles this project produced.
demo_df = pd.read_pickle("../saved_data_frames/demographics_df.pkl")
demo_df.head()

In [None]:
# Primary language of respondents, bars in count order.
distributions(
    data=demo_df['Primary Language'],
    title="Primary Languages",
    rotate_xlabels=45,
    figsize=(10, 5),
    filepath="../images/pre_intervention/demographics/primary_language",
)

In [None]:
# Answers in the 'Training Relevance' column, bars in count order.
distributions(
    data=demo_df['Training Relevance'],
    title="Training Relevance",
    rotate_xlabels=45,
    figsize=(10.1, 5),
    filepath="../images/pre_intervention/demographics/training_relevance",
)

In [None]:
# Community type of respondents; tick labels are left unrotated.
distributions(
    data=demo_df['Community Type'],
    title="Community Types",
    figsize=(6, 5),
    filepath="../images/pre_intervention/demographics/community_type",
)

In [None]:
# Gender of respondents, bars in count order.
# (distributions() has no 'gender' ordering, so the old order='gender' argument
# was silently ignored; it is dropped here to avoid implying a custom order.)
distributions(
    data=demo_df['Gender'],
    title="Gender",
    rotate_xlabels=45,
    figsize=(8, 5),
    filepath="../images/pre_intervention/demographics/gender",
)

In [None]:
# Race/ethnicity of respondents, bars in count order.
distributions(
    data=demo_df['Race/Ethnicity'],
    title="Distribution of Race/Ethnicity",
    rotate_xlabels=45,
    figsize=(10, 5),
    filepath="../images/pre_intervention/demographics/race_ethnicity",
)

## SELF-REPORTING

In [None]:
# Load the self-reported answers; the cells below pick columns by position.
# NOTE(review): unpickling can execute arbitrary code - only load pickles this project produced.
self_reporting = pd.read_pickle("../saved_data_frames/self_reporting_df.pkl")
self_reporting.head()

In [None]:
# Interest in additional training, on the agree/disagree scale.
col = self_reporting.columns[2]
distributions(
    data=self_reporting[col],
    title="Interest in Additional Training",
    order='agree',
    rotate_xlabels=45,
    figsize=(9, 5),
    filepath="../images/pre_intervention/attitude/interest_in_additional_training",
)

In [None]:
# Conversion dict: shortens the long "None of the residents..." answer to 'None',
# the label that the 'percent' ordering in distributions() looks for.
none_dict = {"None of the residents under my care experience bleeding when brushing their teeth":"None"}


In [None]:
col = self_reporting.columns[3] # grab the column

# Relabel for plotting only: the loaded frame is left untouched, so earlier
# displays of self_reporting stay accurate and the cell can be re-run safely.
distributions(
    data=self_reporting[col].replace(none_dict),
    title="Percentage of Residents Requiring Assistance with Brushing/Flossing",
    order="percent",
    title_fontsize=18,
    figsize=(9, 5),
    rotate_xlabels=45,
    filepath="../images/pre_intervention/demographics/assistance_brush_and_floss",
)

In [None]:
col = self_reporting.columns[4]

# Relabel for plotting only: the loaded frame is left untouched, so earlier
# displays of self_reporting stay accurate and the cell can be re-run safely.
distributions(
    data=self_reporting[col].replace(none_dict),
    title="Percentage of Residents Who Experience BLEEDING when BRUSHING",
    order="percent",
    title_fontsize=18,
    figsize=(9, 5),
    rotate_xlabels=45,
    filepath="../images/pre_intervention/demographics/bleeding_brushing",
)

In [None]:
col = self_reporting.columns[5]

# Relabel for plotting only: the loaded frame is left untouched, so earlier
# displays of self_reporting stay accurate and the cell can be re-run safely.
distributions(
    data=self_reporting[col].replace(none_dict),
    title="Percentage of Residents Who Experience BLEEDING when FLOSSING",
    order="percent",
    title_fontsize=18,
    figsize=(9, 5),
    rotate_xlabels=45,
    filepath="../images/pre_intervention/demographics/bleeding_flossing",
)

In [None]:
# How many distinct (non-missing) answers does column 6 contain?
col = self_reporting.columns[6]
self_reporting[col].nunique()

In [None]:
# How often residents brush, in the 'frequency_brush' order.
col = self_reporting.columns[6]
distributions(
    data=self_reporting[col],
    title="Frequency of Residents' BRUSHING",
    order='frequency_brush',
    rotate_xlabels=45,
    figsize=(9, 5),
    filepath="../images/pre_intervention/demographics/frequency_brushing",
)

In [None]:
# How often residents floss; the legend is placed outside the axes.
col = self_reporting.columns[7]
distributions(
    data=self_reporting[col],
    title="Frequency of Residents' FLOSSING",
    order='frequency_floss',
    rotate_xlabels=45,
    figsize=(9,5),
    legend_outside=True,
    filepath="../images/pre_intervention/demographics/frequency_flossing",
)

In [None]:
# How often residents snack between brushing and flossing, in the 'often' order.
col = self_reporting.columns[8]
distributions(
    data=self_reporting[col],
    title="Frequency of Residents' Snacking Between Brushing and Flossing",
    order='often',
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9, 5),
    filepath="../images/pre_intervention/demographics/frequency_snacking",
)

In [None]:
# How often residents visit the dentist, in the 'frequency_visit_dentist' order.
col = self_reporting.columns[9]
distributions(
    data=self_reporting[col],
    title="Frequency of Residents' Visits to the Dentist",
    order='frequency_visit_dentist',
    rotate_xlabels=45,
    figsize=(12, 6),
    filepath="../images/pre_intervention/demographics/frequency_dentist",
)

In [None]:
# Obstacles to providing oral care, bars in count order.
col = self_reporting.columns[10]
distributions(
    data=self_reporting[col],
    title="Obstacles to Providing Excellent Oral Care to Residents",
    rotate_xlabels=45,
    figsize=(9, 5),
    filepath="../images/pre_intervention/attitude/obstacles_providing_oral_care",
)

In [None]:
# Sources of dental-related information, bars in count order.
col = self_reporting.columns[11]
distributions(
    data=self_reporting[col],
    title="Sources of Dental-Related Information",
    rotate_xlabels=45,
    figsize=(9, 5),
    filepath="../images/pre_intervention/demographics/sources_of_information",
)

In [None]:
# Oral health care training obstacles, bars in count order.
col = self_reporting.columns[12]
distributions(
    data=self_reporting[col],
    title="Oral Health Care Training Obstacles",
    rotate_xlabels=45,
    figsize=(15, 7),
    filepath="../images/pre_intervention/demographics/training_obstacles",
)

In [None]:
# Confidence-building resources, bars in count order.
col = self_reporting.columns[13]
distributions(
    data=self_reporting[col],
    title="Confidence Building Resources",
    rotate_xlabels=45,
    figsize=(14, 7),
    filepath="../images/pre_intervention/self_reporting/confidence_building_resources",
)

In [None]:
# Whether respondents had previous special-needs oral health care training.
col = self_reporting.columns[14]
distributions(
    data=self_reporting[col],
    title="Previous Training on How to Provide Oral Health Care \nfor Individuals with Special Health Care Needs?",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9, 5),
    filepath="../images/pre_intervention/demographics/previous_training",
)



## HYPOTHESIS

In [None]:
# Load the hypothesis-evaluation answers; the cells below pick columns by position.
# NOTE(review): unpickling can execute arbitrary code - only load pickles this project produced.
eval_df = pd.read_pickle("../saved_data_frames/hypothesis_df.pkl")
eval_df.head()

In [None]:
# True/False answers to the statement that fluoridated products improve oral health.
col = eval_df.columns[2]
distributions(
    data=eval_df[col],
    title="Fluoridated Products Improve Oral Health",
    order='true_false',
    title_fontsize=18,
    figsize=(5,4),
    filepath="../images/pre_intervention/knowledge/floridated_products",
)



In [None]:
# True/False answers to the statement that healthy gums bleed when brushing.
col = eval_df.columns[3]
distributions(
    data=eval_df[col],
    title="Healthy Gums Bleed When Brushing",
    order='true_false',
    title_fontsize=18,
    figsize=(5,4),
    filepath="../images/pre_intervention/knowledge/healthy_gums",
)

In [None]:
# True/False answers to the statement that dry mouth can harm oral health.
col = eval_df.columns[4]
distributions(
    data=eval_df[col],
    title="Dry Mouth Can Have a Negative Effect on Oral Health",
    order='true_false',
    title_fontsize=18,
    figsize=(5,4),
    filepath="../images/pre_intervention/knowledge/dry_mouth",
)

In [None]:
# True/False answers to the statement that snacking can harm oral health.
col = eval_df.columns[5]
distributions(
    data=eval_df[col],
    title="Snacking Can Have a Negative Effect on Oral Health",
    order='true_false',
    title_fontsize=18,
    figsize=(5,4),
    filepath="../images/pre_intervention/knowledge/snacking_bad",
)

In [None]:
# Respondents' view of how often residents should brush, in the 'k_brush' order.
col = eval_df.columns[15]
distributions(
    data=eval_df[col],
    title="How often should residents brush their teeth?",
    order="k_brush",
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/knowledge/often_should_brush",
)

In [None]:
# Respondents' view of how often residents should floss, in the 'frequency_floss' order.
# The title now ends in '?' (it used to end in a stray space) to match the brushing plot.
col = eval_df.columns[16]
distributions(
    data=eval_df[col],
    title="How often should residents floss their teeth?",
    order="frequency_floss",
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/knowledge/often_should_floss",
)

In [None]:
# Peek at the answer labels and their counts in column 12.
col = eval_df.columns[12]
eval_df[col].value_counts()

In [None]:
col = eval_df.columns[12]

# Relabel for plotting only: eval_df is left untouched, so the value_counts
# and head() displays of the raw answers stay accurate when cells are re-run.
distributions(
    data=eval_df[col].replace(none_dict),
    title="Percentage of Residents Who Experience PAIN when BRUSHING",
    order="percent",
    title_fontsize=18,
    figsize=(9,5),
    rotate_xlabels=45,
    filepath="../images/pre_intervention/demographics/pain_when_brushing",
)

In [None]:
col = eval_df.columns[13]

# Relabel for plotting only: eval_df is left untouched, so earlier displays
# of the raw answers stay accurate when cells are re-run.
distributions(
    data=eval_df[col].replace(none_dict),
    title="Percentage of Residents Who Experience PAIN when FLOSSING",
    order="percent",
    title_fontsize=18,
    figsize=(9,5),
    rotate_xlabels=45,
    filepath="../images/pre_intervention/demographics/pain_when_flossing",
)

In [None]:
col = eval_df.columns[14]

# Relabel for plotting only: eval_df is left untouched, so earlier displays
# of the raw answers stay accurate when cells are re-run.
distributions(
    data=eval_df[col].replace(none_dict),
    title="Percentage of Residents who Experience Dental Pain Throughout the Day\n(when they are not brushing or flossing)",
    order="percent",
    title_fontsize=18,
    figsize=(9,5),
    rotate_xlabels=45,
    filepath="../images/pre_intervention/demographics/pain_throughout_day",
)

In [None]:
# Belief in having adequate training, on the agree/disagree scale.
col = eval_df.columns[6]
distributions(
    data=eval_df[col],
    title="Respondents' Belief They Have Adequate Training",
    order="agree",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/attitude/adequate_training",
)

In [None]:
# Whether residents' needs require further training, on the agree/disagree scale.
col = eval_df.columns[7]
distributions(
    data=eval_df[col],
    title="Residents Have Oral Health Care Needs \nWhich Require Further Training",
    order="agree",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/attitude/needed_further_training",
)

In [None]:
# Belief in having effective brushing techniques, on the agree/disagree scale.
col = eval_df.columns[8]
distributions(
    data=eval_df[col],
    title="Respondents' Belief They Have Effective Teeth Brushing \nTechniques to Aid Residents",
    order="agree",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/attitude/effective_teeth_brushing_techniques",
)

In [None]:
# Comfort assisting with fluoridated products, on the agree/disagree scale.
col = eval_df.columns[9]
distributions(
    data=eval_df[col],
    title="Respondents' Comfort Assisting Residents in the \nSafe Use of Fluoridated Dental Products",
    order="agree",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/attitude/comfort_assisting",
)

In [None]:
# Confidence in identifying residents' oral pain, on the agree/disagree scale.
col = eval_df.columns[11]
distributions(
    data=eval_df[col],
    title="Respondents' Confidence They Have Adequate \nKnowledge to Identify Residents' Oral Pain",
    order="agree",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/attitude/confidence_identify_oral_pain",
)

In [None]:
# Confidence in recognising non-verbal signs of pain, on the agree/disagree scale.
col = eval_df.columns[10]
distributions(
    data=eval_df[col],
    title="Respondents' Confidence in Ability to Recognize \nNon-Verbal Signs of Pain in Residents",
    order="agree",
    title_fontsize=18,
    rotate_xlabels=45,
    figsize=(9,5),
    filepath="../images/pre_intervention/attitude/confidence_identify_non_verbal_pain",
)

In [None]:
# Inspect one eval_df column by position: print its name, then show its answer counts.
index = 10
inspected = eval_df.columns[index]

print(inspected)
print()
eval_df[inspected].value_counts()