# Plots for Smiles United Project - POST
This notebook contains all the plots created from the post-training data.

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats

import seaborn as sns
sns.set_style('darkgrid', {'axes.facecolor': '0.9', "grid.color": ".6", "grid.linestyle": ":"})
sns.set_context("talk")

import folium

import matplotlib.pyplot as plt
from matplotlib import patches as mpatches
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

## Minutes to complete survey

In [None]:
post_time = pd.read_pickle("../saved_data_frames/POST_time_df.pkl")
post_time.head()

In [None]:
# Raw completion times (no filtering): histogram on the left, boxplot on the right.
fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(10,5))
fig.set_tight_layout(True)
fig.suptitle("Minutes to complete survey", fontsize=20)

axes[0] = sns.histplot(data=post_time[['mins']], ax=axes[0])
# Drop the legend attached to the histogram; there is only one series.
axes[0].get_legend().remove()
axes[0].set(title='Histogram');

axes[1] = sns.boxplot(data=post_time[['mins']], saturation=0.9, color="tab:blue", ax=axes[1], orient="h")
# Title previously rendered with a typo ("Bolxplot").
axes[1].set(title='Boxplot')
# A single horizontal box needs neither a y tick nor a y tick label.
axes[1].tick_params(left=False)
axes[1].set(yticklabels=[]);

In [None]:
import math  # no longer used in this cell; kept in case other cells rely on it

# Marker style for the mean shown on the boxplot.
meanpointprops = dict(marker='D', markeredgecolor='black',
                      markerfacecolor='firebrick')
# Keep only the surveys whose completion time has |z-score| < 2.
trimmed = post_time[(np.abs(stats.zscore(post_time['mins'])) < 2)][['mins']]

# Mean completion time, computed once and selected by column label
# (positional `[0]` on a label-indexed Series is deprecated in recent pandas).
mean_mins = trimmed['mins'].mean()
# Round the total number of seconds before splitting, so the label can never
# read "...m:60s" when the fractional minute rounds up.
label_mins, label_secs = divmod(int(round(mean_mins * 60)), 60)

fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(15,5))
fig.set_tight_layout(True)
fig.suptitle("Minutes to complete POST survey  - with 2 extreme outliers removed", fontsize=25)


axes[0] = sns.histplot(data=trimmed,
                       color="tab:blue", 
                       ax=axes[0])
axes[0].axvline(mean_mins, 
                color='firebrick', 
                lw=2, 
                ls='--', 
                label=f"Average time to complete survey: {label_mins}m:{label_secs}s")
axes[0].legend(facecolor="white")
axes[0].set(title='Histogram');


axes[1] = sns.boxplot(data=trimmed, 
                      ax=axes[1], 
                      orient="h", 
                      meanprops=meanpointprops,
                      showmeans=True)
# Title previously rendered with a typo ("Bolxplot").
axes[1].set(title='Boxplot')
axes[1].tick_params(left=False)
axes[1].set(yticklabels=[]);
plt.savefig("../images/post_intervention/mins_to_complete",bbox_inches='tight')

In [None]:
# Confirm how many surveys the outlier filter removed.
n_original, n_trimmed = len(post_time), len(trimmed)
difference = n_original - n_trimmed

print(f"original: {n_original}")
print(f"trimmed: {n_trimmed}")
print("-" * 13)
print(f"# removed: {difference}")

## Total responses vs. Completed responses

In [None]:
post_totals = pd.read_pickle("../saved_data_frames/post_totals_df.pkl")
post_totals

In [None]:
# One colour per bar; the legend repeats each bar's label with its count.
colors = ['C10', 'C12']
legend_patches = [mpatches.Patch(color=shade) for shade in colors]
legend_labels = [f"{name}: {post_totals['count'][name]}" for name in post_totals.T.columns]

plt.figure(figsize=(6,5))
plt.title("Total responses vs. Completed POST responses", fontsize=20)
totals_ax = sns.barplot(data=post_totals.T, palette=colors)
totals_ax.set(ylabel='count')

plt.legend(handles=legend_patches,
           labels=legend_labels,
           facecolor="white",
           fontsize=13);
plt.savefig("../images/post_intervention/total_v_complete", bbox_inches='tight')

In [None]:
# summary of pre- and post-training survey completion

In [None]:
post_totals

In [None]:
totals = pd.read_pickle("../saved_data_frames/totals_df.pkl")
totals

In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(15,6), sharey=True)
fig.set_tight_layout(True)
fig.suptitle("Total responses vs. Completed responses", fontsize=25);

# Both panels are drawn the same way: bars from the counts frame, a legend that
# spells out each count, and a panel title. Left = pre-training, right = post-training.
panels = [
    (ax1, totals, "Pre-training Surveys"),
    (ax2, post_totals, "Post-training Surveys"),
]
for panel_ax, counts_df, panel_title in panels:
    sns.barplot(data=counts_df.T, palette=colors, ax=panel_ax).set(ylabel='count')
    panel_ax.legend(
        handles=[mpatches.Patch(color=shade) for shade in colors],
        labels=[f"{name}: {counts_df['count'][name]}" for name in counts_df.T.columns],
        facecolor="white",
        fontsize=13,
    )
    panel_ax.set_title(panel_title, fontsize=20)

plt.savefig("../images/pre_post_survey_completion", bbox_inches='tight')

## Number of respondents by latitude & longitude

In [None]:
POST_lat_long_df = pd.read_pickle("../saved_data_frames/lat_long_POST_df.pkl")
POST_lat_long_df.head()

In [None]:
# Create the base map, centred on the coordinates stored under index label 0.
first_point = POST_lat_long_df['(lat,long)'][0]
POST_base_map = folium.Map([first_point[0], first_point[1]], zoom_start=6)
POST_base_map

In [None]:
# Drop one marker per row; clicking a marker pops up that row's respondent count.
for coords, n_respondents in zip(POST_lat_long_df['(lat,long)'], POST_lat_long_df['count']):
    folium.Marker(
        location=[coords[0], coords[1]],
        popup=folium.Popup(str(n_respondents), parse_html=True),
    ).add_to(POST_base_map)
POST_base_map

In [None]:
POST_base_map.save('../data/post/Smiles_United.html')

## Create a Plotting Function


In [None]:
# make a function for plots
def distributions(data, 
                  title, 
                  rotate_xlabels=False, 
                  figsize=(15,8), 
                  order=None, 
                  title_fontsize=20,
                  legend_outside=False,
                  filepath=None):
    """Draw a bar chart of how often each answer occurs in ``data``.

    Parameters
    ----------
    data : pandas.Series (a one-column DataFrame is also accepted)
        The answers to count with ``value_counts()``.
    title : str
        Figure title.
    rotate_xlabels : bool or number, default False
        Falsy: leave the x tick labels alone. ``True``: rotate them 20 degrees.
        Any other truthy value is used as the rotation angle.
    figsize : tuple, default (15, 8)
        Passed to ``plt.figure``.
    order : str or None, default None
        Name of a preset that fixes both the bar order and the bar colours:
        'agree', 'agree_lowercase', 'percent', 'frequency_brush',
        'frequency_floss', 'often', 'true_false', 'k_brush' or
        'frequency_visit_dentist'. With ``None`` -- or a name that is not a
        preset -- the bars keep ``value_counts()`` order and use the default
        colour cycle.
    title_fontsize : int, default 20
        Font size of the title.
    legend_outside : bool, default False
        Place the legend to the right of the axes instead of inside them.
    filepath : str or None, default None
        Where to save the figure. Nothing is saved when it is ``None``.

    Notes
    -----
    Preset categories that never occur in ``data`` are drawn with a count of 0
    (they used to raise ``KeyError``). Answers that are not part of the chosen
    preset are left out of the chart, as before.
    """
    # Colour scales shared by more than one preset.
    agree_colors = ['#2bdb1f','#c4fa2f','#fae22f','#fa942f','#fa2f2f']

    # preset name -> (bar colours, categories in display order)
    presets = {
        'agree': (agree_colors,
                  ['Strongly Agree',
                   'Agree',
                   'Neutral',
                   'Disagree',
                   'Strongly Disagree']),
        'agree_lowercase': (agree_colors,
                            ['Strongly agree',
                             'Agree',
                             'Neutral',
                             'Disagree',
                             'Strongly disagree']),
        'percent': (['#C97EFE','#AD70D8','#9963BF','#8053A0','#684382'],
                    ['None',
                     'Less than 25%',
                     '25% to 50%',
                     '50% to 75%',
                     'Greater than 75%']),
        'frequency_brush': (['#82C5FE','#6DA5D5','#5782A8','#456886'],
                            ['Less than 1 time each month',
                             '2 to 3 times each week',
                             '1 time each day',
                             '2 to 3 times each day']),
        'frequency_floss': (['#8AC386','#73B06F','#5E9C5A','#498745','#3A7536', "#2C6329"],
                            ['Less than 1 time each month',
                             '1 time each month',
                             '1 time each week',
                             '2 to 3 times each week',
                             '1 time each day',
                             '2 to 3 times each day']),
        'often': (['#FDC384','#FEAC54','#FD982B','#FD8300'],
                  ['Never','Not often','Somewhat often','Very often']),
        'true_false': (['C24', 'C19'],
                       ['True','False']),
        'k_brush': (['#82C5FE','#70AADB','#6193BD','#527CA0','#456886'],
                    ['Less than 1 time each month',
                     '1 time each week',
                     '2 to 3 times each week',
                     '1 time each day',
                     '2 to 3 times each day']),
        'frequency_visit_dentist': (['#83FED7','#72DCBA','#61BDA0','#519F86','#44846F', # gradient
                                     'C0','C1','C3'], # arbitrary picks for the non-ordinal answers
                                    ['Only when they have pain',
                                     'Less than once every 12 months',
                                     'Once every 12 months',
                                     'Once every 6 months',
                                     'Once every 3 months',
                                     'Unknown',
                                     'Varies',
                                     'Never']),
    }

    # Accept a one-column frame as well as a Series.
    if isinstance(data, pd.DataFrame):
        data = data.squeeze(axis=1)
    counts = data.value_counts()

    colors = ['C0','C1','C2','C3','C4','C5','C6','C7','C8','C9','C10']
    preset = presets.get(order) if isinstance(order, str) else None
    if preset is not None:
        colors, categories = preset
        # reindex (rather than label selection) keeps categories nobody chose,
        # with a count of 0, instead of raising KeyError.
        counts = counts.reindex(categories, fill_value=0)

    df = counts.to_frame('count')

    plt.figure(figsize=figsize)
    plt.title(title, fontsize=title_fontsize)
    sns.barplot(data=df.T, palette=colors).set(ylabel='count')

    if rotate_xlabels:
        angle = 20 if rotate_xlabels is True else rotate_xlabels
        plt.xticks(rotation=angle, ha='right', rotation_mode='anchor')

    # One legend entry per bar. Colours are cycled so that every bar gets an
    # entry even when there are more bars than colours in the palette.
    bar_colors = [colors[i % len(colors)] for i in range(len(df))]
    legend_kwargs = dict(
        handles=[mpatches.Patch(color=shade) for shade in bar_colors],
        labels=[f"{label}: {n}" for label, n in df['count'].items()],
        facecolor="white",
        fontsize=15,
    )
    if legend_outside:
        legend_kwargs.update(bbox_to_anchor=(1.05, 1),
                             loc='upper left',
                             borderaxespad=0.)
    plt.legend(**legend_kwargs)

    # filepath defaults to None; only write a file when a destination was given.
    if filepath is not None:
        plt.savefig(filepath, bbox_inches='tight')

## Load post training data

In [None]:
post = pd.read_pickle("../saved_data_frames/cleaned_post.pkl")

In [None]:
post.head()

## Plot location sites

In [None]:
# Responses per program site.
site = post[['Your program site']].value_counts().to_frame()
site.columns = ['count']
site.index.names = ['']
# NOTE(review): these display names are assigned by position, so they assume
# value_counts() returns the sites in exactly this order -- confirm after any data refresh.
site.index = ['Anderson Center','Not Specified', 'Mamaroneck (YAI)',]
site

In [None]:
# One colour per site; the legend repeats each site name with its count.
colors = ['C2', 'C4', 'C5']
site_patches = [mpatches.Patch(color=shade) for shade in colors]
site_labels = [f"{name}: {site['count'][name]}" for name in site.index]

plt.figure(figsize=(10,5))
plt.title("Post Training Location Sites", fontsize=20)
site_ax = sns.barplot(data=site.T, palette=colors)
site_ax.set(ylabel='count')

plt.legend(handles=site_patches,
           labels=site_labels,
           facecolor="white",
           fontsize=13);

plt.savefig("../images/post_intervention/location_sites", bbox_inches='tight')

# Plots of PAIRED surveys

## Load PRE training with the 50 reliability testing surveys removed

In [None]:
sig_testing = pd.read_pickle("../saved_data_frames/sig_testing_PRE.pkl")
#sig_testing

In [None]:
# Print the number of repeated values in each ID column (chosen ID first, then random ID).
for id_col in ('ChosenID', 'RandomID'):
    print(sig_testing[id_col].duplicated().sum())

In [None]:
# all values in ChosenID and RandomID are unique

In [None]:
# Load the cleaned pre-training answers and shorten the long "no bleeding" answer to 'None'.
bleeding_answer_map = {
    'None of the residents under my care experience bleeding when brushing their teeth': 'None'
}
all_pre = pd.read_pickle("../saved_data_frames/cleaned_pre.pkl").replace(bleeding_answer_map)

In [None]:
# Inner-join the pre-training answers with the ID columns of `sig_testing` on the
# row index. The ID columns already present in `all_pre` get a '_drop' suffix so
# the two versions can be compared before the suffixed ones are discarded.
sig_ids = sig_testing[['ChosenID','RandomID']]
pre = all_pre.join(sig_ids, how='inner', lsuffix='_drop', rsuffix='')

In [None]:
(pre['ChosenID'] != pre['ChosenID_drop']).sum()

In [None]:
(pre['RandomID'] != pre['RandomID_drop']).sum()

In [None]:
pre = pre.drop(['ChosenID_drop','RandomID_drop'], axis=1)

## Extract out additional questions added to the post training survey

In [None]:
# The four questions pulled out of `post` before its columns are rearranged.
ddpc_questions = [
    'I am satisfied with the Smiles United training videos.',
    'I have increased my advocacy following the Smiles United training.',
    'I am better able to say what I want.',
    'I am serving in leadership or advocacy positions (ex. Coalitions, policy boards, advisory boards)',
]
post_DDPC_required = post[ddpc_questions]

## Rearrange columns to match the pre-training data

In [None]:
# Select and order the post-training columns explicitly. The next cell renames
# them positionally (`post.columns = pre.columns`), so the order of this list
# is what decides which post column receives which pre column name.
post = post[
    [
        'I would be interested in receiving additional training to help maintain the oral health of residents under my care.',
        'Approximately, what percentage of residents under your care require assistance brushing or flossing their teeth?',
        'Approximately, what percentage of residents under your care experience bleeding when brushing their teeth?',
        'Approximately, what percentage of residents under your care experience bleeding when flossing their teeth?',
        'On average, how often do most residents under your care brush their teeth?',
        'On average, how often do most residents under your care floss their teeth?',
        'Approximately, how often do residents under your care have snacks throughout the day between brushing and flossing their teeth?',
        'Approximately, how often do residents under your care go to the dentist?_consolidated',
        'What is the biggest obstacle to providing excellent oral care to residents?_consolidated',
        'What is your primary source of dental-related information?_consolidated',
        'What is the biggest obstacle to receiving proper oral health care training in your facility?_consolidated',
        'Which resource would be most useful to help improve your confidence in delivering excellent oral homecare to residents under your care?_consolidated',
        'Before today, I have received training on how to provide direct oral health care for individuals with special health care needs_consolidated',
        'Which of the following best describes your Race/Ethnicity?',
        'Do you identify as:',
        'Which of the following best describes the area you live in?',
        'What is your primary language?',
        'Are you a:',
        'Fluoridated products, such as fluoridated toothpaste and fluoridated water, can help improve the oral health of residents.',
        'It is normal for healthy gums to bleed when brushing teeth.',
        'Dry mouth can have a negative effect on overall oral health.',
        'Snacking throughout the day can have a negative impact on oral health.',
        'I believe I have effective techniques which I use to brush the teeth of residents under my care.',
        'I believe I have previously received adequate training to help provide the best oral care possible to residents under my care.',
        'I believe residents under my care have oral health care needs which require further training to adequately understand and help manage.',
        'I feel comfortable assisting residents in the safe use of fluoridated dental products (such as fluoridated toothpaste).',
        'I am able to confidently recognize non-verbal signs of pain in residents under my care.',
        'I feel confident that I have the knowledge to identify when residents under my care experience oral pain.',
        'Approximately, what percentage of residents under your care express that they experience pain when brushing their teeth?',
        'Approximately, what percentage of residents under your care experience pain when flossing their teeth?',
        'Approximately, what percentage of residents under your care express that they experience dental pain throughout the day when they are not brushing or flossing their teeth?',            
        'How often should residents brush their teeth each day?',
        'How often should residents floss their teeth each day?',     
        # ID columns come last.
        'EnterChosenID',
        'EnterRandomID'
    ]
]


In [None]:
post.columns = pre.columns

In [None]:
post[["I believe I have previously received adequate training to help provide the best oral care possible to residents under my care."]].value_counts()


## Check duplicated ID numbers from `post`

In [None]:
# The six most frequent ChosenID values and their counts.
# NOTE(review): the cut-off of 6 is hard-coded -- confirm it still matches the IDs that occur more than once.
dup_chosen = dict(post["ChosenID"].value_counts().iloc[:6])

In [None]:
# The six most frequent RandomID values and their counts.
# NOTE(review): the cut-off of 6 is hard-coded -- confirm it still matches the IDs that occur more than once.
dup_random = dict(post["RandomID"].value_counts().iloc[:6])

In [None]:
# Show just the ID values (the dict keys), without their counts.
list(dup_chosen)

In [None]:
post['ChosenID'] = post['ChosenID'].replace(to_replace='77291959).',
                                            value='77291959')

## remove duplicated ID number from `post`

In [None]:
# Flag every response whose ChosenID or RandomID is one of the IDs collected in
# `dup_chosen` / `dup_random`. The flag column stays on `post` because later
# cells drop it by name.
#
# The flags are computed in one vectorised assignment: the previous row-by-row
# `post['DuplicatedID'][i] = True` is chained assignment, which does not write
# back to `post` when pandas Copy-on-Write is active.
post['DuplicatedID'] = (
    post['ChosenID'].isin(list(dup_chosen))
    | post['RandomID'].isin(list(dup_random))
)

# Keep only the unflagged responses, without the helper column, renumbered from 0.
post_trimmed = (
    post.loc[~post['DuplicatedID']]
    .drop(['DuplicatedID'], axis=1)
    .reset_index(drop=True)
)

In [None]:
post_trimmed['ChosenID'] = post_trimmed['ChosenID'].replace(to_replace="5258 1950",value="52581950")

## join 

In [None]:
post_trimmed.shape

In [None]:
pre.shape

In [None]:

# Pair pre- and post-training surveys on the self-chosen ID: index both frames
# by it, then inner-join. Columns coming from `pre` get the '_PRE' suffix.
pre.index = pre['ChosenID']
post_trimmed.index = post_trimmed['ChosenID']

joined_chosen = pre.join(post_trimmed, how='inner', lsuffix='_PRE', rsuffix='')

# For each ID column, print how many matched rows disagree between pre and post.
for id_col in ('ChosenID', 'RandomID'):
    print((joined_chosen[id_col] != joined_chosen[f'{id_col}_PRE']).sum())

In [None]:
joined_chosen

In [None]:
# Pair pre- and post-training surveys on the random ID: index both frames by
# it, then inner-join. Columns coming from `pre` get the '_PRE' suffix.
pre.index = pre['RandomID']
post_trimmed.index = post_trimmed['RandomID']

joined_random = pre.join(post_trimmed, how='inner', lsuffix='_PRE', rsuffix='')

# For each ID column, print how many matched rows disagree between pre and post.
for id_col in ('ChosenID', 'RandomID'):
    print((joined_random[id_col] != joined_random[f'{id_col}_PRE']).sum())

In [None]:
joined_random

In [None]:
# Plain lists of the post-training IDs: c = chosen IDs, r = random IDs.
c = list(post_trimmed['ChosenID'])
r = list(post_trimmed['RandomID'])

In [None]:
# Collect every pre-training ID (chosen IDs first, then random IDs) that shows
# up among the post-training IDs, whichever of the two ID columns it matches.
post_ids = c + r
grab = [n for n in pre['ChosenID'] if n in post_ids]
grab += [n for n in pre['RandomID'] if n in post_ids]

grab

## Differences in instructions 

### Pre-training instructions

In [None]:
print("""
Please make a note of this random ID number assigned to you:\n\n\n\nYour ID number is\n\n\n\n[Field-RandomID]\n\n\n\nAt the end of the survey, you will have the opportunity to save your response as a pdf. The pdf will include your 5-digit ID number. Please save the random ID number as it will be required for the follow-up survey.\n\n\n\nIn the box below, please enter the last 4 digits of your phone number along with the 4 digits of your birth year (ie. the Last four digits of your ph.no. is 1234 and your birth year is 1994, then the response would look like this: 12341994).  All answers will be kept confidential and will NOT be shared with your employer. Once you finished entering the 4 digits of your phone number along with the 4 digits of your birth year, click the right arrow at the bottom of the screen to continue with the survey.'

""")

### Post-training instructions

In [None]:
print("""
In the box below, please enter the last 4 digits of your phone number along with the 4 digits of your birth year (ie. the Last four digits of your ph.no. is 1234 and your birth year is 1994, then the response will look like this: 12341994). All answers will be kept confidential and will NOT be shared with your employer. Once you finished entering the 4 digits of your phone number along with the 4 digits of your birth year, click the right arrow at the bottom of the screen to continue with the survey.
""")

# Frequency Distribution Plots of all Post Training Data

In [None]:
plots = post.drop(["ChosenID","RandomID","DuplicatedID"], axis=1)

In [None]:
col = plots.columns[0]
distributions(plots[col],
              "Interest in Additional Training (post-training)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/attitude/interest_in_additional_training_POST")

In [None]:
col = plots.columns[1]
distributions(plots[col], 
              "Percentage of Residents Requiring Assistance with Brushing/Flossing (post-training)",
              order="percent",
              title_fontsize=18,
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/assistance_brush_and_floss_POST")

In [None]:
col = plots.columns[2]
distributions(plots[col], 
              "Percentage of Residents Who Experience BLEEDING when BRUSHING (post-training)",
              order="percent",
              title_fontsize=18,
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/bleeding_brushing_POST")

In [None]:
col = plots.columns[3]
distributions(plots[col], 
              "Percentage of Residents Who Experience BLEEDING when FLOSSING (post-training)",
              order="percent",
              title_fontsize=18,
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/bleeding_flossing_POST")

In [None]:
#

In [None]:
# Counts for the brushing-frequency question, in display order.
# reindex keeps the real count of every listed category and fills 0 only for
# categories nobody selected. The previous version assigned 0 to
# 'Less than 1 time each month' unconditionally, which would have erased a real
# count, and raised KeyError if any other listed category was absent.
temp_df = (
    plots[plots.columns[4]]
    .value_counts()
    .reindex(
        [
            'Less than 1 time each month',
            '2 to 3 times each week',
            '1 time each day',
            '2 to 3 times each day'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

In [None]:
# One colour per bar, reused for the legend patches.
brush_palette = ['#82C5FE','#6DA5D5','#5782A8','#456886']

plt.figure(figsize=(9, 5))
plt.title("Frequency of Residents' BRUSHING (post-training)", fontsize=20)
brush_ax = sns.barplot(data=temp_df.T, palette=brush_palette)
brush_ax.set(ylabel='count');

plt.xticks(rotation=45, ha='right', rotation_mode='anchor');

# Legend entries read "<category>: <count>".
plt.legend(handles=[mpatches.Patch(color=shade) for shade in brush_palette],
           labels=[f"{category}: {n}" for category, n in temp_df['count'].items()],
           facecolor="white",
           fontsize=15);

plt.savefig("../images/post_intervention/demographics/frequency_brushing_POST", bbox_inches='tight')

In [None]:
col = plots.columns[5]
distributions(plots[col], 
              "Frequency of Residents' FLOSSING (post-training)",
              order='frequency_floss',
              figsize=(9,5),
              rotate_xlabels=45,
              legend_outside = True,
              filepath="../images/post_intervention/demographics/frequency_flossing_POST")

In [None]:
col = plots.columns[6]
distributions(plots[col], 
              "Frequency of Residents' Snacking Between Brushing and Flossing (post-training)",
              order='often',
              title_fontsize=18,
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/frequency_snacking_POST")

In [None]:
col = plots.columns[7]
distributions(plots[col], 
              "Frequency of Residents' Visits to the Dentist (post-training)",
              order='frequency_visit_dentist',
              figsize=(14, 6),
              title_fontsize=21,
              rotate_xlabels=45,
              #legend_outside = True,
              filepath="../images/post_intervention/demographics/frequency_dentist_POST")

In [None]:
col = plots.columns[8]
distributions(plots[col], 
              "Obstacles to Providing Excellent Oral Care to Residents (post-training)",
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/attitude/obstacles_providing_oral_care_POST")

In [None]:
dental_info_dict = {
    "Dental professionals such as dentist, dental hygienist, dental assistants":"Dental professionals",
    "Internet and social media sites such as Google, YouTube, Twitter, Facebook, etc.":"Internet and social media",
    "Academic sources such as research papers and research journal articles":"Academic sources"
}

In [None]:
col = plots.columns[9]

plots[col] = plots[col].replace(dental_info_dict)

distributions(plots[col], 
              "Sources of Dental-Related Information (post-training)",
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/sources_of_information_POST")

In [None]:
col = plots.columns[10]
distributions(plots[col], 
              "Oral Health Care Training Obstacles (post-training)",
              figsize=(15, 7),
              rotate_xlabels=45,
              #legend_outside = True,
              filepath="../images/post_intervention/demographics/training_obstacles_POST")

In [None]:
col = plots.columns[11]
distributions(plots[col], 
              "Confidence Building Resources (post-training)",
              figsize=(13, 7),
              rotate_xlabels=45,
              filepath="../images/post_intervention/self_reporting/confidence_building_resources_POST")

In [None]:
col = plots.columns[12]
distributions(plots[col], 
              "Previous Training on How to Provide Oral Health Care \nfor Individuals with Special Health Care Needs? (post-training)",
              title_fontsize=18,
              figsize=(9, 5),
              rotate_xlabels=45,
              #legend_outside=True, 
              filepath="../images/post_intervention/demographics/previous_training_POST")



In [None]:
col = plots.columns[13]
distributions(plots[col], 
              "Distribution of Race/Ethnicity (post-training)",
              figsize=(9, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/race_ethnicity_POST")

In [None]:
col = plots.columns[14]
# No `order` preset is passed: `distributions` defines none for gender, so the
# previous order='gender' argument matched no branch and was silently ignored.
distributions(plots[col], 
              "Gender (post-training)", 
              rotate_xlabels=45,
              figsize=(6, 5),
              filepath="../images/post_intervention/demographics/gender_POST")

In [None]:
col = plots.columns[15]
distributions(plots[col],
              "Community Types (post-training)",
              figsize=(6, 5),
              filepath="../images/post_intervention/demographics/community_type_POST")

In [None]:
col = plots.columns[16]
distributions(plots[col],
              "Primary Languages (post-training)",
              figsize=(6, 5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/primary_language_POST")

In [None]:
col = plots.columns[17]
distributions(plots[col],
              "Training Relevance (post-training)",
              rotate_xlabels=45,
              figsize=(6, 5),
              legend_outside=True, 
              filepath="../images/post_intervention/demographics/training_relevance_POST")

In [None]:
col = plots.columns[18]
distributions(plots[col], 
              "Fluoridated Products Improve Oral Health (post-training)",
              order='true_false',
              title_fontsize=18,
              figsize=(5,4),
              filepath="../images/post_intervention/knowledge/floridated_products_POST")


In [None]:
col = plots.columns[19]
distributions(plots[col], 
              "Healthy Gums Bleed When Brushing (post-training)",
              figsize=(5,4),
              title_fontsize=18,
              order='true_false',
              filepath="../images/post_intervention/knowledge/healthy_gums_POST")

In [None]:
col = plots.columns[20]
distributions(plots[col], 
              "Dry Mouth Can Have a Negative Effect on Oral Health (post-training)",
              figsize=(5,4),
              title_fontsize=18,
              order='true_false',
              filepath="../images/post_intervention/knowledge/dry_mouth_POST")

In [None]:
col = plots.columns[21]
distributions(plots[col], 
              "Snacking Can Have a Negative Effect on Oral Health (post-training)",
              figsize=(5,4),
              title_fontsize=18,
              order='true_false', 
              filepath="../images/post_intervention/knowledge/snacking_bad_POST")

In [None]:
# Counts for the agreement question in column 22, in display order.
# reindex keeps the real count of every listed answer and fills 0 only for
# answers nobody gave. The previous version assigned 0 to 'Disagree'
# unconditionally, which would have erased a real count, and raised KeyError if
# any other listed answer was absent.
temp_df2 = (
    plots[plots.columns[22]]
    .value_counts()
    .reindex(
        [
            'Strongly Agree',
            'Agree',
            'Neutral',
            'Disagree',
            'Strongly Disagree'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

In [None]:
# One colour per bar, reused for the legend patches.
agree_palette = ['#2bdb1f','#c4fa2f','#fae22f','#fa942f','#fa2f2f']

plt.figure(figsize=(9, 5))
plt.title("Respondents' Belief They Have Effective Teeth Brushing \nTechniques to Aid Residents (post-training)", fontsize=20)
agree_ax = sns.barplot(data=temp_df2.T, palette=agree_palette)
agree_ax.set(ylabel='count');

plt.xticks(rotation=45, ha='right', rotation_mode='anchor');

# Legend entries read "<answer>: <count>".
plt.legend(handles=[mpatches.Patch(color=shade) for shade in agree_palette],
           labels=[f"{answer}: {n}" for answer, n in temp_df2['count'].items()],
           facecolor="white",
           fontsize=15);

plt.savefig("../images/post_intervention/attitude/effective_teeth_brushing_techniques_POST", bbox_inches='tight')

In [None]:
col = plots.columns[23]
distributions(plots[col], 
              "Respondents' Belief They Have Adequate Training (post-training)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/attitude/adequate_training_POST")

In [None]:
col = plots.columns[24]
distributions(plots[col], 
              "Residents Have Oral Health Care Needs \nWhich Require Further Training (post-training)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/attitude/needed_further_training_POST")

In [None]:
col = plots.columns[25]
distributions(plots[col], 
              "Respondents' Comfort Assisting Residents in the \nSafe Use of Fluoridated Dental Products (post-training)", 
               order="agree",
               title_fontsize=18,
               figsize=(9,5),
               rotate_xlabels=45,
               filepath="../images/post_intervention/attitude/comfort_assisting_POST")

In [None]:
col = plots.columns[26]
distributions(plots[col], 
              "Respondents' Confidence in Ability to Recognize \nNon-Verbal Signs of Pain in Residents (post-training)", 
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/attitude/confidence_identify_non_verbal_pain_POST")

In [None]:
col = plots.columns[27]
distributions(plots[col], 
              "Respondents' Confidence They Have Adequate \nKnowledge to Identify Residents' Oral Pain (post-training)", 
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/attitude/confidence_identify_oral_pain_POST")

In [None]:
# Counts for the percentage question in column 28, in display order.
# reindex keeps the real count of every listed band and fills 0 only for bands
# nobody selected. The previous version assigned 0 to 'Greater than 75%'
# unconditionally, which would have erased a real count, and raised KeyError if
# any other listed band was absent.
temp_df3 = (
    plots[plots.columns[28]]
    .value_counts()
    .reindex(
        [
            'None',
            'Less than 25%',
            '25% to 50%',
            '50% to 75%',
            'Greater than 75%'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

In [None]:
# One colour per bar, reused for the legend patches.
colors = ['#C97EFE','#AD70D8','#9963BF','#8053A0','#684382']
pain_patches = [mpatches.Patch(color=shade) for shade in colors]
# Legend entries read "<band>: <count>".
pain_labels = [f"{band}: {n}" for band, n in temp_df3['count'].items()]

plt.figure(figsize=(9, 5))
plt.title("Percentage of Residents Who Experience PAIN when BRUSHING (post-training)", fontsize=20)
pain_ax = sns.barplot(data=temp_df3.T, palette=colors)
pain_ax.set(ylabel='count');

plt.xticks(rotation=45, ha='right', rotation_mode='anchor');

plt.legend(handles=pain_patches,
           labels=pain_labels,
           facecolor="white",
           fontsize=15);

plt.savefig("../images/post_intervention/demographics/pain_when_brushing_POST", bbox_inches='tight')

In [None]:
col = plots.columns[29]
distributions(plots[col], 
              "Percentage of Residents Who Experience PAIN when FLOSSING (post-training)",
              order="percent",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/pain_when_flossing_POST")

In [None]:
# Chart the answers stored in column 30 of `plots` (dental pain outside brushing and
# flossing, per the title) with the `distributions` helper defined outside this section.
col = plots.columns[30]
distributions(plots[col], 
              "Percentage of Residents who Experience Dental Pain Throughout the Day\n(when they are not brushing or flossing) (post-training)",
              order="percent",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/demographics/pain_throughout_day_POST")

In [None]:
# Tally the answers in column 31 of `plots` in the order the chart should show them, as a
# one-column ("count") frame indexed by answer option.
# `reindex(..., fill_value=0)` adds a zero only for options that are really absent, so a
# real count can never be overwritten with 0 and a missing option cannot raise KeyError.
temp_df4 = (
    plots[plots.columns[31]]
    .value_counts()
    .reindex(
        [
            'Less than 1 time each month',
            '1 time each week',
            '2 to 3 times each week',
            '1 time each day',
            '2 to 3 times each day'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

In [None]:
colors = ['#82C5FE','#70AADB','#6193BD','#527CA0','#456886']
title = "How often should residents brush their teeth? (post-training)"

# Bar chart of the brushing-frequency tallies, one colour per answer option.
plt.figure(figsize=(9, 5))
bar_ax = sns.barplot(data=temp_df4.T, palette=colors)
bar_ax.set_title(title, fontsize=20)
bar_ax.set(ylabel='count')
plt.xticks(rotation=45, ha='right', rotation_mode='anchor')

# Legend entries read "<option>: <count>" so the exact numbers are visible on the figure.
swatches = [mpatches.Patch(color=shade) for shade in colors]
captions = [f"{option}: {tally}" for option, tally in temp_df4['count'].items()]
plt.legend(handles=swatches, labels=captions, facecolor="white", fontsize=15)

plt.savefig("../images/post_intervention/knowledge/often_should_brush_POST", bbox_inches='tight')

In [None]:
# Tally the answers in column 32 of `plots` in questionnaire order, as a one-column
# ("count") frame indexed by answer option.
# `reindex(..., fill_value=0)` adds a zero only for options that are really absent, so a
# real count can never be overwritten with 0 and a missing option cannot raise KeyError.
temp_df5 = (
    plots[plots.columns[32]]
    .value_counts()
    .reindex(
        [
            'Less than 1 time each month',
            '1 time each month',
            '1 time each week',
            '2 to 3 times each week',
            '1 time each day',
            '2 to 3 times each day'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

In [None]:
colors = ['#8AC386','#73B06F','#5E9C5A','#498745','#3A7536', "#2C6329"]
title = "How often should residents floss their teeth (post-training)"

# Bar chart of the flossing-frequency tallies, one colour per answer option.
plt.figure(figsize=(9, 5))
bar_ax = sns.barplot(data=temp_df5.T, palette=colors)
bar_ax.set_title(title, fontsize=20)
bar_ax.set(ylabel='count')
plt.xticks(rotation=45, ha='right', rotation_mode='anchor')

# Legend entries read "<option>: <count>" so the exact numbers are visible on the figure.
swatches = [mpatches.Patch(color=shade) for shade in colors]
captions = [f"{option}: {tally}" for option, tally in temp_df5['count'].items()]
plt.legend(handles=swatches, labels=captions, facecolor="white", fontsize=15)

plt.savefig("../images/post_intervention/knowledge/often_should_floss_POST", bbox_inches='tight')

In [None]:
# Show the raw answers in the first `post_DDPC_required` column (charted in the next cell).
post_DDPC_required[post_DDPC_required.columns[0]]


In [None]:
# Chart the answers in the first `post_DDPC_required` column (satisfaction with the
# training videos, per the title) with the `distributions` helper defined outside this
# section.
col = post_DDPC_required.columns[0]
distributions(post_DDPC_required[col], 
              "Respondents' Satisfaction with Smiles United Training Videos", 
              order="agree_lowercase",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/added_post_only/Satisfaction_with_Smiles_United_Training_POST")

In [None]:
# Tally the answers in the second `post_DDPC_required` column from most to least
# agreement, as a one-column ("count") frame indexed by answer option.
# `reindex(..., fill_value=0)` adds a zero only for options that are really absent, so a
# real 'Disagree' count can never be overwritten with 0 and a missing option cannot
# raise KeyError.
temp_df6 = (
    post_DDPC_required[post_DDPC_required.columns[1]]
    .value_counts()
    .reindex(
        [
            'Strongly agree',
            'Agree',
            'Neutral',
            'Disagree',
            'Strongly disagree'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

# One colour per answer option, shared by the bars and the legend swatches.
agreement_palette = ['#2bdb1f','#c4fa2f','#fae22f','#fa942f','#fa2f2f']

plt.figure(figsize=(9, 5))
plt.title("Increase in Advocacy Following the Smiles United Training", fontsize=20)
sns.barplot(data = temp_df6.T, 
            palette=agreement_palette).set(ylabel='count');

plt.xticks(rotation=45, ha='right', rotation_mode='anchor');


# Legend entries read "<option>: <count>" so the exact numbers are visible on the figure.
plt.legend(handles = [mpatches.Patch(color=i) for i in agreement_palette],
           labels=[f"{i}: {temp_df6['count'][i]}" for i in temp_df6.T.columns],
           facecolor="white", 
           fontsize=15);

plt.savefig("../images/post_intervention/added_post_only/advocacy_following_Smiles_United_Training_POST", bbox_inches='tight')

In [None]:
# Tally the answers in the third `post_DDPC_required` column from most to least
# agreement (plus 'Not Specified'), as a one-column ("count") frame indexed by option.
# `reindex(..., fill_value=0)` adds a zero only for options that are really absent, so a
# real 'Disagree' count can never be overwritten with 0 and a missing option cannot
# raise KeyError.
temp_df7 = (
    post_DDPC_required[post_DDPC_required.columns[2]]
    .value_counts()
    .reindex(
        [
            'Strongly agree',
            'Agree',
            'Neutral',
            'Disagree',
            'Strongly disagree',
            'Not Specified'
        ],
        fill_value=0,
    )
    .to_frame("count")
)

# One colour per answer option, shared by the bars and the legend swatches.
agreement_palette = ['#2bdb1f','#c4fa2f','#fae22f','#fa942f','#fa2f2f', 'C0']

plt.figure(figsize=(9, 5))
plt.title("Respondents Feel They Are Better Able to Say What They Want", fontsize=20)
sns.barplot(data = temp_df7.T, 
            palette=agreement_palette).set(ylabel='count');

plt.xticks(rotation=45, ha='right', rotation_mode='anchor');


# Legend entries read "<option>: <count>" so the exact numbers are visible on the figure.
plt.legend(handles = [mpatches.Patch(color=i) for i in agreement_palette],
           labels=[f"{i}: {temp_df7['count'][i]}" for i in temp_df7.T.columns],
           facecolor="white", 
           fontsize=15);

plt.savefig("../images/post_intervention/added_post_only/better_ability_to_say_want_POST", bbox_inches='tight')

In [None]:
# Chart the answers in the fourth `post_DDPC_required` column (leadership/advocacy
# positions, per the title) with the `distributions` helper defined outside this section.
col = post_DDPC_required.columns[3]
distributions(post_DDPC_required[col], 
              "Serving in Leadership or Advocacy Positions", 
              order="agree_lowercase",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/added_post_only/Serving_in_Leadership_POST")

In [None]:
# Raw answer counts for the same column that the previous cell charts.
col = post_DDPC_required.columns[3]
post_DDPC_required[col].value_counts()

# Compare pre- and post- surveys
As a reminder: 
- cleaned post training data df: `plots`
- cleaned pre training data df, with 50 reliability surveys removed: `sig_testing`

In [None]:
# Preview the first rows of `plots`, the cleaned post-training frame.
plots.head()

In [None]:
# Preview the first rows of `sig_testing`, the cleaned pre-training frame.
sig_testing.head()

In [None]:
# Checked whether stray spaces in `ChosenID` were stopping more than the 4 IDs that
# paired from pairing. The check is kept for reference but disabled:
#for ID in sig_testing['ChosenID']:
#    print(f"'{ID}'")

# Unfortunately, there are none.

In [None]:
# Print the (rows, columns) of each working frame, one per line: post, plots, sig_testing.
for frame in (post, plots, sig_testing):
    print(frame.shape)

## Extract just the knowledge questions

In [None]:
# Keep only the six knowledge questions from `post`. The column order here defines
# Q1..Q6 for scores_df(), which grades the columns by position.
post_k = post[
    ['Fluoridated products, such as fluoridated toothpaste and fluoridated water, can help improve the oral health of residents.',
     'It is normal for healthy gums to bleed when brushing teeth.',
     'Dry mouth can have a negative effect on overall oral health.',
     'Snacking throughout the day can have a negative impact on oral health.',
     'How often should residents brush their teeth each day?',
     'How often should residents floss their teeth each day?']
]

In [None]:
# Keep only the six knowledge questions from `sig_testing` (the pre-training frame).
# The column order here defines Q1..Q6 for scores_df(), which grades columns by position.
pre_k = sig_testing[
    ['Fluoridated products, such as fluoridated toothpaste and fluoridated water, can help improve the oral health of residents.',
     'It is normal for healthy gums to bleed when brushing teeth.',
     'Dry mouth can have a negative effect on overall oral health.',
     'Snacking throughout the day can have a negative impact on oral health.',
     'How often should residents brush their teeth each day?',
     'How often should residents floss their teeth each day?']
]

In [None]:
# Element-wise check that both knowledge frames carry the same labels in the same order.
post_k.columns == pre_k.columns

In [None]:
# (rows, columns) of `post`.
post.shape

## change string values to numbers

In [None]:
# Numeric codes for the knowledge answers. scores_df() looks this name up when it is
# CALLED, and the "Sig testing" section later rebinds `convert_dict` to a Likert mapping,
# so this cell must be the most recent definition whenever scores_df() runs.
# 'True' and 'Less than 1 time each month' both map to 1; they belong to different
# questions, so the codes are only meaningful per column.
convert_dict = {
    
    # true/false (knowledge)
    'True':1,
    'False':2,
    
    # frequency (knowledge)
    'Less than 1 time each month':1,
    '1 time each month': 2,
    '1 time each week':3,
    '2 to 3 times each week':4,
    '1 time each day':5,
    '2 to 3 times each day':6,
    
}

## Function to create compare df for plotting

In [None]:
def scores_df(pre_knowledge_df, post_knowledge_df):
    """Summarise the share of correct knowledge answers before and after training.

    The answer coding is kept local to this function. The notebook-level
    ``convert_dict`` is rebound to a Likert mapping further down, and reading it at
    call time would silently grade every answer as wrong after that cell has run.

    Parameters
    ----------
    pre_knowledge_df, post_knowledge_df : pandas.DataFrame
        Exactly six columns in the notebook's Q1..Q6 order, holding the raw answers
        ('True'/'False' for Q1-Q4, a frequency phrase for Q5-Q6). Columns are graded
        by position, not by name.

    Returns
    -------
    pandas.DataFrame
        Fourteen rows (Q1..Q6 and 'Avg' for "pre", then the same for "post") with
        columns 'score', 'question' and 'group'. A question's score is the fraction
        of respondents who gave the answer graded as correct; 'Avg' is the mean,
        over respondents, of their fraction of correct answers.

    Raises
    ------
    ValueError
        If either frame does not have exactly six columns.
    """
    answer_codes = {
        # true/false (knowledge)
        'True': 1,
        'False': 2,
        # frequency (knowledge)
        'Less than 1 time each month': 1,
        '1 time each month': 2,
        '1 time each week': 3,
        '2 to 3 times each week': 4,
        '1 time each day': 5,
        '2 to 3 times each day': 6,
    }
    # Code graded as correct for Q1..Q6, aligned with the column positions.
    correct_codes = [1, 2, 1, 1, 6, 6]
    labels = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Avg']

    def _group_scores(knowledge_df, group):
        # Grade one survey wave and return its seven score rows.
        if knowledge_df.shape[1] != len(correct_codes):
            raise ValueError(
                f"expected {len(correct_codes)} knowledge columns, got {knowledge_df.shape[1]}"
            )
        coded = knowledge_df.replace(answer_codes)
        # 1 when the answer equals the correct code, 0 for anything else (missing included).
        graded = pd.DataFrame(
            {
                position: (coded.iloc[:, position] == code).astype(int)
                for position, code in enumerate(correct_codes)
            },
            index=coded.index,
        )
        per_question = graded.mean().tolist()
        overall = graded.sum(axis=1).div(len(correct_codes)).mean()
        return pd.DataFrame(
            {'score': per_question + [overall], 'question': labels, 'group': group}
        )

    return pd.concat(
        [_group_scores(pre_knowledge_df, "pre"), _group_scores(post_knowledge_df, "post")],
        ignore_index=True,
    )

## Create df for comparison

In [None]:
# Build the pre/post knowledge score table and display it.
scores = scores_df(pre_k,post_k)
scores

## Reminder of the questions

In [None]:
# Lookup from the short labels used in the score table (Q1..Q6) to the full question
# text, in the same order as the columns of `post_k` / `pre_k`.
whole_q = {
    "Q1":'Fluoridated products, such as fluoridated toothpaste and fluoridated water, can help improve the oral health of residents.',
    "Q2":'It is normal for healthy gums to bleed when brushing teeth.',
    "Q3":'Dry mouth can have a negative effect on overall oral health.',
    "Q4":'Snacking throughout the day can have a negative impact on oral health.',
    "Q5":'How often should residents brush their teeth each day?',
    "Q6":'How often should residents floss their teeth each day?'
}

## Plot scores of pre and post training

In [None]:
# Horizontal grouped bars: one pair (pre / post) per knowledge question plus the average.
fig, ax = plt.subplots(figsize=(10, 7))
sns.barplot(
    data=scores,
    x='score',
    y='question',
    hue='group',
    dodge=True,
    ax=ax,
)
ax.set(title="Comparison of 'Knowledge' Scores Between Pre- & Post- Training")

# Park the legend outside the axes, to the right, so it never covers a bar.
ax.legend(
    facecolor="white",
    fontsize=15,
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.,
)

fig.savefig("../images/post_intervention/compare_knowledge", bbox_inches='tight');

## Attitude scores

## convert values

In [None]:
# Likert answer -> score on a 0..1 scale. compare_attitude() applies this mapping to the
# five items it groups as "positive".
positive_convert = {

    'Strongly Agree':1,
    'Agree':.75,
    'Neutral':.5,
    'Disagree':.25,
    'Strongly Disagree':0
}

# compare_attitude() applies this mapping to the single item it groups as "negative".
# It is currently identical to positive_convert, i.e. that item is NOT reverse-scored.
# NOTE(review): the sig-testing section says reverse coding was dropped -- confirm that
# decision is meant to apply here as well.
negative_convert = {

    'Strongly Agree':1,
    'Agree':.75,
    'Neutral':.5,
    'Disagree':.25,
    'Strongly Disagree':0
}

In [None]:
def compare_attitude(pre,post):
    """Build the pre/post table of mean attitude scores.

    Both frames must contain the six attitude question columns listed below. Answers
    are scored through the notebook-level ``positive_convert`` (first five items) and
    ``negative_convert`` (last item) mappings.

    Returns a DataFrame with columns 'score', 'question' and 'group': rows Q1..Q6 and
    'Avg' for "pre", followed by the same seven rows for "post".
    """
    positive_items = [
        'I believe I have effective techniques which I use to brush the teeth of residents under my care.',
        'I believe I have previously received adequate training to help provide the best oral care possible to residents under my care.',
        'I feel comfortable assisting residents in the safe use of fluoridated dental products (such as fluoridated toothpaste).',
        'I am able to confidently recognize non-verbal signs of pain in residents under my care.',
        'I feel confident that I have the knowledge to identify when residents under my care experience oral pain.',
    ]
    negative_items = [
        'I believe residents under my care have oral health care needs which require further training to adequately understand and help manage.',
    ]
    row_labels = ['Q1','Q2','Q3','Q4','Q5','Q6','Avg']

    def _wave_summary(frame, wave):
        # Score one survey wave: the five positive items first, then the negative one.
        scored = pd.concat(
            [
                frame[positive_items].replace(positive_convert),
                frame[negative_items].replace(negative_convert),
            ],
            axis=1,
        )
        summary = pd.DataFrame(scored.mean())
        # Extra row: each respondent's summed score over the six items, averaged.
        summary.loc[len(summary)] = np.mean(scored.sum(axis=1)/6)
        summary['question'] = row_labels
        summary['group'] = wave
        return summary

    stacked = pd.concat([_wave_summary(pre, "pre"), _wave_summary(post, "post")])
    stacked.columns = ['score', 'question', 'group']
    return stacked.reset_index(drop=True)



In [None]:
# Pre-training frame first, post-training frame second.
att_scores = compare_attitude(sig_testing, post)

In [None]:
# Display the pre/post attitude score table.
att_scores

In [None]:
# Lookup from the short labels Q1..Q6 to the full attitude question text. The
# rank-sum cells further down use these values as column names.
att_questions = {
    "Q1":'I believe I have effective techniques which I use to brush the teeth of residents under my care.',
    "Q2":'I believe I have previously received adequate training to help provide the best oral care possible to residents under my care.',
    "Q3":'I feel comfortable assisting residents in the safe use of fluoridated dental products (such as fluoridated toothpaste).',
    "Q4":'I am able to confidently recognize non-verbal signs of pain in residents under my care.',
    "Q5":'I feel confident that I have the knowledge to identify when residents under my care experience oral pain.',
    "Q6":'I believe residents under my care have oral health care needs which require further training to adequately understand and help manage.'
}

In [None]:
# Horizontal grouped bars: one pair (pre / post) per attitude question plus the average.
fig, ax = plt.subplots(figsize=(10, 7))
sns.barplot(
    data=att_scores,
    x='score',
    y='question',
    hue='group',
    dodge=True,
    ax=ax,
)
ax.set(title="Comparison of 'Attitude' Scores Between Pre- & Post- Training")

# Park the legend outside the axes, to the right, so it never covers a bar.
ax.legend(
    facecolor="white",
    fontsize=15,
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.,
)
fig.savefig("../images/post_intervention/compare_attitude", bbox_inches='tight');

# Sig testing

## extract just attitude questions

In [None]:
# The six attitude question columns, in Q1..Q6 order (same texts as `att_questions`).
att_cols = [
    'I believe I have effective techniques which I use to brush the teeth of residents under my care.',
    'I believe I have previously received adequate training to help provide the best oral care possible to residents under my care.',
    'I feel comfortable assisting residents in the safe use of fluoridated dental products (such as fluoridated toothpaste).',
    'I am able to confidently recognize non-verbal signs of pain in residents under my care.',
    'I feel confident that I have the knowledge to identify when residents under my care experience oral pain.',
    'I believe residents under my care have oral health care needs which require further training to adequately understand and help manage.'
]

In [None]:
# Pre-training attitude answers, still as text (`sig_testing` is the pre-training frame).
pre_att_strings = sig_testing[att_cols]

# Post-training attitude answers, still as text.
post_att_strings = post[att_cols]

## create conversion dict

In [None]:
# Likert answer -> ordinal code (5 = Strongly Agree ... 1 = Strongly Disagree).
# NOTE: this REBINDS `convert_dict`, the name that previously held the knowledge answer
# coding and that scores_df() reads when it is called. Calling scores_df() after this
# cell (and before the knowledge coding is re-declared further down) grades every
# answer as wrong.
convert_dict = {
    # string values: num values
    'Strongly Agree':5,
    'Agree':4,
    'Neutral':3,
    'Disagree':2,
    'Strongly Disagree':1
}

## separate by positive and negative

In [None]:
# not reverse coding anymore after talking to Doctor Rozdolski

## convert string values to numbers

In [None]:
# Pre-training attitude answers as ordinal codes (uses the Likert `convert_dict` above).
pre_att_nums = pre_att_strings.replace(convert_dict)
pre_att_nums

In [None]:
# Post-training attitude answers as ordinal codes (uses the Likert `convert_dict` above).
post_att_nums = post_att_strings.replace(convert_dict)
post_att_nums

In [None]:
from scipy import stats

In [None]:
# H0: the pre and post respondents will have the same attitude scores
# H1: the post respondents will have higher attitude scores

In [None]:
# One-sided Mann-Whitney U test with pre as the first sample and post as the second;
# alternative="less" corresponds to H1 above (pre scores lower than post scores).
# Element [1] of the result is the p-value. Both inputs are six-column frames; the next
# cell iterates over the result, i.e. it holds one p-value per question.
print(stats.mannwhitneyu(pre_att_nums, post_att_nums, 
                         alternative="less",
                         method="asymptotic")[1])

In [None]:
# One-sided p-value per attitude question, then the verdict at the 5 % level
# (printed in column order, one line per question).
q_sigs = stats.mannwhitneyu(
    pre_att_nums,
    post_att_nums,
    alternative="less",
    method="asymptotic",
).pvalue
for p_value in q_sigs:
    verdict = (
        "reject null hypothesis, There is a statistically significant improvement in the scores of post training surveys"
        if p_value <= .05
        else "do not reject null hypothesis"
    )
    print(verdict)

In [None]:
# (respondents, questions) in the pre-training attitude frame.
pre_att_nums.shape

In [None]:
# (respondents, questions) in the post-training attitude frame.
post_att_nums.shape

In [None]:
# Knowledge answer coding. It is re-declared here because `convert_dict` was rebound
# to the Likert mapping earlier in this section.
convert_dict = {
    
    # true/false (knowledge)
    'True':1,
    'False':2,
    
    # frequency (knowledge)
    'Less than 1 time each month':1,
    '1 time each month': 2,
    '1 time each week':3,
    '2 to 3 times each week':4,
    '1 time each day':5,
    '2 to 3 times each day':6,
    
}

# Code of the answer graded as correct for each of the six knowledge columns, in
# column order (Q1..Q6).
correct_codes = [1, 2, 1, 1, 6, 6]

# replace string values with numbers
pre_as_nums = pre_k.replace(convert_dict)
post_as_nums = post_k.replace(convert_dict)

# Grade every answer in both waves: 1 when it equals the correct code, 0 otherwise
# (a missing answer therefore scores 0). One loop replaces the twelve hand-copied
# comprehensions, so the pre and post grading cannot drift apart.
for graded in (pre_as_nums, post_as_nums):
    for column, correct in zip(graded.columns[:6], correct_codes):
        graded[column] = (graded[column] == correct).astype(int)

In [None]:
# Display the graded (0/1) post-training knowledge answers.
post_as_nums

In [None]:
# Print the same one-sided p-values (H1: pre scores lower than post scores) that the
# decision loop in the next cell evaluates. The scipy default is a two-sided test, which
# made the printed numbers disagree with the verdicts and with the attitude test above.
print(stats.mannwhitneyu(pre_as_nums, post_as_nums,
                         alternative="less",
                         method="asymptotic")[1])

In [None]:
# One-sided p-value per knowledge question, then the verdict at the 5 % level
# (printed in column order, one line per question).
q_sigs = stats.mannwhitneyu(
    pre_as_nums,
    post_as_nums,
    alternative="less",
    method="asymptotic",
).pvalue
for p_value in q_sigs:
    verdict = (
        "reject null hypothesis, There is a statistically significant improvement in the scores of post training surveys"
        if p_value <= .05
        else "do not reject null hypothesis"
    )
    print(verdict)

# Find Mann Whitney U Ranked Order

In [None]:
# Likert answer -> ordinal code used for the hand-computed rank sums
# (5 = Strongly Agree ... 1 = Strongly Disagree); passed to sum_of_ranks() below.
rank_convert = {

    'Strongly Agree':5,
    'Agree':4,
    'Neutral':3,
    'Disagree':2,
    'Strongly Disagree':1
}


In [None]:
def sum_of_ranks(question,pre_df,post_df,convert_dict):
    """Return the Mann-Whitney rank sums ``(R_pre, R_post)`` for one question.

    The answers of both groups are pooled, ranked from 1 upwards in ascending order of
    their numeric code, and tied answers share the average of the ranks they occupy.

    Parameters
    ----------
    question : str
        Column holding the answers; must exist in both frames.
    pre_df, post_df : pandas.DataFrame
        Pre- and post-training responses.
    convert_dict : dict
        Answer text -> numeric code. It is applied to BOTH frames (the pre frame
        used to be converted with the global ``rank_convert`` regardless of this
        argument).

    Returns
    -------
    tuple of float
        Sum of the tie-averaged ranks of the pre group and of the post group.

    Raises
    ------
    ValueError
        From ``pandas.to_numeric`` when an answer is left unconverted by
        ``convert_dict`` and is not numeric.

    Notes
    -----
    Missing answers receive no rank and add nothing to either sum; they were
    previously ranked together with the highest code.
    """
    pre_codes = pd.to_numeric(pre_df[question].replace(convert_dict))
    post_codes = pd.to_numeric(post_df[question].replace(convert_dict))

    # Rank the pooled sample once; method='average' is the tie correction.
    pooled_ranks = pd.concat([pre_codes, post_codes], ignore_index=True).rank(method='average')

    # The first len(pre_codes) positions of the pooled series belong to the pre group.
    n_pre = len(pre_codes)
    R_pre = pooled_ranks.iloc[:n_pre].sum()
    R_post = pooled_ranks.iloc[n_pre:].sum()

    return R_pre, R_post

def U_values(R_pre,R_post,pre_df,post_df):
    """Turn the two rank sums into the Mann-Whitney U statistics ``(U_pre, U_post)``.

    For each group, U = n_pre * n_post + n * (n + 1) / 2 - R, where n is that group's
    row count (``shape[0]`` of its frame) and R is its rank sum.
    """
    size_pre, size_post = pre_df.shape[0], post_df.shape[0]
    pair_count = size_pre * size_post

    def _u(group_size, rank_sum):
        # Largest rank sum the group could have, offset by the pair count, minus the actual one.
        return (pair_count + ((group_size * (group_size + 1)) / 2)) - rank_sum

    return _u(size_pre, R_pre), _u(size_post, R_post)

In [None]:
# Attitude Q1: rank sums (R1 = pre, R2 = post) and the matching U statistics (U1, U2).
r_pre, r_post = sum_of_ranks(att_questions['Q1'], sig_testing, post, rank_convert)
U_pre, U_post = U_values(r_pre, r_post, sig_testing, post)

print(f"""
question: {att_questions['Q1']}
R1 = {r_pre}
R2 = {r_post}
U1 = {U_pre}
U2 = {U_post}
""")

In [None]:
# Attitude Q2: rank sums (R1 = pre, R2 = post) and the matching U statistics (U1, U2).
r_pre, r_post = sum_of_ranks(att_questions['Q2'], sig_testing, post, rank_convert)
U_pre, U_post = U_values(r_pre, r_post, sig_testing, post)

print(f"""
question: {att_questions['Q2']}
R1 = {r_pre}
R2 = {r_post}
U1 = {U_pre}
U2 = {U_post}
""")

In [None]:
# Attitude Q3: rank sums (R1 = pre, R2 = post) and the matching U statistics (U1, U2).
r_pre, r_post = sum_of_ranks(att_questions['Q3'], sig_testing, post, rank_convert)
U_pre, U_post = U_values(r_pre, r_post, sig_testing, post)

print(f"""
question: {att_questions['Q3']}
R1 = {r_pre}
R2 = {r_post}
U1 = {U_pre}
U2 = {U_post}
""")

In [None]:
# Attitude Q4: rank sums (R1 = pre, R2 = post) and the matching U statistics (U1, U2).
r_pre, r_post = sum_of_ranks(att_questions['Q4'], sig_testing, post, rank_convert)
U_pre, U_post = U_values(r_pre, r_post, sig_testing, post)

print(f"""
question: {att_questions['Q4']}
R1 = {r_pre}
R2 = {r_post}
U1 = {U_pre}
U2 = {U_post}
""")

In [None]:
# Attitude Q5: rank sums (R1 = pre, R2 = post) and the matching U statistics (U1, U2).
r_pre, r_post = sum_of_ranks(att_questions['Q5'], sig_testing, post, rank_convert)
U_pre, U_post = U_values(r_pre, r_post, sig_testing, post)

print(f"""
question: {att_questions['Q5']}
R1 = {r_pre}
R2 = {r_post}
U1 = {U_pre}
U2 = {U_post}
""")

In [None]:
# Attitude Q6: rank sums (R1 = pre, R2 = post) and the matching U statistics (U1, U2).
r_pre, r_post = sum_of_ranks(att_questions['Q6'], sig_testing, post, rank_convert)
U_pre, U_post = U_values(r_pre, r_post, sig_testing, post)

print(f"""
question: {att_questions['Q6']}
R1 = {r_pre}
R2 = {r_post}
U1 = {U_pre}
U2 = {U_post}
""")

In [None]:
# Last rows of the pre-training attitude answers (text form).
pre_att_strings.tail()

In [None]:
# Pre-training (`sig_testing`) answers to attitude Q1, charted with the `distributions`
# helper defined outside this section.
col = pre_att_strings.columns[0]
distributions(pre_att_strings[col], 
              "Respondents' Belief They Have Effective Teeth Brushing \nTechniques to Aid Residents (Sig Testing)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/sig_testing/effective_teeth_brushing_techniques_SIG")


In [None]:
# Pre-training (`sig_testing`) answers to attitude Q2, charted with the `distributions`
# helper defined outside this section.
col = pre_att_strings.columns[1]
distributions(pre_att_strings[col], 
              "Respondents' Belief They Have Adequate Training (Sig Testing)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/sig_testing/adequate_training_SIG")


In [None]:
# Pre-training (`sig_testing`) answers to attitude Q3, charted with the `distributions`
# helper defined outside this section.
col = pre_att_strings.columns[2]
distributions(pre_att_strings[col], 
              "Respondents' Comfort Assisting Residents in the \nSafe Use of Fluoridated Dental Products (Sig Testing)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/sig_testing/comfort_assisting_SIG")


In [None]:
# Pre-training (`sig_testing`) answers to attitude Q4, charted with the `distributions`
# helper defined outside this section.
col = pre_att_strings.columns[3]
distributions(pre_att_strings[col], 
              "Respondents' Confidence in Ability to Recognize \nNon-Verbal Signs of Pain in Residents (Sig Testing)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/sig_testing/confidence_identify_non_verbal_pain_SIG")


In [None]:
# Pre-training (`sig_testing`) answers to attitude Q5, charted with the `distributions`
# helper defined outside this section.
col = pre_att_strings.columns[4]
distributions(pre_att_strings[col], 
              "Respondents' Confidence They Have Adequate \nKnowledge to Identify Residents' Oral Pain  (Sig Testing)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/sig_testing/confidence_identify_oral_pain_SIG")


In [None]:
# Pre-training (`sig_testing`) answers to attitude Q6, charted with the `distributions`
# helper defined outside this section.
col = pre_att_strings.columns[5]
distributions(pre_att_strings[col], 
              "Residents Have Oral Health Care Needs \nWhich Require Further Training  (Sig Testing)",
              order="agree",
              title_fontsize=18,
              figsize=(9,5),
              rotate_xlabels=45,
              filepath="../images/post_intervention/sig_testing/needed_further_training_SIG")


In [None]:

# Raw answer counts for attitude Q6 in the pre-training data.
col = pre_att_strings.columns[5]
pre_att_strings[col].value_counts()

In [None]:
def print_percentage_of_responses(pre_SA, pre_A, pre_N, pre_D, pre_SD, post_SA, post_A, post_N, post_D, post_SD,
                                  n_pre=115, n_post=63):
    """Print each answer option's share of responses for the pre and post surveys.

    Parameters
    ----------
    pre_SA, pre_A, pre_N, pre_D, pre_SD : number
        Pre-survey counts of Strongly Agree, Agree, Neutral, Disagree and
        Strongly Disagree answers.
    post_SA, post_A, post_N, post_D, post_SD : number
        The same five counts for the post survey.
    n_pre, n_post : number, optional
        Number of respondents each count is divided by. The defaults (115 and 63)
        are the sample sizes this function previously hard-coded, so existing calls
        print exactly what they did before.

    The shares are printed as fractions between 0 and 1, not multiplied by 100.
    """
    pre2_SA, pre2_A, pre2_N, pre2_D, pre2_SD = (
        count / n_pre for count in (pre_SA, pre_A, pre_N, pre_D, pre_SD)
    )
    post2_SA, post2_A, post2_N, post2_D, post2_SD = (
        count / n_post for count in (post_SA, post_A, post_N, post_D, post_SD)
    )
    
    print(f"""
         PRE       POST
    SA  {pre2_SA}  {post2_SA}
    A   {pre2_A}  {post2_A}
    N   {pre2_N}  {post2_N}  
    D   {pre2_D}  {post2_D}
    SD  {pre2_SD}  {post2_SD}
    """)

In [None]:
# Q6 answer counts: SA, A, N, D, SD for the pre survey, then the same five for the post survey.
print_percentage_of_responses(11,53,32,8,11,12,27,19,4,1) #Q6

In [None]:
# Q5 answer counts: SA, A, N, D, SD for the pre survey, then the same five for the post survey.
print_percentage_of_responses(14,60,33,7,1,6,34,21,1,1) #Q5

In [None]:
# Q4 answer counts: SA, A, N, D, SD for the pre survey, then the same five for the post survey.
print_percentage_of_responses(19,69,24,2,1,16,33,11,2,1) #Q4

In [None]:
# Q3 answer counts: SA, A, N, D, SD for the pre survey, then the same five for the post survey.
print_percentage_of_responses(20,60,28,6,1,17,28,15,1,2) #Q3

In [None]:
# Q2 answer counts: SA, A, N, D, SD for the pre survey, then the same five for the post survey.
print_percentage_of_responses(9,24,57,19,6,9,21,26,3,4) #Q2

In [None]:
# Q1 answer counts: SA, A, N, D, SD for the pre survey, then the same five for the post survey.
print_percentage_of_responses(14,49,41,8,3,12,30,20,0,1) #Q1

In [None]:
# decrease in N,D,SD and increase in SA,A