# Define Custom Functions

In [52]:
import numpy as np
import pandas as pd
import re

In [53]:
def get_text(path):
    with open(path) as file:
        text = np.loadtxt(file, delimiter='\n',dtype='str')

    return pd.DataFrame(text)

In [54]:
def add_and_modify_columns(df):
    columns_to_add = ['strongly agree','agree','neither agree','disagree','strongly disagree']
    df[columns_to_add] = 0
    mapper = {0:'question'}
    df.rename(mapper,inplace=True,axis=1)
    return df

In [55]:
def extract_percentages(df):
    full_list = []
    for index in range(1,20):
        if index%2 != 0:
            row = df.iloc[index,0]
            row = re.findall('[0-9] | [0-9][0-9]',str(row))
            values = [float(x) for x in row]
            full_list.append(values)
    return full_list    

In [56]:
def reset_indices(df):
    indexes = []
    for index in range(1,20):
        if index%2 != 0:
            indexes.append(index)

    df.drop(indexes,axis=0,inplace=True)
    df.reset_index(drop=True,inplace=True)
#return df

In [57]:
def insert_values_into(df):
    p = extract_percentages(df)
    reset_indices(df)
    data_x = pd.DataFrame(p,columns=df.columns[1:])
    return data_x

In [58]:
def add_questions_and_reorder_columns(df):
    df['question'] = data['question']
    #mapper = {0:'question'}
    #data_n.rename(mapper,inplace=True,axis=1)
    cols = ['question','strongly agree','agree','neither agree','disagree','strongly disagree']
    df= df[cols]
    return df

# Create Datasets  Using a FunctionTransformer and Pipeline


In [59]:
from sklearn.preprocessing import FunctionTransformer as FT
from sklearn.pipeline import Pipeline

In [60]:
data = get_text('presurveytext.txt')
data

Unnamed: 0,0
0,"I feel comftable discussing my background, bel..."
1,0 10 40 40 10
2,Perspectives like mine are included in decisio...
3,0 0 20 30 50
4,The people I wk with are respectful to one ano...
5,0 20 30 30 20
6,This ganization provides a safe environment f ...
7,0 0 20 50 30
8,I can voice a contrary opinion without fear of...
9,0 0 30 20 50


In [61]:
data_post = get_text('postsurveytext.txt')

In [62]:
add_and_modify_columns_t = FT(add_and_modify_columns)
insert_values_into_t = FT(insert_values_into)
add_questions_and_reorder_columns_t = FT(add_questions_and_reorder_columns)

In [63]:
pipe = Pipeline(
steps=[
    ('a&mc',add_and_modify_columns_t),
    ('ivi',insert_values_into_t),
    ('aqrc',add_questions_and_reorder_columns_t)
], verbose=True)

In [64]:
presurvey = pipe.fit_transform(data)
presurvey.to_csv('presurvey_tr_pipe.csv')

[Pipeline] .............. (step 1 of 3) Processing a&mc, total=   0.0s
[Pipeline] ............... (step 2 of 3) Processing ivi, total=   0.0s
[Pipeline] .............. (step 3 of 3) Processing aqrc, total=   0.0s


In [65]:
presurvey

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,10.0,40.0,40.0,10.0
1,Perspectives like mine are included in decisio...,0.0,0.0,20.0,30.0,50.0
2,The people I wk with are respectful to one ano...,0.0,20.0,30.0,30.0,20.0
3,This ganization provides a safe environment f ...,0.0,0.0,20.0,50.0,30.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,30.0,20.0,50.0
5,I feel I can achieve success as my authentic s...,0.0,20.0,20.0,40.0,20.0
6,I can attend to an urgent personal matter duri...,0.0,20.0,0.0,70.0,10.0
7,This ganization encourages a good balance betw...,0.0,0.0,0.0,80.0,20.0
8,I feel like I belong at this ganization.,0.0,20.0,30.0,50.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,30.0,30.0,30.0


In [66]:
postsurvey = pipe.fit_transform(data_post)
postsurvey.to_csv('postsurvey_tr_pipe.csv')

[Pipeline] .............. (step 1 of 3) Processing a&mc, total=   0.0s
[Pipeline] ............... (step 2 of 3) Processing ivi, total=   0.0s
[Pipeline] .............. (step 3 of 3) Processing aqrc, total=   0.0s


In [67]:
postsurvey

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,30.0,40.0,30.0,0.0
1,Perspectives like mine are included in decisio...,0.0,30.0,20.0,30.0,10.0
2,The people I wk with are respectful to one ano...,0.0,60.0,0.0,40.0,0.0
3,This ganization provides a safe environment f ...,0.0,20.0,60.0,20.0,0.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,50.0,30.0,20.0
5,I feel I can achieve success as my authentic s...,0.0,30.0,50.0,10.0,10.0
6,I can attend to an urgent personal matter duri...,0.0,60.0,10.0,20.0,10.0
7,This ganization encourages a good balance betw...,0.0,10.0,50.0,30.0,10.0
8,I feel like I belong at this ganization.,0.0,50.0,40.0,10.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,50.0,20.0,30.0


In [68]:
#confirm that the columns total up to 100%
for i in postsurvey.index:
    l = list(presurvey.iloc[i,1:])
    l2 = list(postsurvey.iloc[i,1:])
    total = sum(l)
    total2 = sum(l2)
    print(total,total2,"     ",i)

100.0 100.0       0
100.0 90.0       1
100.0 100.0       2
100.0 100.0       3
100.0 100.0       4
100.0 100.0       5
100.0 100.0       6
100.0 100.0       7
100.0 100.0       8
90.0 100.0       9


In [69]:
#add the missing data to make sure percentages add up to 100%
presurvey.iloc[9,1] = 10.0

In [70]:
presurvey

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,10.0,40.0,40.0,10.0
1,Perspectives like mine are included in decisio...,0.0,0.0,20.0,30.0,50.0
2,The people I wk with are respectful to one ano...,0.0,20.0,30.0,30.0,20.0
3,This ganization provides a safe environment f ...,0.0,0.0,20.0,50.0,30.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,30.0,20.0,50.0
5,I feel I can achieve success as my authentic s...,0.0,20.0,20.0,40.0,20.0
6,I can attend to an urgent personal matter duri...,0.0,20.0,0.0,70.0,10.0
7,This ganization encourages a good balance betw...,0.0,0.0,0.0,80.0,20.0
8,I feel like I belong at this ganization.,0.0,20.0,30.0,50.0,0.0
9,The ganization has a diverse management team. ...,10.0,0.0,30.0,30.0,30.0


In [71]:
postsurvey.iloc[1,1] = 10.0

In [72]:
postsurvey

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,30.0,40.0,30.0,0.0
1,Perspectives like mine are included in decisio...,10.0,30.0,20.0,30.0,10.0
2,The people I wk with are respectful to one ano...,0.0,60.0,0.0,40.0,0.0
3,This ganization provides a safe environment f ...,0.0,20.0,60.0,20.0,0.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,50.0,30.0,20.0
5,I feel I can achieve success as my authentic s...,0.0,30.0,50.0,10.0,10.0
6,I can attend to an urgent personal matter duri...,0.0,60.0,10.0,20.0,10.0
7,This ganization encourages a good balance betw...,0.0,10.0,50.0,30.0,10.0
8,I feel like I belong at this ganization.,0.0,50.0,40.0,10.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,50.0,20.0,30.0


# Create Likert scale Dataset

In [73]:
def recreate_df():
    columns = []
    for i in range(1,11):
        columns.append('question_{}'.format(i))
    fill = {'Respondents ID':[x for x in range(1,11)]}
    recreated = pd.DataFrame(fill)
    recreated[columns] = 0
    return recreated

In [74]:
presurvey_likert_unfilled = recreate_df()
presurvey_likert_unfilled

Unnamed: 0,Respondents ID,question_1,question_2,question_3,question_4,question_5,question_6,question_7,question_8,question_9,question_10
0,1,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,0,0
5,6,0,0,0,0,0,0,0,0,0,0
6,7,0,0,0,0,0,0,0,0,0,0
7,8,0,0,0,0,0,0,0,0,0,0
8,9,0,0,0,0,0,0,0,0,0,0
9,10,0,0,0,0,0,0,0,0,0,0


In [75]:
#Randomly assign each survey participant a Likert score according to the percentage distributions above
def insert_likert_scores(unfilled_likert,survey_df):
    for col in unfilled_likert.columns[1:]:
        #initialize row counter
        for i in unfilled_likert.index:
            #Get percentages and convert them to probabilities
            prob = list(survey_df.iloc[1,1:])
            prob = [x/100 for x in prob]
            #Create Likert data based on percentages
            samples = [x for x in range(1,6)]
            s = np.random.choice(samples,size=10,p = prob)
            #Assign Likert data to respective columns
            unfilled_likert[col] = s
            filled_likert = unfilled_likert.copy()
    return filled_likert

# Fill Presurvey Likert

In [76]:
presurvey_likert = insert_likert_scores(presurvey_likert_unfilled,presurvey)
presurvey_likert

Unnamed: 0,Respondents ID,question_1,question_2,question_3,question_4,question_5,question_6,question_7,question_8,question_9,question_10
0,1,5,5,4,5,4,3,4,4,4,3
1,2,5,5,5,3,3,5,5,4,4,5
2,3,4,3,4,5,5,3,4,5,5,3
3,4,5,4,5,5,5,3,3,5,4,4
4,5,5,4,5,5,5,5,5,5,5,3
5,6,5,4,5,5,4,4,3,5,3,5
6,7,3,5,5,5,3,3,4,5,3,5
7,8,4,4,5,5,5,4,3,3,4,4
8,9,3,4,5,4,5,4,4,3,3,3
9,10,5,5,5,5,5,5,4,4,5,5


# Fill Postsurvey Likert

In [77]:
postsurvey_likert_unfilled = recreate_df()
postsurvey_likert_unfilled

Unnamed: 0,Respondents ID,question_1,question_2,question_3,question_4,question_5,question_6,question_7,question_8,question_9,question_10
0,1,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,0,0
5,6,0,0,0,0,0,0,0,0,0,0
6,7,0,0,0,0,0,0,0,0,0,0
7,8,0,0,0,0,0,0,0,0,0,0
8,9,0,0,0,0,0,0,0,0,0,0
9,10,0,0,0,0,0,0,0,0,0,0


In [78]:
postsurvey_likert = insert_likert_scores(postsurvey_likert_unfilled,postsurvey)
postsurvey_likert

Unnamed: 0,Respondents ID,question_1,question_2,question_3,question_4,question_5,question_6,question_7,question_8,question_9,question_10
0,1,2,2,2,5,2,4,4,2,4,2
1,2,4,5,2,4,2,2,4,2,3,1
2,3,2,2,2,1,4,4,3,3,5,2
3,4,2,2,3,3,3,4,1,2,4,2
4,5,4,3,2,3,3,1,4,2,4,3
5,6,2,3,3,1,2,1,3,3,4,4
6,7,4,3,3,3,4,3,4,4,5,2
7,8,2,3,2,2,4,5,4,1,4,3
8,9,3,1,5,3,3,3,4,1,2,4
9,10,2,5,4,1,4,2,4,4,5,4


In [79]:
#import pyreadstat
#pyreadstat.write_sav(presurvey_likert, 'presurvey_likert')
#pyreadstat.write_sav(postsurvey_likert, 'postsurvey_likert')
presurvey_likert.to_csv('presurvey_likert.csv')
postsurvey_likert.to_csv('postsurvey_likert.csv')