# Create Presurvey Data

In [5]:
import numpy as np
import pandas as pd
import re

In [6]:
def get_text(path):
    with open(path) as file:
        text = np.loadtxt(file, delimiter='\n',dtype='str')

    return pd.DataFrame(text)

In [7]:
presurvey_path = 'presurveytext.txt'
data = get_text(presurvey_path)

In [8]:
def add_and_modify_columns(df):
    columns_to_add = ['strongly agree','agree','neither agree','disagree','strongly disagree']
    df[columns_to_add] = 0
    mapper = {0:'question'}
    df.rename(mapper,inplace=True,axis=1)
    return df

In [9]:
data = add_and_modify_columns(data)

In [10]:
def extract_percentages(df):
    full_list = []
    for index in range(1,20):
        if index%2 != 0:
            row = df.iloc[index,0]
            row = re.findall('[0-9] | [0-9][0-9]',str(row))
            values = [float(x) for x in row]
            full_list.append(values)
    return full_list

In [11]:
percentages = extract_percentages(data)

In [12]:
def reset_indices(df):
    indexes = []
    for index in range(1,20):
        if index%2 != 0:
            indexes.append(index)

    df.drop(indexes,axis=0,inplace=True)
    df.reset_index(drop=True,inplace=True)
    return df

In [13]:
data = reset_indices(data)

In [14]:
def insert_values_into(df, p):
    data_x = pd.DataFrame(p,columns=df.columns[1:])
    return data_x

data_n = insert_values_into(data,percentages)
data_n

Unnamed: 0,strongly agree,agree,neither agree,disagree,strongly disagree
0,0.0,10.0,40.0,40.0,10.0
1,0.0,0.0,20.0,30.0,50.0
2,0.0,20.0,30.0,30.0,20.0
3,0.0,0.0,20.0,50.0,30.0
4,0.0,0.0,30.0,20.0,50.0
5,0.0,20.0,20.0,40.0,20.0
6,0.0,20.0,0.0,70.0,10.0
7,0.0,0.0,0.0,80.0,20.0
8,0.0,20.0,30.0,50.0,0.0
9,0.0,0.0,30.0,30.0,30.0


In [15]:
def add_questions_and_reorder_columns(df):
    df['question'] = data['question']
    #mapper = {0:'question'}
    #data_n.rename(mapper,inplace=True,axis=1)
    cols = ['question','strongly agree','agree','neither agree','disagree','strongly disagree']
    df= df[cols]
    return df

In [16]:
data_n = add_questions_and_reorder_columns(data_n)
data_n

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,10.0,40.0,40.0,10.0
1,Perspectives like mine are included in decisio...,0.0,0.0,20.0,30.0,50.0
2,The people I wk with are respectful to one ano...,0.0,20.0,30.0,30.0,20.0
3,This ganization provides a safe environment f ...,0.0,0.0,20.0,50.0,30.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,30.0,20.0,50.0
5,I feel I can achieve success as my authentic s...,0.0,20.0,20.0,40.0,20.0
6,I can attend to an urgent personal matter duri...,0.0,20.0,0.0,70.0,10.0
7,This ganization encourages a good balance betw...,0.0,0.0,0.0,80.0,20.0
8,I feel like I belong at this ganization.,0.0,20.0,30.0,50.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,30.0,30.0,30.0


In [17]:
data_n.to_csv('presurvey_wout_tr_pipe.csv')

# Create Postsurvey Dataset

In [18]:
postsurvey_path = 'postsurveytext.txt'
data_post = get_text(postsurvey_path)

In [19]:
data_post

Unnamed: 0,0
0,"1. I feel comftable cussig my backgroud, belie..."
1,0 30 40 30 0
2,2. Perspectives like mie are icluded i decisio...
3,10 30 20 30 10
4,3. The people I wk with are respectful to oe a...
5,0 60 0 40 0
6,4. This gaizatio provides a safe eviromet f th...
7,0 20 60 20 0
8,5. I ca voice a cotrary opiio without fear of ...
9,0 0 50 30 20


In [20]:
data_post = add_and_modify_columns(data_post)

In [21]:
percent = extract_percentages(data_post)
percent

[[0.0, 30.0, 40.0, 30.0, 0.0],
 [0.0, 30.0, 20.0, 30.0, 10.0],
 [0.0, 60.0, 0.0, 40.0, 0.0],
 [0.0, 20.0, 60.0, 20.0, 0.0],
 [0.0, 0.0, 50.0, 30.0, 20.0],
 [0.0, 30.0, 50.0, 10.0, 10.0],
 [0.0, 60.0, 10.0, 20.0, 10.0],
 [0.0, 10.0, 50.0, 30.0, 10.0],
 [0.0, 50.0, 40.0, 10.0, 0.0],
 [0.0, 0.0, 50.0, 20.0, 30.0]]

In [22]:
data_post = reset_indices(data_post)
data_post

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"1. I feel comftable cussig my backgroud, belie...",0,0,0,0,0
1,2. Perspectives like mie are icluded i decisio...,0,0,0,0,0
2,3. The people I wk with are respectful to oe a...,0,0,0,0,0
3,4. This gaizatio provides a safe eviromet f th...,0,0,0,0,0
4,5. I ca voice a cotrary opiio without fear of ...,0,0,0,0,0
5,6. I feel I ca achieve success as my authetic ...,0,0,0,0,0
6,7. I ca atted to a urget persoal matter durig ...,0,0,0,0,0
7,8. This gaizatio ecourages a good balace betwe...,0,0,0,0,0
8,9. I feel like I belog at this gaizatio.,0,0,0,0,0
9,10. The gaizatio has a diverse maagemet team. ...,0,0,0,0,0


In [23]:
data_post = insert_values_into(data_post,percent)
data_post

Unnamed: 0,strongly agree,agree,neither agree,disagree,strongly disagree
0,0.0,30.0,40.0,30.0,0.0
1,0.0,30.0,20.0,30.0,10.0
2,0.0,60.0,0.0,40.0,0.0
3,0.0,20.0,60.0,20.0,0.0
4,0.0,0.0,50.0,30.0,20.0
5,0.0,30.0,50.0,10.0,10.0
6,0.0,60.0,10.0,20.0,10.0
7,0.0,10.0,50.0,30.0,10.0
8,0.0,50.0,40.0,10.0,0.0
9,0.0,0.0,50.0,20.0,30.0


In [24]:
data_post = add_questions_and_reorder_columns(data_post)
data_post.to_csv('postsurvey_wout_tr_pipe.csv')
data_post

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,30.0,40.0,30.0,0.0
1,Perspectives like mine are included in decisio...,0.0,30.0,20.0,30.0,10.0
2,The people I wk with are respectful to one ano...,0.0,60.0,0.0,40.0,0.0
3,This ganization provides a safe environment f ...,0.0,20.0,60.0,20.0,0.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,50.0,30.0,20.0
5,I feel I can achieve success as my authentic s...,0.0,30.0,50.0,10.0,10.0
6,I can attend to an urgent personal matter duri...,0.0,60.0,10.0,20.0,10.0
7,This ganization encourages a good balance betw...,0.0,10.0,50.0,30.0,10.0
8,I feel like I belong at this ganization.,0.0,50.0,40.0,10.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,50.0,20.0,30.0
