# Define Custom Functions

In [15]:
import numpy as np
import pandas as pd
import re

In [16]:
def get_text(path):
    """Read a text file into a single-column DataFrame, one row per line.

    Parameters
    ----------
    path : str or path-like
        File to read. It is opened in text mode with the default encoding.

    Returns
    -------
    pandas.DataFrame
        One column labelled ``0`` holding every non-empty line as a string,
        in file order, with the line terminator removed.

    Notes
    -----
    Lines are read with plain file iteration instead of
    ``np.loadtxt(..., delimiter='\\n')``: NumPy 1.23+ rejects a newline
    delimiter, ``loadtxt`` truncates each line at ``#`` (its default comment
    marker), and it collapses a one-line file to a 0-d array that
    ``pd.DataFrame`` cannot wrap.
    """
    with open(path) as file:
        # Strip only the terminator: the spaces inside a line are significant
        # to the regex used by extract_percentages.
        stripped = (line.rstrip('\r\n') for line in file)
        # Empty lines are skipped, as np.loadtxt did.
        lines = [line for line in stripped if line]

    return pd.DataFrame(lines)

In [17]:
def add_and_modify_columns(df):
    """Prepare the raw single-column frame for the survey pipeline, in place.

    Appends the five response-option columns, each filled with 0, and renames
    the column labelled ``0`` to ``'question'``.

    The frame passed in is modified, and that same object is returned.
    """
    response_options = ('strongly agree', 'agree', 'neither agree', 'disagree', 'strongly disagree')
    for option in response_options:
        df[option] = 0
    df.rename(columns={0: 'question'}, inplace=True)
    return df

In [18]:
def extract_percentages(df):
    """Parse the numeric rows of the interleaved survey frame.

    Rows at odd positions (1, 3, 5, ...) are treated as the percentage lines.
    The first-column value of each such row is converted to ``str`` and
    scanned with a regex, and every match is converted to ``float``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column holds the raw text lines. It is not modified.

    Returns
    -------
    list of list of float
        One inner list per odd-position row, in row order. Empty when ``df``
        has fewer than two rows.
    """
    percentage_rows = []
    # Walk every odd position instead of a fixed range(1, 20), so frames with
    # more or fewer than 20 rows are handled rather than truncated or crashing.
    for position in range(1, len(df), 2):
        cell = str(df.iloc[position, 0])
        # NOTE(review): this pattern matches "one digit then a space" or
        # "a space then exactly two digits", so a value such as 100 would be
        # captured as ' 10'. Left unchanged; confirm against the input format.
        matches = re.findall('[0-9] | [0-9][0-9]', cell)
        percentage_rows.append([float(match) for match in matches])
    return percentage_rows

In [19]:
def reset_indices(df):
    """Drop the odd-position rows from ``df`` in place and renumber the rest.

    Rows at positions 1, 3, 5, ... are removed, then the index is reset to a
    fresh 0..n-1 range with the old index discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. The caller's object is changed.

    Returns
    -------
    None
        The work is done in place on ``df``.
    """
    # df.index[1::2] picks the labels of every odd-position row, so the frame
    # no longer has to contain exactly the labels 1, 3, ..., 19.
    df.drop(df.index[1::2], axis=0, inplace=True)
    df.reset_index(drop=True, inplace=True)

In [20]:
def insert_values_into(df):
    """Build the numeric response table from the interleaved frame.

    Extracts the percentage values from ``df``, then strips the percentage
    rows out of ``df`` itself via ``reset_indices`` (an in-place side effect
    on the caller's frame). The values are returned as a new DataFrame whose
    columns are every column of ``df`` except the first.
    """
    percentages = extract_percentages(df)
    # Must follow the extraction: this removes the rows that were just parsed.
    reset_indices(df)
    value_columns = df.columns[1:]
    return pd.DataFrame(percentages, columns=value_columns)

In [21]:
def add_questions_and_reorder_columns(df, questions=None):
    """Attach the question text and put the columns in presentation order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the five response columns. It is not modified.
    questions : pandas.Series or sequence, optional
        Question text to attach as the ``'question'`` column. A Series is
        aligned on the index of ``df``; a plain sequence must match its
        length. When omitted, the module-level ``data['question']`` is used,
        which is what this function always did before the parameter existed.
        Pass the questions explicitly whenever ``df`` does not come from
        ``data``, otherwise the text of a different survey is attached.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``question``, ``strongly agree``, ``agree``,
        ``neither agree``, ``disagree``, ``strongly disagree``, in that order.
    """
    if questions is None:
        # Backward-compatible default: relies on the notebook-level `data`
        # frame having been processed by the earlier pipeline steps.
        questions = data['question']
    cols = ['question', 'strongly agree', 'agree', 'neither agree', 'disagree', 'strongly disagree']
    # assign() builds a new frame, leaving the caller's `df` untouched.
    return df.assign(question=questions)[cols]

# Create Datasets Using a FunctionTransformer and Pipeline


In [22]:
from sklearn.preprocessing import FunctionTransformer as FT
from sklearn.pipeline import Pipeline

In [23]:
# Raw presurvey lines, one per row. NOTE: the pipeline functions modify this
# frame in place, and add_questions_and_reorder_columns reads
# `data['question']` from it by name.
data = get_text('presurvey')

In [24]:
# Raw postsurvey lines, loaded the same way as `data`.
data_post = get_text('postsurvey')

In [25]:
# Wrap each custom function so it can be used as a Pipeline step.
add_and_modify_columns_t = FT(func=add_and_modify_columns)
insert_values_into_t = FT(func=insert_values_into)
add_questions_and_reorder_columns_t = FT(func=add_questions_and_reorder_columns)

In [26]:
# Chain the three transformers in order; verbose=True prints a line per step.
pipe = Pipeline(
    steps=[
        ('a&mc', add_and_modify_columns_t),
        ('ivi', insert_values_into_t),
        ('aqrc', add_questions_and_reorder_columns_t),
    ],
    verbose=True,
)

In [27]:
# Run the pipeline on the presurvey frame. `data` itself is modified in place
# along the way (columns added and renamed, percentage rows dropped), so
# reload it before re-running this cell.
presurvey = pipe.fit_transform(data)
# Written without a file extension; to_csv also writes the row index by default.
presurvey.to_csv('presurvey_tr_pipe')

[Pipeline] .............. (step 1 of 3) Processing a&mc, total=   0.0s
[Pipeline] ............... (step 2 of 3) Processing ivi, total=   0.0s
[Pipeline] .............. (step 3 of 3) Processing aqrc, total=   0.0s


In [28]:
presurvey

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,10.0,40.0,40.0,10.0
1,Perspectives like mine are included in decisio...,0.0,0.0,20.0,30.0,50.0
2,The people I wk with are respectful to one ano...,0.0,20.0,30.0,30.0,20.0
3,This ganization provides a safe environment f ...,0.0,0.0,20.0,50.0,30.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,30.0,20.0,50.0
5,I feel I can achieve success as my authentic s...,0.0,20.0,20.0,40.0,20.0
6,I can attend to an urgent personal matter duri...,0.0,20.0,0.0,70.0,10.0
7,This ganization encourages a good balance betw...,0.0,0.0,0.0,80.0,20.0
8,I feel like I belong at this ganization.,0.0,20.0,30.0,50.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,30.0,30.0,30.0


In [29]:
# Run the same pipeline on the postsurvey frame (`data_post` is likewise
# modified in place).
# NOTE(review): the 'aqrc' step takes its question text from the module-level
# `data` (the presurvey frame), not from `data_post`. The result is only
# correct while both surveys list the same questions in the same order.
postsurvey = pipe.fit_transform(data_post)
# Written without a file extension; to_csv also writes the row index by default.
postsurvey.to_csv('postsurvey_tr_pipe')

[Pipeline] .............. (step 1 of 3) Processing a&mc, total=   0.0s
[Pipeline] ............... (step 2 of 3) Processing ivi, total=   0.0s
[Pipeline] .............. (step 3 of 3) Processing aqrc, total=   0.0s


In [30]:
postsurvey

Unnamed: 0,question,strongly agree,agree,neither agree,disagree,strongly disagree
0,"I feel comftable discussing my background, bel...",0.0,30.0,40.0,30.0,0.0
1,Perspectives like mine are included in decisio...,0.0,30.0,20.0,30.0,10.0
2,The people I wk with are respectful to one ano...,0.0,60.0,0.0,40.0,0.0
3,This ganization provides a safe environment f ...,0.0,20.0,60.0,20.0,0.0
4,I can voice a contrary opinion without fear of...,0.0,0.0,50.0,30.0,20.0
5,I feel I can achieve success as my authentic s...,0.0,30.0,50.0,10.0,10.0
6,I can attend to an urgent personal matter duri...,0.0,60.0,10.0,20.0,10.0
7,This ganization encourages a good balance betw...,0.0,10.0,50.0,30.0,10.0
8,I feel like I belong at this ganization.,0.0,50.0,40.0,10.0,0.0
9,The ganization has a diverse management team. ...,0.0,0.0,50.0,20.0,30.0
