# Define Custom Functions

In [150]:
import numpy as np
import pandas as pd
import re

In [151]:
def get_text(path, comments='#'):
    """Read a text file into a one-column DataFrame, one row per line.

    The lines are read directly instead of going through ``np.loadtxt``
    with a newline delimiter, which NumPy 1.23+ refuses.  The filtering
    that ``loadtxt`` applied by default (comment stripping, skipping empty
    lines) is reproduced so files still yield the same rows.

    Parameters
    ----------
    path : str or path-like
        File to read; opened in text mode with the default encoding.
    comments : str or None, default '#'
        Marker that starts a comment; the marker and the rest of the line
        are discarded.  Pass ``None`` (or '') to keep lines verbatim.

    Returns
    -------
    pandas.DataFrame
        A single column labelled ``0`` holding the remaining lines in file
        order.  Lines that are empty after comment removal are skipped;
        leading and trailing spaces inside a line are kept.
    """
    lines = []
    with open(path) as file:
        for raw_line in file:
            if comments:
                raw_line = raw_line.split(comments, 1)[0]
            # Only the line terminator is removed: surrounding spaces can
            # matter to whatever parses the rows afterwards.
            line = raw_line.strip('\r\n')
            if line:
                lines.append(line)

    # Building from a dict guarantees column 0 exists even for an empty file.
    return pd.DataFrame({0: lines})

In [152]:
def add_and_modify_columns(df):
    """Add zero-filled answer columns and label column 0 as 'question'.

    The frame is modified in place and the same object is returned.
    """
    for answer in ('strongly agree', 'agree', 'neither agree',
                   'disagree', 'strongly disagree'):
        df[answer] = 0
    df.rename(columns={0: 'question'}, inplace=True)
    return df

In [153]:
def extract_percentages(df):
    """Parse the numbers found in the odd-positioned rows of column 0.

    Rows at positions 1, 3, ..., 19 are read.  In each one, every match of
    "one digit followed by a space" or "a space followed by two digits" is
    converted to float.

    Returns a list with one list of floats per row visited.
    """
    percentages = []
    for position in range(1, 20, 2):
        cell = str(df.iloc[position, 0])
        tokens = re.findall('[0-9] | [0-9][0-9]', cell)
        percentages.append([float(token) for token in tokens])
    return percentages

In [154]:
def reset_indices(df):
    """Drop the rows labelled 1, 3, ..., 19 and renumber the rest from 0.

    Works in place on ``df``; nothing is returned.
    """
    odd_labels = list(range(1, 20, 2))
    df.drop(index=odd_labels, inplace=True)
    df.reset_index(drop=True, inplace=True)

In [155]:
def insert_values_into(df):
    """Build a frame of parsed percentages under df's answer-column labels.

    The percentages are extracted first, then the rows they came from are
    removed from ``df`` in place.  The returned frame is new and uses every
    column label of ``df`` except the first.
    """
    percentages = extract_percentages(df)
    # Must run after extraction: it deletes the rows that were just parsed.
    reset_indices(df)
    answer_columns = df.columns[1:]
    return pd.DataFrame(percentages, columns=answer_columns)

In [156]:
def add_questions_and_reorder_columns(df, questions=None):
    """Attach the question text and place it ahead of the answer columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the five answer columns.  A 'question' column is
        added to it in place.
    questions : pandas.Series or sequence, optional
        Question text, one entry per row of ``df``.  When omitted, the
        'question' column of the module-level ``data`` frame is used, as
        before.  Pass it explicitly to take questions from another source.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ordered 'question', 'strongly agree',
        'agree', 'neither agree', 'disagree', 'strongly disagree'.
    """
    if questions is None:
        # Backward-compatible default: look the questions up on the global
        # frame, which must already carry a 'question' column.
        questions = data['question']
    df['question'] = questions
    cols = ['question','strongly agree','agree','neither agree','disagree','strongly disagree']
    return df[cols]

# Create Datasets Using a FunctionTransformer and Pipeline


In [157]:
from sklearn.preprocessing import FunctionTransformer as FT
from sklearn.pipeline import Pipeline

In [158]:
# Load the file named 'presurvey' into a one-column frame.
# NOTE: add_questions_and_reorder_columns reads this global by name
# (data['question']), so renaming or rebinding it affects that step.
data = get_text('presurvey')

In [159]:
# Load the file named 'postsurvey' into a one-column frame.
data_post = get_text('postsurvey')

In [160]:
# Wrap each custom function in a FunctionTransformer so that it can be
# used as a Pipeline step.
add_and_modify_columns_t = FT(func=add_and_modify_columns)
insert_values_into_t = FT(func=insert_values_into)
add_questions_and_reorder_columns_t = FT(func=add_questions_and_reorder_columns)

In [161]:
# Chain the three transformers in the order the functions expect:
# add columns -> extract percentages -> attach questions and reorder.
pipe = Pipeline(
    [
        ('a&mc', add_and_modify_columns_t),
        ('ivi', insert_values_into_t),
        ('aqrc', add_questions_and_reorder_columns_t),
    ],
    verbose=True,
)

In [162]:
# Run the three-step pipeline on the pre-survey frame.
# The step functions rename column 0 and drop rows with inplace=True, so
# presumably `data` itself is changed by this call as well — verify before
# re-running the cell on the same frame.
presurvey = pipe.fit_transform(data)
#presurvey.to_csv('presurvey_tr_pipe')

[Pipeline] .............. (step 1 of 3) Processing a&mc, total=   0.0s
[Pipeline] ............... (step 2 of 3) Processing ivi, total=   0.0s
[Pipeline] .............. (step 3 of 3) Processing aqrc, total=   0.0s


In [163]:
# Run the same pipeline on the post-survey frame.
# NOTE(review): the last step takes its question text from the global
# `data` frame, not from `data_post`, so the questions here are whatever
# `data` holds at this point — confirm that this is intended.
postsurvey = pipe.fit_transform(data_post)
#postsurvey.to_csv('postsurvey_tr_pipe')

[Pipeline] .............. (step 1 of 3) Processing a&mc, total=   0.0s
[Pipeline] ............... (step 2 of 3) Processing ivi, total=   0.0s
[Pipeline] .............. (step 3 of 3) Processing aqrc, total=   0.0s
