In [191]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd # type: ignore
pd.set_option("display.max_columns", None)

In [192]:
def col_domain(df, col):
    print(col, end='\n\n')
    print(*df[col].unique(), sep='\n', end='\n\n')


def cols_with_missing_vals(df):
    missing_vals = df.isnull().sum()
    
    print(missing_vals[missing_vals > 0])


def impute_occupation_level(row):
    if row['Field of occupation'] == 'Unoccupied' and pd.isnull(row['Occupation level']):
        return 'Unoccupied'
    elif pd.isnull(row['Occupation level']):
        return df.loc[df['Field of occupation'] == row['Field of occupation'], 'Occupation level'].mode().iloc[0] # type: ignore
    else:
        return row['Occupation level']


def handle_missing_vals(df):
    df['Occupation level'] = df.apply(impute_occupation_level, axis='columns')

    df.dropna(axis='index', subset=['Country of residence'], inplace=True)

    df['S3Q11'].fillna('Other', inplace=True)

    df['S3Q12'].fillna('No', inplace=True)


def handle_inconsistencies(df):
    df = df[~df['S3Q8'].str.startswith('I think')]
    df['S3Q8'] = df['S3Q8'].str.strip()

    df['S3Q11'] = df['S3Q11'].str.replace('.', '')
    df['S3Q11'] = df['S3Q11'].str.strip()
    
    return df


def unwrap_mcqs(df):
    target_col = 'S3Q8'
    
    dummies = df[target_col].str.get_dummies(', ').astype(int)

    cust_col_names = {
        'An overall explanation of how the AI system works - once for the application': 'S3Q8O1',
        'An explanation for every decision made by the AI system - once per decision': 'S3Q8O2',
        'Explanation per decision if requested': 'S3Q1O3'
    }

    dummies.columns = dummies.columns.str.strip()
    dummies = dummies.rename(columns=cust_col_names)
    # display(dummies.head())

    df = pd.concat([df.drop(target_col, axis='columns'), dummies], axis='columns')


    target_col = 'S3Q11'
    
    dummies = df[target_col].str.get_dummies(', ').astype(int)

    cust_col_names = {
        'Easy to understand - Expect no technical expertise from me': 'S3Q11O1',
        'Faithful - Reflect the actual reasoning of the AI system': 'S3Q11O2',
        'Descriptive - State precisely what cause the AI system decision without leaving room for ambiguity': 'S3Q11O3',
        'Consistent - Provide similar explanations for similar/neighboring examples': 'S3Q11O4',
        'Discriminative - State Specific reasons influence the different decisions of AI system': 'S3Q11O5',
        'Interactive - Allow me to understand reasoning of the AI system in different granularities': 'S3Q11O6',
        "I'd like to see other options that I have in some scenarios (maybe percentage wise)": 'S3Q11O7',
        'I would like to have easy to understand explanations but with some technical aspects as well': 'S3Q11O8',
        'It depends on the scenario/context different scenarios require different explanation level': 'S3Q11O9',
        'Be transparent about potential biases associated with both the results and explanations': 'S3Q11O10',
        'I expect explanations to be infrequent (or rate to exponentially decay) and only generate explanations when previously provided explanations are not sufficient to explain the current scenario': 'S3Q11O11',
        'Other': 'S3Q11O12'
    }

    dummies.columns = dummies.columns.str.strip()
    dummies = dummies.rename(columns=cust_col_names)
    # display(dummies.head())

    df = pd.concat([df.drop(target_col, axis='columns'), dummies], axis='columns')

    return df

In [193]:
df = pd.read_csv("data.csv")

In [194]:
display(df.describe())
display(df.head())

print(*df.columns, sep='\n', end='\n\n')
cols_with_missing_vals(df)

Unnamed: 0,Age,S3Q7P1,S3Q7P2,S3Q7P3,S3Q7P4,S3Q7P5,S3Q10P1,S3Q10P2,S3Q10P3,S3Q10P4,S3Q10P5
count,327.0,327.0,327.0,327.0,327.0,327.0,327.0,327.0,327.0,327.0,327.0
mean,31.495413,2.262997,2.045872,2.657492,3.2263,3.24159,2.168196,2.070336,2.574924,3.103976,3.35474
std,7.338808,1.217689,1.457277,1.210434,1.362574,1.422771,1.220667,1.302878,1.115857,1.273208,1.464142
min,20.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,28.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0
50%,32.0,2.0,1.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,4.0
75%,33.0,3.0,3.0,3.0,4.0,5.0,3.0,3.0,3.0,4.0,5.0
max,68.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0


Unnamed: 0,Timestamp,Gender,Age,Education level,Field of occupation,Occupation level,Country of residence,S2Q1,S2Q2,S2Q3,S2Q4,S3Q1,S3Q2,S3Q3,S3Q4,S3Q5,S3Q6,S3Q7P1,S3Q7P2,S3Q7P3,S3Q7P4,S3Q7P5,S3Q8,S3Q9,S3Q10P1,S3Q10P2,S3Q10P3,S3Q10P4,S3Q10P5,S3Q11,S3Q12
0,3/21/2022 19:53:12,Female,31,Completed Undergraduate Degree,Architecture and Engineering,Entry level,Singapore,Email,Yes,No,Yes,Very important,I need an explanation for both decisions,(b),(b),(a),(a),1,1,1,3,1,An explanation for every decision made by the ...,An explanation explaining why AI made a certai...,2,1,3,4,5,Easy to understand - Expect no technical exper...,What happens when a person doesn't agree with ...
1,3/21/2022 20:12:56,Female,24,Completed Undergraduate Degree,Unoccupied,Unoccupied,Singapore,Email,Yes,No,Yes,Important,I need an explanation when I do not get the de...,(a),(a),(a),(a),1,3,4,5,2,An overall explanation of how the AI system wo...,An explanation explaining why AI made a certai...,2,1,3,4,5,Easy to understand - Expect no technical exper...,
2,3/21/2022 20:18:00,Female,31,Completed Undergraduate Degree,Computer and Mathematical,Student,Singapore,Email,Yes,Yes,Yes,Neutral,I need an explanation for both decisions,(a),(b),(a),(a),4,5,2,3,1,An overall explanation of how the AI system wo...,An explanation explaining why AI made a certai...,2,3,1,5,4,Easy to understand - Expect no technical exper...,
3,3/21/2022 20:18:37,Female,26,Completed Undergraduate Degree,Computer and Mathematical,Unoccupied,Singapore,Email,Yes,Yes,Yes,Important,I need an explanation when I do not get the de...,(b),(a),(a),(a),5,4,3,2,1,An overall explanation of how the AI system wo...,An explanation explaining why AI made a certai...,3,1,2,4,5,Easy to understand - Expect no technical exper...,
4,3/21/2022 20:33:45,Male,32,Completed Undergraduate Degree,Other,Executive level,Singapore,Email,Yes,No,Yes,Very important,I need an explanation for both decisions,(b),(b),(a),(a),1,2,4,5,3,An overall explanation of how the AI system wo...,An explanation explaining why AI made a certai...,4,2,1,3,5,Easy to understand - Expect no technical exper...,


Timestamp
Gender
Age
Education level
Field of occupation
Occupation level
Country of residence
S2Q1
S2Q2
S2Q3
S2Q4
S3Q1
S3Q2
S3Q3
S3Q4
S3Q5
S3Q6
S3Q7P1
S3Q7P2
S3Q7P3
S3Q7P4
S3Q7P5
S3Q8
S3Q9
S3Q10P1
S3Q10P2
S3Q10P3
S3Q10P4
S3Q10P5
S3Q11
S3Q12

Occupation level          6
Country of residence      1
S3Q11                     2
S3Q12                   289
dtype: int64


In [195]:
# col_domain(df, 'S3Q11')

handle_missing_vals(df)
df = handle_inconsistencies(df)
df = unwrap_mcqs(df)

display(df.head())

Unnamed: 0,Timestamp,Gender,Age,Education level,Field of occupation,Occupation level,Country of residence,S2Q1,S2Q2,S2Q3,S2Q4,S3Q1,S3Q2,S3Q3,S3Q4,S3Q5,S3Q6,S3Q7P1,S3Q7P2,S3Q7P3,S3Q7P4,S3Q7P5,S3Q9,S3Q10P1,S3Q10P2,S3Q10P3,S3Q10P4,S3Q10P5,S3Q12,S3Q8O2,S3Q8O1,S3Q1O3,S3Q11O10,S3Q11O4,S3Q11O3,S3Q11O5,S3Q11O1,S3Q11O2,S3Q11O11,S3Q11O8,S3Q11O7,S3Q11O6,S3Q11O9,S3Q11O12
0,3/21/2022 19:53:12,Female,31,Completed Undergraduate Degree,Architecture and Engineering,Entry level,Singapore,Email,Yes,No,Yes,Very important,I need an explanation for both decisions,(b),(b),(a),(a),1,1,1,3,1,An explanation explaining why AI made a certai...,2,1,3,4,5,What happens when a person doesn't agree with ...,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0
1,3/21/2022 20:12:56,Female,24,Completed Undergraduate Degree,Unoccupied,Unoccupied,Singapore,Email,Yes,No,Yes,Important,I need an explanation when I do not get the de...,(a),(a),(a),(a),1,3,4,5,2,An explanation explaining why AI made a certai...,2,1,3,4,5,No,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0
2,3/21/2022 20:18:00,Female,31,Completed Undergraduate Degree,Computer and Mathematical,Student,Singapore,Email,Yes,Yes,Yes,Neutral,I need an explanation for both decisions,(a),(b),(a),(a),4,5,2,3,1,An explanation explaining why AI made a certai...,2,3,1,5,4,No,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0
3,3/21/2022 20:18:37,Female,26,Completed Undergraduate Degree,Computer and Mathematical,Unoccupied,Singapore,Email,Yes,Yes,Yes,Important,I need an explanation when I do not get the de...,(b),(a),(a),(a),5,4,3,2,1,An explanation explaining why AI made a certai...,3,1,2,4,5,No,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0
4,3/21/2022 20:33:45,Male,32,Completed Undergraduate Degree,Other,Executive level,Singapore,Email,Yes,No,Yes,Very important,I need an explanation for both decisions,(b),(b),(a),(a),1,2,4,5,3,An explanation explaining why AI made a certai...,4,2,1,3,5,No,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0
