In [1]:
import pandas as pd
import scipy.stats as stats
from scipy.stats import chi2_contingency

In [2]:
# import Excel file with data
xlx = '../data/Coding_JSIS_MISQ_V5.xlsx'

# All journals

In [3]:
# transform Excel file to dataframe

df = pd.read_excel(xlx, header=0)
all_len = len(df)

print('Data amount: ', all_len)

df.head(2)

Data amount:  35


Unnamed: 0,Journal,Titel,Jahr,Volume,Issue,Autor/en,Use of CSR stated,Use of CSR stated DETAIL,CSR used in keywords,CSR used in keywords DETAIL,...,Comparison with similar literature (only exploratory) DETAIL,Key case study characteristics summarized,Key case study characteristics summarized DETAIL,Methodological literature cited,Methodological literature cited DETAIL,Other case studies cited,Other case studies cited DETAIL,Online-Appendix,Online-Appendix DETAIL,"Classification in terms of methodological rigor (descending: ++, +, o, -)"
0,JSIS,A strategic activity model of Enterprise Syste...,2014,23,1,"Jenny Leonard, Helen Higson",yes,3.1. Case study design + A case study approach...,no,-,...,This contributes to understanding some aspects...,no,-,yes,"Yin, R., 2003. Case Study Research: Design and...",no,-,no,-,-
1,JSIS,Forced coopetition in IT multi-sourcing,2014,23,3,"Martin Wiener, Carol Saunders",yes,To develop a deeper understanding of this mode...,no,-,...,Turning to the critical factors that enabled G...,no,-,yes,"Yin, R., 1994. Case Study Research: Design and...",no,-,no,-,+


In [4]:
# dataframes for research purposes

# 1. descriptive
df_descr = df.loc[df['Research purpose'] == 'descriptive']
descr_len = len(df_descr)
print('descriptive purpose: ', descr_len, ' => ', descr_len/all_len)
#df_descr.head(2)

# 2. explanatory
df_explan = df.loc[df['Research purpose'] == 'explanatory']
explan_len = len(df_explan)
print('explanatory purpose: ', explan_len, ' => ', explan_len/all_len)
#df_explan.head(2)

# 3. exploratory
df_explor = df.loc[df['Research purpose'] == 'exploratory']
explor_len = len(df_explor)
print('exploratory purpose: ', explor_len, ' => ', explor_len/all_len)
#df_explor.head(2)

descriptive purpose:  2  =>  0.05714285714285714
explanatory purpose:  1  =>  0.02857142857142857
exploratory purpose:  32  =>  0.9142857142857143


In [5]:
# dataframes for single and multiple CS

# 1. single CS
df_single = df.loc[df['Number of cases'] == 1]
single_len = len(df_single)
print('Number single cases: ', single_len, ' => ', single_len/all_len)
#df_single.head()

#2. multiple CS
df_multiple = df.loc[df['Number of cases'] > 1]
multiple_len = len(df_multiple)
print('Number multiple cases: ', multiple_len, ' => ', multiple_len/all_len)
#df_multiple.head()

Number single cases:  23  =>  0.6571428571428571
Number multiple cases:  12  =>  0.34285714285714286


In [6]:
# dataframe for CS including interviews

df_interv = df.loc[df['Interviews '] == 'yes']
interv_len = len(df_interv)
print('Number interview cases: ', interv_len)
#df_interv.head()

Number interview cases:  32


In [7]:
# dataframe for CS including questionnaires

df_quest = df.loc[df['Questionnaires'] == 'yes']
quest_len = len(df_quest)
print('Number Questionnaires cases: ', quest_len)
#df_quest.head()

Number Questionnaires cases:  0


In [8]:
# dataframe for CS including coding

df_code = df.loc[df['Coding of raw data'] == 'yes']
code_len = len(df_code)
print('Number coding cases: ', code_len)
#df_code.head()

Number coding cases:  31


In [9]:
# Use of CSR stated

csr_stated_all = 0

for i in range(0, all_len):
    if df['Use of CSR stated'].iloc[i] == 'yes':
        csr_stated_all += 1

print('Use of CSR stated: ', csr_stated_all, ' => ', csr_stated_all/all_len)

Use of CSR stated:  33  =>  0.9428571428571428


In [10]:
# CSR used in keywords

keywords_all = 0

for i in range(0, all_len):
    if df['CSR used in keywords'].iloc[i] == 'yes':
        keywords_all += 1

print('CSR used in keywords: ', keywords_all, ' => ', keywords_all/all_len)

CSR used in keywords:  9  =>  0.2571428571428571


In [11]:
# “Case studies” stated instead of “multiple case study”

mCS_stated_all = 0

for i in range(0, multiple_len):
    if df_multiple['“Case studies” stated instead of “multiple case study”'].iloc[i] == 'yes':
        mCS_stated_all += 1

print('“Case studies” stated instead of “multiple case study”: ', mCS_stated_all, ' => ', mCS_stated_all/multiple_len)

“Case studies” stated instead of “multiple case study”:  6  =>  0.5


_**Research Design**_

In [12]:
# research purpose

descr_all = 0
explan_all = 0
explor_all = 0

for i in range(0, all_len):
    if df['Research purpose'].iloc[i] == 'descriptive':
        descr_all += 1
    elif df['Research purpose'].iloc[i] == 'explanatory':
        explan_all += 1
    elif df['Research purpose'].iloc[i] == 'exploratory':
        explor_all += 1

print('Descriptive all: ', descr_all, ' => ', descr_all/all_len)
print('Explanatory all: ', explan_all, ' => ', explan_all/all_len)
print('Exploratory all: ', explor_all, ' => ', explor_all/all_len)

Descriptive all:  2  =>  0.05714285714285714
Explanatory all:  1  =>  0.02857142857142857
Exploratory all:  32  =>  0.9142857142857143


In [13]:
# Rationale for conducting CSR

rationale_csr_all = 0

for i in range(0, all_len):
    if df['Rationale for conducting CSR'].iloc[i] == 'yes':
        rationale_csr_all += 1

print('Rationale for conducting CSR: ', rationale_csr_all, ' => ', rationale_csr_all/all_len)

Rationale for conducting CSR:  26  =>  0.7428571428571429


In [14]:
# Clear research questions 

question_all = 0
w_interr_all = 0
wo_interr_all = 0

for i in range(0, all_len):
    if df['Clear research questions '].iloc[i] == 'yes':
        question_all += 1
    elif df['Clear research questions '].iloc[i] == 'goal with interrogative':
        w_interr_all += 1
    elif df['Clear research questions '].iloc[i] == 'goal without interrogative':
        wo_interr_all += 1

print('Research question all: ', question_all, ' => ', question_all/all_len)
print('goal with interrogative all: ', w_interr_all, ' => ', w_interr_all/all_len)
print('goal without interrogative all: ', wo_interr_all, ' => ', wo_interr_all/all_len)
print('')
print('Research question or goal with interrogative all: ', question_all+w_interr_all, ' => ',  (question_all+w_interr_all)/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [76, 107]

# my result
my = [question_all+w_interr_all, all_len-(question_all+w_interr_all)]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Research question all:  26  =>  0.7428571428571429
goal with interrogative all:  5  =>  0.14285714285714285
goal without interrogative all:  4  =>  0.11428571428571428

Research question or goal with interrogative all:  31  =>  0.8857142857142857

stat/chi2 is 24.16706037824469
p value is 8.833019633003729e-07
dof is 1
expected is [[89.82110092 93.17889908]
 [17.17889908 17.82110092]]
Dependent (reject H0)


In [15]:
# Types of research questions 

types_rq_un = df['Types of research questions '].unique()
print('RQ types: ', types_rq_un)

amount_rq = 0
for i in range(0, all_len):
    if df['Types of research questions '].iloc[i] != '-':
        amount_rq += 1

print('Number research questions or goal with interrogative: ', amount_rq)


# possible combination of types of research question

how = 0
why = 0
to_what = 0
what = 0
does = 0

for i in range(0, all_len):
    if df['Types of research questions '].iloc[i] == 'how, how':
        how += 2
    elif df['Types of research questions '].iloc[i] == 'how':
        how += 1
    elif df['Types of research questions '].iloc[i] == 'what, how':
        how += 1
        what += 1
    elif df['Types of research questions '].iloc[i] == 'how, why':
        how += 1
        why += 1
    elif df['Types of research questions '].iloc[i] == 'how, what':
        how += 1
        what += 1
    elif df['Types of research questions '].iloc[i] == 'what, to what, what':
        what += 2
        to_what += 1
    elif df['Types of research questions '].iloc[i] == 'how, to what':
        how += 1
        to_what += 1
    elif df['Types of research questions '].iloc[i] == 'what':
        what += 1
    elif df['Types of research questions '].iloc[i] == 'does, how':
        does += 1
        how += 1

print('how absolute: ', how)
print('why absolute: ', why)
print('to what absolute: ', to_what)
print('what absolute: ', what)
print('does absolute: ', does)

RQ types:  ['how, how' '-' 'how' 'what, how' 'how, why' 'how, what'
 'what, to what, what' 'how, to what' 'what' 'does, how']
Number research questions or goal with interrogative:  31
how absolute:  33
why absolute:  3
to what absolute:  2
what absolute:  7
does absolute:  1


In [16]:
# A priori specification of constructs (only exploratory)

a_priori_all = 0

for i in range(0, explor_all):
    if df_explor['A priori specification of constructs (only exploratory)'].iloc[i] == 'yes':
        a_priori_all += 1

print('A priori specification of constructs (only exploratory): ', a_priori_all, ' => ', a_priori_all/explor_all)


# chi2
print('')

# Dubé & Paré (2003)
dp = [42, 12]

# my result
my = [a_priori_all, explor_all-a_priori_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

A priori specification of constructs (only exploratory):  28  =>  0.875

stat/chi2 is 0.6943152695105822
p value is 0.4047004645083745
dof is 1
expected is [[43.95348837 10.04651163]
 [26.04651163  5.95348837]]
Independent (H0 holds true)


In [17]:
# Clean theoretical slate (only exploratory)

cl_theor_all = 0

for i in range(0, explor_all):
    if df_explor['Clean theoretical slate (only exploratory)'].iloc[i] == 'yes':
        cl_theor_all += 1

print('Clean theoretical slate (only exploratory): ', cl_theor_all, ' => ', cl_theor_all/explor_all)


# chi2
print('')

# Dubé & Paré (2003)
dp = [44, 10]

# my result
my = [cl_theor_all, explor_all-cl_theor_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Clean theoretical slate (only exploratory):  15  =>  0.46875

stat/chi2 is 9.623319470600546
p value is 0.0019212216743659648
dof is 1
expected is [[37.04651163 16.95348837]
 [21.95348837 10.04651163]]
Dependent (reject H0)


In [18]:
# Theory of interest stated (only explanatory)

theory_int_all = 0

for i in range(0, explan_all):
    if df_explan['Theory of interest stated (only explanatory)'].iloc[i] == 'yes':
        theory_int_all += 1

print('Theory of interest stated (only explanatory): ', theory_int_all, ' => ', theory_int_all/explan_all)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [17, 0]

# my result
my = [theory_int_all, explan_all-theory_int_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))

Theory of interest stated (only explanatory):  1  =>  1.0

fisher is (nan, 1.0)


In [19]:
# Predictions from theory stated  (only explanatory)

predict_all = 0

for i in range(0, explan_all):
    if df_explan['Predictions from theory stated  (only explanatory)'].iloc[i] == 'yes':
        predict_all += 1

print('Predictions from theory stated  (only explanatory): ', predict_all, ' => ', predict_all/explan_all)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [16, 1]

# my result
my = [predict_all, explan_all-predict_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))

Predictions from theory stated  (only explanatory):  1  =>  1.0

fisher is (0.0, 1.0)


In [20]:
# Use of rival theories (only explanatory)

rival_all = 0

for i in range(0, explan_all):
    if df_explan['Use of rival theories (only explanatory)'].iloc[i] == 'yes':
        rival_all += 1

print('Use of rival theories (only explanatory): ', rival_all, ' => ', rival_all/explan_all)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [5, 12]

# my result
my = [rival_all, explan_all-rival_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))

Use of rival theories (only explanatory):  0  =>  0.0

fisher is (inf, 1.0)


In [21]:
# Case acquisition strategy

acquisition_all = 0

for i in range(0, all_len):
    if df['Case acquisition strategy'].iloc[i] == 'yes':
        acquisition_all += 1

print('Case acquisition strategy: ', acquisition_all, ' => ', acquisition_all/all_len)

Case acquisition strategy:  4  =>  0.11428571428571428


In [22]:
# Number of cases

number_cases = df['Number of cases'].unique()
print('Number of cases unique: ', number_cases)

mean = df_multiple['Number of cases'].mean()
print('multiple cases mean: ', mean)

median = df_multiple['Number of cases'].median()
print('multiple cases median: ', median)


# number cases sum
two = 0
three = 0
four = 0
five = 0
twenty = 0
twentytwo = 0

for i in range(0, multiple_len):
    if df_multiple['Number of cases'].iloc[i] == 2:
        two += 1
    elif df_multiple['Number of cases'].iloc[i] == 3:
        three += 1
    elif df_multiple['Number of cases'].iloc[i] == 4:
        four += 1
    elif df_multiple['Number of cases'].iloc[i] == 5:
        five += 1
    elif df_multiple['Number of cases'].iloc[i] == 20:
        twenty += 1
    elif df_multiple['Number of cases'].iloc[i] == 22:
        twentytwo += 2

print('2 absolute: ', two)
print('3 absolute: ', three)
print('4 absolute: ', four)
print('5 absolute: ', five)
print('20 absolute: ', twenty)
print('22 absolute: ', twentytwo)


# chi2
print('')

# Dubé & Paré (2003)
dp = [74, 109]

# my result
my = [multiple_len, all_len-multiple_len]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')


Number of cases unique:  [ 2  1 22  5 20  3  4]
multiple cases mean:  5.75
multiple cases median:  2.5
2 absolute:  6
3 absolute:  2
4 absolute:  1
5 absolute:  1
20 absolute:  1
22 absolute:  2

stat/chi2 is 0.24353124953582483
p value is 0.6216672131128154
dof is 1
expected is [[ 72.19266055 110.80733945]
 [ 13.80733945  21.19266055]]
Independent (H0 holds true)


In [23]:
# Rationale for conducting a single/multiple case study

rationale_single_multiple_all = 0

for i in range(0, all_len):
    if df['Rationale for conducting a single/multiple case study'].iloc[i] == 'yes':
        rationale_single_multiple_all += 1

print('Rationale for conducting a single/multiple case study: ', rationale_single_multiple_all, ' => ', rationale_single_multiple_all/all_len)

Rationale for conducting a single/multiple case study:  21  =>  0.6


In [24]:
# Nature of single-case design

convenience_all = 0
criterion_all = 0
critical_all = 0
extreme_all = 0
intensity_all = 0
revelatory_all = 0
theoretical_all = 0
typical_all = 0
unique_all = 0

for i in range(0, single_len):
    if df_single['Nature of single-case design'].iloc[i] == 'criterion, convenience':
        criterion_all += 1
        convenience_all += 1
    if df_single['Nature of single-case design'].iloc[i] == 'critical':
        critical_all += 1
    elif df_single['Nature of single-case design'].iloc[i] == 'extreme':
        extreme_all += 1
    elif df_single['Nature of single-case design'].iloc[i] == 'intensity':
        intensity_all += 1
    elif df_single['Nature of single-case design'].iloc[i] == 'revelatory':
        revelatory_all += 1
    elif df_single['Nature of single-case design'].iloc[i] == 'theoretical':
        theoretical_all += 1
    elif df_single['Nature of single-case design'].iloc[i] == 'typical':
        typical_all += 1
    elif df_single['Nature of single-case design'].iloc[i] == 'unique':
        unique_all += 1    

nature_sum = criterion_all + extreme_all + intensity_all + revelatory_all + theoretical_all + typical_all + unique_all
        
print('For unknown reasons the case "criterion, convenience" is not recognized by the program. Therefore adding the values by hand.')
criterion_all += 1
convenience_all += 1

print('')
print('convenience: ', convenience_all, ' => ', convenience_all/single_len)
print('criterion: ', criterion_all, ' => ', criterion_all/single_len)
print('critical: ', critical_all, ' => ', critical_all/single_len)
print('extreme and unique: ', extreme_all+unique_all, ' => ', (extreme_all+unique_all)/single_len)
print('intensity: ', intensity_all, ' => ', intensity_all/single_len)
print('revelatory: ', revelatory_all, ' => ', revelatory_all/single_len)
print('theoretical: ', theoretical_all, ' => ', theoretical_all/single_len)
print('typical: ', typical_all, ' => ', typical_all/single_len)
#print('unique: ', unique_all, ' => ', unique_all/single_len)
print('nature single described: ', nature_sum, ' => ', nature_sum/single_len)
print('nature single not specified: ', single_len-nature_sum, ' => ', 1-(nature_sum/single_len))

For unknown reasons the case "criterion, convenience" is not recognized by the program. Therefore adding the values by hand.

convenience:  1  =>  0.043478260869565216
criterion:  1  =>  0.043478260869565216
critical:  0  =>  0.0
extreme and unique:  6  =>  0.2608695652173913
intensity:  1  =>  0.043478260869565216
revelatory:  5  =>  0.21739130434782608
theoretical:  1  =>  0.043478260869565216
typical:  0  =>  0.0
nature single described:  13  =>  0.5652173913043478
nature single not specified:  10  =>  0.4347826086956522


In [25]:
# Replication logic in multiple-case design

literal_all = 0
theoretical_all = 0

for i in range(0, multiple_len):
    if df_multiple['Replication logic in multiple-case design'].iloc[i] == 'literal':
        literal_all += 1
    elif df_multiple['Replication logic in multiple-case design'].iloc[i] == 'theoretical':
        theoretical_all += 1

print('literal: ', literal_all, ' => ', literal_all/multiple_len)
print('theoretical: ', theoretical_all, ' => ', theoretical_all/multiple_len)
print('replic. logic described: ', literal_all+theoretical_all, ' => ', (literal_all+theoretical_all)/multiple_len)
print('replic. logic not specified: ', multiple_len-(literal_all+theoretical_all), ' => ', 1-((literal_all+theoretical_all)/multiple_len))

literal:  5  =>  0.4166666666666667
theoretical:  4  =>  0.3333333333333333
replic. logic described:  9  =>  0.75
replic. logic not specified:  3  =>  0.25


In [26]:
# Case/s reasonably chosen

cases_reason_all = 0

for i in range(0, all_len):
    if df['Case/s reasonably chosen'].iloc[i] == 'yes':
        cases_reason_all += 1

print('Case/s reasonably chosen: ', cases_reason_all, ' => ', cases_reason_all/all_len)
print('')


# single 
case_sel_single = 0

for i in range(0, single_len):
    if df_single['Case/s reasonably chosen'].iloc[i] == 'yes':
        case_sel_single += 1

print('single Case reasonably chosen: ', case_sel_single, ' => ', case_sel_single/single_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [16, 101]

# my result
my = [case_sel_single, single_len-case_sel_single]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("single stat/chi2 is " + str(stat))
print("single p value is " + str(p))
print("single dof is " + str(dof))
print("single expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')



# multiple 
print('')
case_sel_multiple = 0

for i in range(0, multiple_len):
    if df_multiple['Case/s reasonably chosen'].iloc[i] == 'yes':
        case_sel_multiple += 1

print('multiple Cases reasonably chosen: ', case_sel_multiple, ' => ', case_sel_multiple/multiple_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [24, 51]

# my result
my = [case_sel_multiple, multiple_len-case_sel_multiple]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("multiple stat/chi2 is " + str(stat))
print("multiple p value is " + str(p))
print("multiple dof is " + str(dof))
print("multiple expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Case/s reasonably chosen:  29  =>  0.8285714285714286

single Case reasonably chosen:  20  =>  0.8695652173913043

single stat/chi2 is 50.26891633078924
single p value is 1.3405622266927397e-12
single dof is 1
single expected is [[30.08571429 86.91428571]
 [ 5.91428571 17.08571429]]
Dependent (reject H0)

multiple Cases reasonably chosen:  9  =>  0.75

multiple stat/chi2 is 6.400627104377104
multiple p value is 0.011408006072346584
multiple dof is 1
multiple expected is [[28.44827586 46.55172414]
 [ 4.55172414  7.44827586]]
Dependent (reject H0)

fisher is (0.1568627450980392, 0.00813504709584242)


In [27]:
# Case/s defined

cases_def_all = 0

for i in range(0, all_len):
    if df['Case/s defined'].iloc[i] == 'yes':
        cases_def_all += 1

print('Case/s defined: ', cases_def_all, ' => ', cases_def_all/all_len)

Case/s defined:  35  =>  1.0


In [28]:
# Case/s

cases = df['Case/s'].unique()
#cases


In [29]:
# Unit of analysis stated

uoa_stated_all = 0

for i in range(0, all_len):
    if df['Unit of analysis stated'].iloc[i] == 'yes':
        uoa_stated_all += 1

print('Unit of analysis stated: ', uoa_stated_all, ' => ', uoa_stated_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [14, 169]

# my result
my = [uoa_stated_all, all_len-uoa_stated_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Unit of analysis stated:  8  =>  0.22857142857142856

stat/chi2 is 5.905964365733519
p value is 0.015089702161222164
dof is 1
expected is [[ 18.46788991 164.53211009]
 [  3.53211009  31.46788991]]
Dependent (reject H0)

fisher is (0.27958579881656803, 0.012037015146544462)


In [30]:
# Unit of analysis

uoa = df['Unit of analysis'].unique()
#uoa


In [31]:
# Rationale for choosing the unit of analysis

rationale_uoa_all = 0

for i in range(0, all_len):
    if df['Rationale for choosing the unit of analysis'].iloc[i] == 'yes':
        rationale_uoa_all += 1

print('Rationale for choosing the unit of analysis: ', rationale_uoa_all, ' => ', rationale_uoa_all/all_len)

Rationale for choosing the unit of analysis:  15  =>  0.42857142857142855


In [32]:
# Case design

embedded_all = 0
holistic_all = 0

for i in range(0, all_len):
    if df['Case design'].iloc[i] == 'embedded':
        embedded_all += 1
    elif df['Case design'].iloc[i] == 'holistic':
        holistic_all += 1

print('embedded: ', embedded_all, ' => ', embedded_all/all_len)
print('holistic: ', holistic_all, ' => ', holistic_all/all_len)

embedded:  30  =>  0.8571428571428571
holistic:  5  =>  0.14285714285714285


In [33]:
# Rationale research site

rationale_site_all = 0

for i in range(0, all_len):
    if df['Rationale research site'].iloc[i] == 'yes' or df['Rationale research site'].iloc[i] == 'yes, see case selection':
        rationale_site_all += 1

print('Rationale research site: ', rationale_site_all, ' => ', rationale_site_all/all_len)

Rationale research site:  30  =>  0.8571428571428571


In [34]:
# Detail research site

detail_all = 0
rough_all = 0

for i in range(0, all_len):
    if df['Detail research site'].iloc[i] == 'in detail':
        detail_all += 1
    elif df['Detail research site'].iloc[i] == 'rough':
        rough_all += 1

print('in detail: ', detail_all, ' => ', detail_all/all_len)
print('rough: ', rough_all, ' => ', rough_all/all_len)

in detail:  24  =>  0.6857142857142857
rough:  8  =>  0.22857142857142856


In [35]:
# Research site

on_all = 0
off_all = 0
both_all = 0

for i in range(0, all_len):
    if df['Research site'].iloc[i] == 'on-site':
        on_all += 1
    elif df['Research site'].iloc[i] == 'off-site':
        off_all += 1
    elif df['Research site'].iloc[i] == 'on-site, off-side':
        both_all += 1

print('on-site: ', on_all, ' => ', on_all/all_len)
print('off-site: ', off_all, ' => ', off_all/all_len)
print('both: ', both_all, ' => ', both_all/all_len)

on-site:  29  =>  0.8285714285714286
off-site:  3  =>  0.08571428571428572
both:  3  =>  0.08571428571428572


In [36]:
# Use of a pilot case

pilot_all = 0

for i in range(0, all_len):
    if df['Use of a pilot case'].iloc[i] == 'yes':
        pilot_all += 1

print('Use of a pilot case: ', pilot_all, ' => ', pilot_all/all_len)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [4, 179]

# my result
my = [pilot_all, all_len-pilot_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))


Use of a pilot case:  0  =>  0.0

fisher is (inf, 1.0)


In [37]:
# Site description

site_descr_all = 0

for i in range(0, all_len):
    if df['Site description'].iloc[i] != 'not specified':
        site_descr_all += 1

print('Site description: ', site_descr_all, ' => ', site_descr_all/all_len)

site_descr_unique = df['Site description'].unique()
#site_descr_unique

Site description:  24  =>  0.6857142857142857


In [38]:
# Case period

period_all = 0

for i in range(0, all_len):
    if df['Case period '].iloc[i] != 'not specified':
        period_all += 1

print('Case period: ', period_all, ' => ', period_all/all_len)

site_descr_unique = df['Case period '].unique()
#site_descr_unique

Case period:  19  =>  0.5428571428571428


In [39]:
# Longitudinal design

long_all = 0

for i in range(0, all_len):
    if df['Longitudinal design'].iloc[i] == 'yes':
        long_all += 1

print('Longitudinal design: ', long_all, ' => ', long_all/all_len)

Longitudinal design:  12  =>  0.34285714285714286


In [40]:
# Time spent on site (for data collection)

time_spent_all = 0

for i in range(0, all_len):
    if df['Time spent on site (for data collection)'].iloc[i] != 'not specified':
        time_spent_all += 1

print('Time spent on site (for data collection): ', time_spent_all, ' => ', time_spent_all/all_len)

time_spent_unique = df['Time spent on site (for data collection)'].unique()
time_spent_unique

Time spent on site (for data collection):  29  =>  0.8285714285714286


array(['between 2 and 4 months', '13 months', 'not specified', '4 months',
       '6 months', '12 months',
       '8 months (5 months case A + 3 months case B)', '53 months',
       '31 months', '23 days', '12 years', 'c. 1,5-2 years', '6 years',
       '26 months', '11 months', '10 years', '5 years', '3 months',
       '2 years', '2 months', '9 months', '22 months + during fall',
       '18 months', '3 years', '11 years', '30 Monate'], dtype=object)

In [41]:
# Nature of data

on_all = 0
retro_all = 0
both_all = 0

for i in range(0, all_len):
    if df['Nature of data'].iloc[i] == 'on-going':
        on_all += 1
    elif df['Nature of data'].iloc[i] == 'retrospective':
        retro_all += 1
    elif df['Nature of data'].iloc[i] == 'both':
        both_all += 1

print('on-going: ', on_all, ' => ', on_all/all_len)
print('retrospective: ', retro_all, ' => ', retro_all/all_len)
print('both: ', both_all, ' => ', both_all/all_len)
print('nature sum: ', on_all+retro_all+both_all, ' => ', (on_all+retro_all+both_all)/all_len)

on-going:  14  =>  0.4
retrospective:  10  =>  0.2857142857142857
both:  10  =>  0.2857142857142857
nature sum:  34  =>  0.9714285714285714


In [42]:
# Team-based research (data collection and analysis)

team_roles_all = 0

for i in range(0, all_len):
    if (df['Number of researchers conducting data collection'].iloc[i] != 'not specified' and df['Number of researchers conducting data analysis'].iloc[i] != 'not specified') or (df['Number of researchers conducting data collection'].iloc[i] == 'not specified' and df['Number of researchers conducting data analysis'].iloc[i] != 'not specified') or (df['Number of researchers conducting data collection'].iloc[i] != 'not specified' and df['Number of researchers conducting data analysis'].iloc[i] == 'not specified'):
        team_roles_all += 1

print('roles in team: ', team_roles_all, ' => ', team_roles_all/all_len)


roles in team:  24  =>  0.6857142857142857


In [43]:
# Number of authors (Team-based research)

one_all = 0
two_all = 0
three_all = 0
four_all = 0
five_all = 0
six_all = 0

for i in range(0, all_len):
    if df['Number of authors  '].iloc[i] == 1:
        one_all += 1
    elif df['Number of authors  '].iloc[i] == 2:
        two_all += 1
    elif df['Number of authors  '].iloc[i] == 3:
        three_all += 1
    elif df['Number of authors  '].iloc[i] == 4:
        four_all += 1
    elif df['Number of authors  '].iloc[i] == 5:
        five_all += 1
    elif df['Number of authors  '].iloc[i] == 6:
        six_all += 1

print('1: ', one_all, ' => ', one_all/all_len)
print('2: ', two_all, ' => ', two_all/all_len)
print('3: ', three_all, ' => ', three_all/all_len)
print('4: ', four_all, ' => ', four_all/all_len)
print('5: ', five_all, ' => ', five_all/all_len)
print('6: ', six_all, ' => ', six_all/all_len)
print('Team-based research: ', all_len-one_all, ' => ', (all_len-one_all)/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [132, 51]

# my result
my = [all_len-one_all, one_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')


1:  3  =>  0.08571428571428572
2:  7  =>  0.2
3:  16  =>  0.45714285714285713
4:  7  =>  0.2
5:  1  =>  0.02857142857142857
6:  1  =>  0.02857142857142857
Team-based research:  32  =>  0.9142857142857143

stat/chi2 is 4.881435820733435
p value is 0.027147028241688333
dof is 1
expected is [[137.66972477  45.33027523]
 [ 26.33027523   8.66972477]]
Dependent (reject H0)


In [44]:
# Involvement of the researcher

involved_all = 0

for i in range(0, all_len):
    if df['Involvement of the researcher'].iloc[i] == 'involved':
        involved_all += 1

print('Involvement of the researcher: ', involved_all, ' => ', involved_all/all_len)

Involvement of the researcher:  6  =>  0.17142857142857143


In [45]:
# Different roles for multiple investigators

diff_roles_all = 0

for i in range(0, all_len):
    if df['Different roles for multiple investigators'].iloc[i] == 'yes':
        diff_roles_all += 1

print('Different roles for multiple investigators: ', diff_roles_all, ' => ', diff_roles_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [8, 175]

# my result
my = [diff_roles_all, all_len-diff_roles_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Different roles for multiple investigators:  6  =>  0.17142857142857143

stat/chi2 is 5.990617995832213
p value is 0.014382162822009813
dof is 1
expected is [[ 11.75229358 171.24770642]
 [  2.24770642  32.75229358]]
Dependent (reject H0)

fisher is (0.22095238095238096, 0.012870884895002401)


_**Data Collection**_

In [46]:
# Elucidation of the data collection process 

detail_all = 0
rough_all = 0

for i in range(0, all_len):
    if df['Elucidation of the data collection process '].iloc[i] == 'in detail':
        detail_all += 1
    elif df['Elucidation of the data collection process '].iloc[i] == 'rough':
        rough_all += 1

print('in detail: ', detail_all, ' => ', detail_all/all_len)
print('rough: ', rough_all, ' => ', rough_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [107, 76]

# my result
my = [detail_all+rough_all, all_len-(detail_all+rough_all)]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

in detail:  31  =>  0.8857142857142857
rough:  4  =>  0.11428571428571428

stat/chi2 is 20.523732241015836
p value is 5.889647715720123e-06
dof is 1
expected is [[119.20183486  63.79816514]
 [ 22.79816514  12.20183486]]
Dependent (reject H0)


In [47]:
# Interviews 

interviews_all = 0

for i in range(0, all_len):
    if df['Interviews '].iloc[i] == 'yes':
        interviews_all += 1

print('Interviews : ', interviews_all, ' => ', interviews_all/all_len)

Interviews :  32  =>  0.9142857142857143


In [48]:
# Kind of interviews

semi_all = 0
struct_all = 0

for i in range(0, interv_len):
    if df_interv['Kind of interviews'].iloc[i] == 'semi-structured':
        semi_all += 1
    elif df_interv['Kind of interviews'].iloc[i] == 'structured':
        struct_all += 1

print('semi-structured: ', semi_all, ' => ', semi_all/interv_len)
print('structured: ', struct_all, ' => ', struct_all/interv_len)

semi-structured:  19  =>  0.59375
structured:  2  =>  0.0625


In [49]:
# Sampling strategy (interviews) (up to three)

criterion_all = 0
purposeful_all = 0
snowball_all = 0
variation_all = 0

for i in range(0, interv_len):
    if 'criterion' in df_interv['Sampling strategy (interviews) (up to three)'].iloc[i]:
        criterion_all += 1
    if 'purposeful' in df_interv['Sampling strategy (interviews) (up to three)'].iloc[i]:
        purposeful_all += 1
    if 'snowball' in df_interv['Sampling strategy (interviews) (up to three)'].iloc[i]:
        snowball_all += 1
    if 'maximum variation' in df_interv['Sampling strategy (interviews) (up to three)'].iloc[i]:
        variation_all += 1

print('criterion: ', criterion_all, ' => ', criterion_all/interv_len)
print('purposeful: ', purposeful_all, ' => ', purposeful_all/interv_len)
print('snowball: ', snowball_all, ' => ', snowball_all/interv_len)
print('maximum variation: ', variation_all, ' => ', variation_all/interv_len)

criterion:  2  =>  0.0625
purposeful:  13  =>  0.40625
snowball:  8  =>  0.25
maximum variation:  20  =>  0.625


In [50]:
# Number of interviewees 

number_interviewees_all = 0

for i in range(0, interv_len):
    if df_interv['Number of interviewees '].iloc[i] != 'not specified':
        number_interviewees_all += 1

print('Number of interviewees: ', number_interviewees_all, ' => ', number_interviewees_all/interv_len)

number_interviewees_unique = df_interv['Number of interviewees '].unique()
#number_interviewees_unique

Number of interviewees:  22  =>  0.6875


In [51]:
# Number of interviews

number_interviews_all = 0

for i in range(0, interv_len):
    if df_interv['Number of interviews'].iloc[i] != 'not specified':
        number_interviews_all += 1

print('Number of interviews: ', number_interviews_all, ' => ', number_interviews_all/interv_len)

number_interviews_unique = df_interv['Number of interviews'].unique()
number_interviews_unique

Number of interviews:  24  =>  0.75


array(['not specified', 20, 18, 38, 158, 84, 17, 25, 50, 31, 83, 69, 46,
       49, 29, 57, 33, 39, 24, 23, 27], dtype=object)

In [52]:
# Use of an interview guide

compl_text_all = 0
compl_appendix_all = 0
concr_examples_all = 0
broad_overv_all = 0
only_ment_all = 0
on_request_all = 0
not_spec_all = 0

for i in range(0, interv_len):
    if df_interv['Use of an interview guide'].iloc[i] == 'yes, complete (in text)':
        compl_text_all += 1
    elif df_interv['Use of an interview guide'].iloc[i] == 'yes, complete (in appendix)':
        compl_appendix_all += 1
    elif df_interv['Use of an interview guide'].iloc[i] == 'yes, concrete examples':
        concr_examples_all += 1
    elif df_interv['Use of an interview guide'].iloc[i] == 'yes, broad overview':
        broad_overv_all += 1
    elif df_interv['Use of an interview guide'].iloc[i] == 'yes, only mentioned':
        only_ment_all += 1
    elif df_interv['Use of an interview guide'].iloc[i] == 'yes, on request':
        on_request_all += 1
    elif df_interv['Use of an interview guide'].iloc[i] == 'not specified':
        not_spec_all += 1

print('complete in text: ', compl_text_all, ' => ', compl_text_all/interv_len)
print('complete in appendix: ', compl_appendix_all, ' => ', compl_appendix_all/interv_len)
print('concrete examples: ', concr_examples_all, ' => ', concr_examples_all/interv_len)
print('broad overview: ', broad_overv_all, ' => ', broad_overv_all/interv_len)
print('only mentioned: ', only_ment_all, ' => ', only_ment_all/interv_len)
print('on request: ', on_request_all, ' => ', on_request_all/interv_len)
print('not specified: ', not_spec_all, ' => ', not_spec_all/interv_len)

complete in text:  0  =>  0.0
complete in appendix:  8  =>  0.25
concrete examples:  9  =>  0.28125
broad overview:  0  =>  0.0
only mentioned:  4  =>  0.125
on request:  0  =>  0.0
not specified:  11  =>  0.34375


In [53]:
# Pre-test of interview guide

interv_guide_all = 0

for i in range(0, interv_len):
    if df_interv['Pre-test of interview guide'].iloc[i] == 'yes':
        interv_guide_all += 1

print('Pre-test of interview guide: ', interv_guide_all, ' => ', interv_guide_all/interv_len)

Pre-test of interview guide:  1  =>  0.03125


In [54]:
# Interview transcription

interv_transcr_all = 0

for i in range(0, interv_len):
    if df_interv['Interview transcription'].iloc[i] == 'yes':
        interv_transcr_all += 1

print('Interview transcription: ', interv_transcr_all, ' => ', interv_transcr_all/interv_len)

Interview transcription:  28  =>  0.875


In [55]:
# Interview review

interv_review_all = 0

for i in range(0, interv_len):
    if df_interv['Interview review'].iloc[i] == 'yes':
        interv_review_all += 1

print('Interview review: ', interv_review_all, ' => ', interv_review_all/interv_len)

Interview review:  8  =>  0.25


In [56]:
# Follow up interviews

interv_followup_all = 0

for i in range(0, interv_len):
    if df_interv['Follow up interviews'].iloc[i] == 'yes':
        interv_followup_all += 1

print('Follow up interviews: ', interv_followup_all, ' => ', interv_followup_all/interv_len)

Follow up interviews:  6  =>  0.1875


In [57]:
# Observation 

obers_all = 0

for i in range(0, all_len):
    if df['Observation '].iloc[i] == 'yes':
        obers_all += 1

print('Observation: ', obers_all, ' => ', obers_all/all_len)

Observation:  20  =>  0.5714285714285714


In [58]:
# Documentation 

doc_all = 0

for i in range(0, all_len):
    if df['Documentation '].iloc[i] == 'yes':
        doc_all += 1

print('Documentation: ', doc_all, ' => ', doc_all/all_len)

Documentation:  28  =>  0.8


In [59]:
# Questionnaires

quest_all = 0

for i in range(0, all_len):
    if df['Questionnaires'].iloc[i] == 'yes':
        quest_all += 1

print('Questionnaires: ', quest_all, ' => ', quest_all/all_len)

Questionnaires:  0  =>  0.0


In [60]:
# Questionnaires provided

compl_text_all = 0
compl_appendix_all = 0
concr_examples_all = 0
broad_overv_all = 0
only_ment_all = 0
on_request_all = 0
not_spec_all = 0

for i in range(0, quest_len):
    if df_quest['Questionnaires provided'].iloc[i] == 'yes, complete (in text)':
        compl_text_all += 1
    elif df_quest['Questionnaires provided'].iloc[i] == 'yes, complete (in appendix)':
        compl_appendix_all += 1
    elif df_quest['Questionnaires provided'].iloc[i] == 'yes, concrete examples':
        concr_examples_all += 1
    elif df_quest['Questionnaires provided'].iloc[i] == 'yes, broad overview':
        broad_overv_all += 1
    elif df_quest['Questionnaires provided'].iloc[i] == 'yes, only mentioned':
        only_ment_all += 1
    elif df_quest['Questionnaires provided'].iloc[i] == 'yes, on request':
        on_request_all += 1
    elif df_quest['Questionnaires provided'].iloc[i] == 'not specified':
        not_spec_all += 1

print('complete in text: ', compl_text_all, ' => ', compl_text_all/interv_len)
print('complete in appendix: ', compl_appendix_all, ' => ', compl_appendix_all/interv_len)
print('concrete examples: ', concr_examples_all, ' => ', concr_examples_all/interv_len)
print('broad overview: ', broad_overv_all, ' => ', broad_overv_all/interv_len)
print('only mentioned: ', only_ment_all, ' => ', only_ment_all/interv_len)
print('on request: ', on_request_all, ' => ', on_request_all/interv_len)
print('not specified: ', not_spec_all, ' => ', not_spec_all/interv_len)

complete in text:  0  =>  0.0
complete in appendix:  0  =>  0.0
concrete examples:  0  =>  0.0
broad overview:  0  =>  0.0
only mentioned:  0  =>  0.0
on request:  0  =>  0.0
not specified:  0  =>  0.0


In [61]:
# multiple data collection methods

multiple_methods_all = 0

for i in range(0, all_len):
    if (df['Interviews '].iloc[i] == 'yes' and df['Observation '].iloc[i] == 'yes' and df['Documentation '].iloc[i] == 'yes') or (df['Interviews '].iloc[i] == 'yes' and df['Observation '].iloc[i] == 'yes') or (df['Interviews '].iloc[i] == 'yes' and df['Documentation '].iloc[i] == 'yes') or (df['Observation '].iloc[i] == 'yes' and df['Documentation '].iloc[i] == 'yes'):
        multiple_methods_all += 1

print('multiple data collection methods: ', multiple_methods_all, ' => ', multiple_methods_all/all_len)


three_qual_all = 0

for i in range(0, all_len):
    if df['Interviews '].iloc[i] == 'yes' and df['Observation '].iloc[i] == 'yes' and df['Documentation '].iloc[i] == 'yes':
        three_qual_all += 1

print('use of all three qual. methods: ', three_qual_all, ' => ', three_qual_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [83, 24]

# my result
my = [multiple_methods_all, all_len-multiple_methods_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')


multiple data collection methods:  31  =>  0.8857142857142857
use of all three qual. methods:  14  =>  0.4

stat/chi2 is 1.3812821439446406
p value is 0.23988287228827543
dof is 1
expected is [[85.90140845 21.09859155]
 [28.09859155  6.90140845]]
Independent (H0 holds true)


In [62]:
# Kind of data collected (Mix of qualitative and quantitative data)

quantitative_all = 0
qualitative_all = 0
both_all = 0

for i in range(0, all_len):
    if df['Kind of data collected'].iloc[i] == 'quantitative':
        quantitative_all += 1
    elif df['Kind of data collected'].iloc[i] == 'qualitative':
        qualitative_all += 1
    elif df['Kind of data collected'].iloc[i] == 'both':
        both_all += 1

print('quantitative: ', quantitative_all, ' => ', quantitative_all/all_len)
print('qualitative: ', qualitative_all, ' => ', qualitative_all/all_len)
print('both: ', both_all, ' => ', both_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [33, 74]

# my result
my = [both_all, all_len-both_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')


quantitative:  0  =>  0.0
qualitative:  33  =>  0.9428571428571428
both:  2  =>  0.05714285714285714

stat/chi2 is 7.663430189964011
p value is 0.0056351267678752485
dof is 1
expected is [[26.37323944 80.62676056]
 [ 8.62676056 26.37323944]]
Dependent (reject H0)


In [63]:
# Data triangulation 

data_tri_all = 0

for i in range(0, all_len):
    if df['Data triangulation '].iloc[i] == 'yes':
        data_tri_all += 1

print('Data triangulation: ', data_tri_all, ' => ', data_tri_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [32, 75]

# my result
my = [data_tri_all, all_len-data_tri_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Data triangulation:  22  =>  0.6285714285714286

stat/chi2 is 10.792436941736007
p value is 0.001019156252427995
dof is 1
expected is [[40.69014085 66.30985915]
 [13.30985915 21.69014085]]
Dependent (reject H0)


In [64]:
# Researcher triangulation during data collection

researcher_tri_data_all = 0

for i in range(0, all_len):
    if df['Researcher triangulation during data collection'].iloc[i] == 'yes':
        researcher_tri_data_all += 1

print('Researcher triangulation during data collection: ', researcher_tri_data_all, ' => ', researcher_tri_data_all/all_len)

Researcher triangulation during data collection:  2  =>  0.05714285714285714


In [65]:
# Number of researchers conducting data collection

researchers_data_all = 0

for i in range(0, all_len):
    if df['Number of researchers conducting data collection'].iloc[i] != 'not specified':
        researchers_data_all += 1

print('Number of researchers conducting data collection: ', researchers_data_all, ' => ', researchers_data_all/all_len)

researchers_data_unique = df['Number of researchers conducting data collection'].unique()
#researchers_data_unique

Number of researchers conducting data collection:  14  =>  0.4


In [66]:
# Case study protocol

protocol_all = 0

for i in range(0, all_len):
    if df['Case study protocol'].iloc[i] == 'yes':
        protocol_all += 1

print('Case study protocol: ', protocol_all, ' => ', protocol_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [5, 102]

# my result
my = [protocol_all, all_len-protocol_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Case study protocol:  4  =>  0.11428571428571428

stat/chi2 is 1.049264906815325
p value is 0.30567644616496864
dof is 1
expected is [[  6.78169014 100.21830986]
 [  2.21830986  32.78169014]]
Independent (H0 holds true)

fisher is (0.3799019607843137, 0.22405987362385774)


In [67]:
# Case study database 

database_all = 0

for i in range(0, all_len):
    if df['Case study database '].iloc[i] == 'yes':
        database_all += 1

print('Case study database: ', database_all, ' => ', database_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [6, 101]

# my result
my = [database_all, all_len-database_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Case study database:  4  =>  0.11428571428571428

stat/chi2 is 0.6207221750212406
p value is 0.43077904778650933
dof is 1
expected is [[ 7.53521127 99.46478873]
 [ 2.46478873 32.53521127]]
Independent (H0 holds true)

fisher is (0.4603960396039604, 0.26215215137769615)


In [68]:
# Overlap of data collection and analysis

overlap_all = 0

for i in range(0, all_len):
    if df['Overlap of data collection and analysis'].iloc[i] == 'yes':
        overlap_all += 1

print('Overlap of data collection and analysis: ', overlap_all, ' => ', overlap_all/all_len)

Overlap of data collection and analysis:  11  =>  0.3142857142857143


_**Analysis**_

In [69]:
# Elucidation of the data analysis process

detail_all = 0
rough_all = 0

for i in range(0, all_len):
    if df['Elucidation of the data analysis process'].iloc[i] == 'in detail':
        detail_all += 1
    elif df['Elucidation of the data analysis process'].iloc[i] == 'rough':
        rough_all += 1

print('in detail: ', detail_all, ' => ', detail_all/all_len)
print('rough: ', rough_all, ' => ', rough_all/all_len)
print('Elucidation of the data analysis process: ', detail_all+rough_all, ' => ', (detail_all+rough_all)/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [43, 140]

# my result
my = [detail_all+rough_all, all_len-(detail_all+rough_all)]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

in detail:  29  =>  0.8285714285714286
rough:  5  =>  0.14285714285714285
Elucidation of the data analysis process:  34  =>  0.9714285714285714

stat/chi2 is 66.5660002860262
p value is 3.3837233458642906e-16
dof is 1
expected is [[ 64.63761468 118.36238532]
 [ 12.36238532  22.63761468]]
Dependent (reject H0)


In [70]:
# Field notes

notes_all = 0

for i in range(0, all_len):
    if df['Field notes'].iloc[i] == 'yes':
        notes_all += 1

print('Field notes: ', notes_all, ' => ', notes_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [9, 174]

# my result
my = [notes_all, all_len-notes_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Field notes:  15  =>  0.42857142857142855

stat/chi2 is 39.37990542450994
p value is 3.4886869954982e-10
dof is 1
expected is [[ 20.14678899 162.85321101]
 [  3.85321101  31.14678899]]
Dependent (reject H0)

fisher is (0.06896551724137931, 3.133613116041653e-08)


In [71]:
# Coding of raw data

code_all = 0

for i in range(0, all_len):
    if df['Coding of raw data'].iloc[i] == 'yes':
        code_all += 1

print('Coding of raw data: ', code_all, ' => ', code_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [12, 171]

# my result
my = [code_all, all_len-code_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Coding of raw data:  31  =>  0.8857142857142857

stat/chi2 is 119.68308496528613
p value is 7.421917603133727e-28
dof is 1
expected is [[ 36.09633028 146.90366972]
 [  6.90366972  28.09633028]]
Dependent (reject H0)


In [72]:
# Coding scheme available

compl_text_all = 0
compl_appendix_all = 0
partially_all = 0
on_request_all = 0
not_spec_all = 0

for i in range(0, code_len):
    if df_code['Coding scheme available'].iloc[i] == 'yes, complete (in text)':
        compl_text_all += 1
    elif df_code['Coding scheme available'].iloc[i] == 'yes, complete (in appendix)':
        compl_appendix_all += 1
    elif df_code['Coding scheme available'].iloc[i] == 'yes, partially':
        partially_all += 1
    elif df_code['Coding scheme available'].iloc[i] == 'yes, on request':
        on_request_all += 1
    elif df_code['Coding scheme available'].iloc[i] == 'no':
        not_spec_all += 1

print('complete in text: ', compl_text_all, ' => ', compl_text_all/code_len)
print('complete in appendix: ', compl_appendix_all, ' => ', compl_appendix_all/code_len)
print('partially: ', partially_all, ' => ', partially_all/code_len)
print('on request: ', on_request_all, ' => ', on_request_all/code_len)
print('no: ', not_spec_all, ' => ', not_spec_all/code_len)

complete in text:  5  =>  0.16129032258064516
complete in appendix:  6  =>  0.1935483870967742
partially:  11  =>  0.3548387096774194
on request:  0  =>  0.0
no:  9  =>  0.2903225806451613


In [73]:
# Validation of coding scheme

validation_all = 0

for i in range(0, code_len):
    if df_code['Validation of coding scheme'].iloc[i] == 'yes':
        validation_all += 1

print('Validation of coding scheme: ', validation_all, ' => ', validation_all/code_len)

Validation of coding scheme:  3  =>  0.0967741935483871


In [74]:
# Example codes available

example_all = 0

for i in range(0, code_len):
    if df_code['Example codes available'].iloc[i] == 'yes':
        example_all += 1

print('Example codes available: ', example_all, ' => ', example_all/code_len)

Example codes available:  12  =>  0.3870967741935484


In [75]:
# Researcher triangulation during data analysis

researcher_tri_analysis_all = 0

for i in range(0, all_len):
    if df['Researcher triangulation during data analysis'].iloc[i] == 'yes':
        researcher_tri_analysis_all += 1

print('Researcher triangulation during data analysis: ', researcher_tri_analysis_all, ' => ', researcher_tri_analysis_all/all_len)

Researcher triangulation during data analysis:  13  =>  0.37142857142857144


In [76]:
# Number of researchers conducting data analysis

researchers_analysis_all = 0

for i in range(0, all_len):
    if df['Number of researchers conducting data analysis'].iloc[i] != 'not specified':
        researchers_analysis_all += 1

print('Number of researchers conducting data analysis: ', researchers_analysis_all, ' => ', researchers_analysis_all/all_len)

researchers_analysis_unique = df['Number of researchers conducting data analysis'].unique()
researchers_analysis_unique

Number of researchers conducting data analysis:  17  =>  0.4857142857142857


array(['not specified', 'min. 2', 2, 1, 4, 3, '2 to 3'], dtype=object)

In [77]:
# Inter-rater reliability test 

inter_rater_all = 0


for i in range(0, all_len):
    if df['Inter-rater reliability test '].iloc[i] == 'yes':
        inter_rater_all += 1

print('Inter-rater reliability test : ', inter_rater_all, ' => ', inter_rater_all/all_len)

Inter-rater reliability test :  3  =>  0.08571428571428572


In [78]:
# Inter-rater agreement ratio

inter_ratio_unique = df['Inter-rater agreement ratio'].unique()
inter_ratio_unique

array(['not specified', 0.743, 'multiple ratios (see detail)', 1],
      dtype=object)

In [79]:
# Coding software

atlas_all = 0
nvivo_all = 0
no_all = 0

for i in range(0, code_len):
    if df_code['Coding software'].iloc[i] == 'ATLAS-ti':
        atlas_all += 1
    elif df_code['Coding software'].iloc[i] == 'Nvivo':
        nvivo_all += 1
    elif df_code['Coding software'].iloc[i] == 'not specified':
        no_all += 1

print('ATLAS-ti: ', atlas_all, ' => ', atlas_all/code_len)
print('Nvivo: ', nvivo_all, ' => ', nvivo_all/code_len)
print('not specified: ', no_all, ' => ', no_all/code_len)

ATLAS-ti:  4  =>  0.12903225806451613
Nvivo:  5  =>  0.16129032258064516
not specified:  22  =>  0.7096774193548387


In [80]:
# Data displays (technique)

expl_all = 0
impl_all = 0
no_all = 0

for i in range(0, all_len):
    if df['Data displays (technique)'].iloc[i] == 'yes, explicitly':
        expl_all += 1
    elif df['Data displays (technique)'].iloc[i] == 'yes, implicitly':
        impl_all += 1
    elif df['Data displays (technique)'].iloc[i] == 'not specified':
        no_all += 1

print('explicitly: ', expl_all, ' => ', expl_all/all_len)
print('implicitly: ', impl_all, ' => ', impl_all/all_len)
print('no: ', no_all, ' => ', no_all/all_len)

explicitly:  2  =>  0.05714285714285714
implicitly:  10  =>  0.2857142857142857
no:  23  =>  0.6571428571428571


In [81]:
# Data displays

displays_all = 0

for i in range(0, all_len):
    if df['Data displays'].iloc[i] == 'yes':
        displays_all += 1

print('Data displays: ', displays_all, ' => ', displays_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [100, 83]

# my result
my = [displays_all, all_len-displays_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Data displays:  35  =>  1.0

stat/chi2 is 23.746512294298196
p value is 1.0989433550560636e-06
dof is 1
expected is [[113.32568807  69.67431193]
 [ 21.67431193  13.32568807]]
Dependent (reject H0)


In [82]:
# Flexible and opportunistic process

flexible_all = 0

for i in range(0, all_len):
    if df['Flexible and opportunistic process'].iloc[i] == 'yes':
        flexible_all += 1

print('Flexible and opportunistic process: ', flexible_all, ' => ', flexible_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [5, 178]

# my result
my = [flexible_all, all_len-flexible_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Flexible and opportunistic process:  2  =>  0.05714285714285714

stat/chi2 is 0.1549474983840168
p value is 0.6938514887927061
dof is 1
expected is [[  5.87614679 177.12385321]
 [  1.12385321  33.87614679]]
Independent (H0 holds true)

fisher is (0.46348314606741575, 0.31273952854271525)


In [83]:
# Logical chain of evidence

evidence_all = 0

for i in range(0, all_len):
    if df['Logical chain of evidence'].iloc[i] == 'yes':
        evidence_all += 1

print('Logical chain of evidence: ', evidence_all, ' => ', evidence_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [35, 148]

# my result
my = [evidence_all, all_len-evidence_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Logical chain of evidence:  34  =>  0.9714285714285714

stat/chi2 is 79.09779826986579
p value is 5.911016837735425e-19
dof is 1
expected is [[ 57.92201835 125.07798165]
 [ 11.07798165  23.92201835]]
Dependent (reject H0)


In [84]:
# Empirical testing (only explanatory)

empir_test_all = 0

for i in range(0, explan_len):
    if df_explan['Empirical testing (only explanatory)'].iloc[i] == 'yes':
        empir_test_all += 1

print('Empirical testing (only explanatory): ', empir_test_all, ' => ', empir_test_all/explan_len)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [11, 6]

# my result
my = [empir_test_all, explan_len-empir_test_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))

Empirical testing (only explanatory):  1  =>  1.0

fisher is (0.0, 1.0)


In [85]:
# Explanation building (only exploratory)

explan_build_all = 0

for i in range(0, explor_len):
    if df_explor['Explanation building (only exploratory)'].iloc[i] == 'yes':
        explan_build_all += 1

print('Explanation building (only exploratory): ', explan_build_all, ' => ', explan_build_all/explor_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [32, 22]

# my result
my = [explan_build_all, explor_len-explan_build_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Explanation building (only exploratory):  31  =>  0.96875

stat/chi2 is 12.655045465582905
p value is 0.00037455348643691236
dof is 1
expected is [[39.55813953 14.44186047]
 [23.44186047  8.55813953]]
Dependent (reject H0)


In [86]:
# Time series analysis (only explanatory)

time_series_all = 0

for i in range(0, explan_len):
    if df_explan['Time series analysis (only explanatory)'].iloc[i] == 'yes':
        time_series_all += 1

print('Time series analysis (only explanatory): ', time_series_all, ' => ', time_series_all/explan_len)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [2, 15]

# my result
my = [time_series_all, explan_len-time_series_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))

Time series analysis (only explanatory):  1  =>  1.0

fisher is (0.0, 0.16666666666666652)


In [87]:
# Use of natural controls (only explanatory single CS)

# dataframe for explanatory single CS

df_explan_single = df_explan.loc[df_explan['Number of cases'] == 1]
explan_single_len = len(df_explan_single)
print('Number explanatory single cases: ', explan_single_len)
#df_single.head()


natural_controls_all = 0

for i in range(0, explan_single_len):
    if df_explan_single['Use of natural controls (only explanatory single CS)'].iloc[i] == 'yes':
        natural_controls_all += 1

print('Use of natural controls (only explanatory single CS): ', natural_controls_all, ' => ', natural_controls_all/explan_single_len)


# Fisher's exact test
print('')

# Dubé & Paré (2003)
dp = [0, 17]

# my result
my = [natural_controls_all, explan_len-natural_controls_all]

# defining the table
data = [dp, my]

print("fisher is " + str(stats.fisher_exact(data)))

Number explanatory single cases:  1
Use of natural controls (only explanatory single CS):  0  =>  0.0

fisher is (nan, 1.0)


In [88]:
# Search for cross-case patterns (only multiple CS)

cross_all = 0

for i in range(0, multiple_len):
    if df_multiple['Search for cross-case patterns (only multiple CS)'].iloc[i] == 'yes':
        cross_all += 1

print('Search for cross-case patterns (only multiple CS): ', cross_all, ' => ', cross_all/multiple_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [45, 29]

# my result
my = [cross_all, multiple_len-cross_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')
    
# Fisher's exact test
print('')

print("fisher is " + str(stats.fisher_exact(data)))

Search for cross-case patterns (only multiple CS):  11  =>  0.9166666666666666

stat/chi2 is 3.076097972972974
p value is 0.07945092866381553
dof is 1
expected is [[48.18604651 25.81395349]
 [ 7.81395349  4.18604651]]
Independent (H0 holds true)

fisher is (0.14106583072100312, 0.04984347115109176)


In [89]:
# Description of the observed world

detail_all = 0
rough_all = 0

for i in range(0, all_len):
    if df['Description of the observed world'].iloc[i] == 'in detail':
        detail_all += 1
    elif df['Description of the observed world'].iloc[i] == 'rough':
        rough_all += 1

print('in detail: ', detail_all, ' => ', detail_all/all_len)
print('rough: ', rough_all, ' => ', rough_all/all_len)

in detail:  34  =>  0.9714285714285714
rough:  1  =>  0.02857142857142857


In [90]:
# Excerpts of raw data in case report

excerpts_all = 0
quotes_all = 0
figures_all = 0
screenshots_all = 0
blog_all = 0

for i in range(0, all_len):
    if df['Excerpts of raw data in case report'].iloc[i] != 'no':
        excerpts_all += 1

print('excerpts used: ', excerpts_all, ' => ', excerpts_all/all_len)
        
for i in range(0, all_len):
    if 'quotes' in df['Excerpts of raw data in case report'].iloc[i]:
        quotes_all += 1
    if 'figures' in df['Excerpts of raw data in case report'].iloc[i]:
        figures_all += 1
    if 'screenshots' in df['Excerpts of raw data in case report'].iloc[i]:
        screenshots_all += 1
    if 'blog' in df['Excerpts of raw data in case report'].iloc[i]:
        blog_all += 1

print('quotes: ', quotes_all, ' => ', quotes_all/all_len)
print('photograph: ', figures_all, ' => ', figures_all/all_len)
print('screenshots: ', screenshots_all, ' => ', screenshots_all/all_len)
print('blog: ', blog_all, ' => ', blog_all/all_len)


excerpts_all = 0

for i in range(0, all_len):
    if df['Excerpts of raw data in case report'].iloc[i] == 'no':
        excerpts_all += 1

print('excerpts of raw data: ', 35-excerpts_all, ' => ', (35-excerpts_all)/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [61, 122]

# my result
my = [excerpts_all, all_len-excerpts_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

excerpts used:  32  =>  0.9142857142857143
quotes:  32  =>  0.9142857142857143
photograph:  1  =>  0.02857142857142857
screenshots:  1  =>  0.02857142857142857
blog:  1  =>  0.02857142857142857
excerpts of raw data:  32  =>  0.9142857142857143

stat/chi2 is 7.533514965226032
p value is 0.006056163536783058
dof is 1
expected is [[ 53.72477064 129.27522936]
 [ 10.27522936  24.72477064]]
Dependent (reject H0)


In [91]:
# Project reviews

project_review_all = 0

for i in range(0, all_len):
    if df['Project reviews'].iloc[i] == 'yes':
        project_review_all += 1

print('Project reviews: ', project_review_all, ' => ', project_review_all/all_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [27, 156]

# my result
my = [project_review_all, all_len-project_review_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Project reviews:  14  =>  0.4

stat/chi2 is 10.665541735955605
p value is 0.0010914987936343312
dof is 1
expected is [[ 34.41743119 148.58256881]
 [  6.58256881  28.41743119]]
Dependent (reject H0)


In [92]:
# Comparison with conflicting literature (only exploratory)

confl_lit_all = 0

for i in range(0, explor_len):
    if df_explor['Comparison with conflicting literature (only exploratory)'].iloc[i] == 'yes':
        confl_lit_all += 1

print('Comparison with conflicting literature (only exploratory): ', confl_lit_all, ' => ', confl_lit_all/explor_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [6, 48]

# my result
my = [confl_lit_all, explor_len-confl_lit_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Comparison with conflicting literature (only exploratory):  17  =>  0.53125

stat/chi2 is 16.022406097436292
p value is 6.259727427627159e-05
dof is 1
expected is [[14.44186047 39.55813953]
 [ 8.55813953 23.44186047]]
Dependent (reject H0)


In [93]:
# Comparison with similar literature (only exploratory)

sim_lit_all = 0

for i in range(0, explor_len):
    if df_explor['Comparison with similar literature (only exploratory)'].iloc[i] == 'yes':
        sim_lit_all += 1

print('Comparison with similar literature (only exploratory): ', sim_lit_all, ' => ', sim_lit_all/explor_len)


# chi2
print('')

# Dubé & Paré (2003)
dp = [20, 34]

# my result
my = [sim_lit_all, explor_len-sim_lit_all]

# defining the table
data = [dp, my]
stat, p, dof, expected = chi2_contingency(data)
  
# interpret p-value
alpha = 0.05
print("stat/chi2 is " + str(stat))
print("p value is " + str(p))
print("dof is " + str(dof))
print("expected is " + str(expected))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

Comparison with similar literature (only exploratory):  30  =>  0.9375

stat/chi2 is 24.275120241769542
p value is 8.351080537495508e-07
dof is 1
expected is [[31.39534884 22.60465116]
 [18.60465116 13.39534884]]
Dependent (reject H0)


_**Miscellaneous**_

In [94]:
# Key case study characteristics summarized 

cs_charact_all = 0

for i in range(0, all_len):
    if df['Key case study characteristics summarized '].iloc[i] == 'yes':
        cs_charact_all += 1

print('Key case study characteristics summarized : ', cs_charact_all, ' => ', cs_charact_all/all_len)

Key case study characteristics summarized :  0  =>  0.0


In [95]:
# Methodological literature cited
# note: for this analysis the cited methodological litertur style was standardized in the excel file (oriented to JSIS
# bibliography style + first names of the authors limited to first character of the name and no accents on characters)

methodol_all = 0

for i in range(0, all_len):
    if df['Methodological literature cited'].iloc[i] == 'yes':
        methodol_all += 1

print('Methodological literature cited: ', methodol_all, ' => ', methodol_all/all_len)


# sum of each methodological literature cited

#Allison, G., 1971. Essence of Decision
allison = 0
#Alvesson, M., and Sköldberg, K. 2009. Reflexive Methodology:
alvesson = 0
#Bala, H., Venkatesh, V., 2007. Assimilation of interorganizational business process standards
bala = 0
#Barley, S., 1996. Technicians in the Workplace: Ethnographic
barley = 0
#Barrett, M., Oborn, E., Orlikowski, W., Yates, J., 2012. Reconfiguring Boundary Relations: Robotic Innovations in Pharmacy Work,”
barrett = 0
#Bechky, B., Okhuysen, G., 2011. Expecting the Unexpected? How SWAT Officers and Film Crews Handle
bechky = 0
#Benbasat, I., Goldstein, D., Mead, M., 1987. The case research strategy in studies of information system
benbasat_1987 = 0
#Benbasat, I., Goldstein, D., Mead, M., 1997. The case research strategy in studies of information systems
benbasat_1997 = 0
#Biernacki, P., Waldorf, D., 1981. Snowball sampling: problem and techniques of chain referral sampling
biernacki = 0
#Birks, D., Fernandez, W., Levina, N., Nasirin, S., 2013. Grounded theory method in information systems research
birks = 0
#Blei, D., 2012. Probabilistic topic models
blei = 0
#Bowen, G., 2006. Grounded Theory and Sensitizing Concepts,”
bowen_2006 = 0
#Bowen, G., 2008. Grounded theory and sensitizing concepts
bowen_2008 = 0
#Boyatzis, R., 1998. Transforming Qualitative Information: Thematic Analysis and Code Development
boyatzis = 0
#Bryant, A., Charmaz, K., 2007. The Sage Handbook of Grounded Theory
bryant = 0
#Burton-Jones, A., McLean, E., and Monod, E., 2015. Theoretical Perspectives in IS Research: From Variance and Process to
burton = 0
#Chan, Y., Reich, B., 2007. IT alignment: what have we learned?
chan = 0
#Charmaz, K., 2006. Constructing Grounded Theory: A Practical Guide Through Qualitative Research
charmaz_2006 = 0
#Charmaz, K., 2014. Constructing Grounded Theory
charmaz_2014 = 0
#Corbin, J., Strauss, A., 1990. Grounded theory research: procedures, canons, and evaluative criteria
corbin_1990 = 0
#Corbin, J., Strauss, A., 2008. Basics of Qualitative Research: Techniques and Procedures for Developing Grounded Theory
corbin_2008 = 0
#Creswell, J., Clark, V., 2007. Designing and Conducting Mixed Methods Research
creswell = 0
#Darke, P., Shanks, G., Broadbent, M., 1998. Successfully completing case study research: combining rigour, relevance and pragmatism
darke = 0
#Denzin, N., 1997. Interpretive Ethnography: Ethnographic Practices for the 21st Century
denzin_1997 = 0
#Denzin, N., Lincoln, Y., 2000. Handbook of Qualitative Research
denzin_2000 = 0
#Dube, L., Pare, G., 2003. Rigor in information systems positivist case research: current practices, trends, and recommendations
dube = 0
#Easterby-Smith, M., Thorpe, R., Lowe, A., 1991. Management Research: An Introduction
easterby = 0
#Edmondson, A., McManus, S., 2007. Methodological Fit in Management Field Research,”
edmonson = 0
#Eisenhardt, K., 1989. Building theories from case study research
eisen_1989 = 0
#Eisenhardt, K., 2000. Paradox, Spirals, Ambivalence: The New Language of Change and Pluralism,”
eisen_2000 = 0
#Eisenhardt, K., Graebner, M., 2007. Theory building from cases: opportunities and challenges
eisen_2007 = 0
#Elbanna, A., 2010. Rethinking IS Project Boundaries in Practice:
elbanna = 0
#Elsbach, K., Sutton, R., 1992. Acquiring organizational legitimacy through illegitimate actions: a marriage of institutional and impression management theories
elsbach = 0
#Gerring, J., 2006. Case Study Research: Principles and Practices
gerring_2006 = 0
#Gerring, J., 2007. Case Study Research: Principles and Practices
gerring_2007 = 0
#Gersick, C., 1991. Revolutionary Change Theories: A Multilevel
gersick = 0
#Ghazawneh, A., Henfridsson, O., 2013. Balancing Platform Control and External Contribution in Third-Party Development:
ghazawneh = 0
#Gibbert, M., Ruigrok, W., 2010. The what and how of case study rigor: three strategies based on published work
gibbert_2010 = 0
#Gibbert, M., Ruigrok, W., Wicki, B., 2008. What Passes as a Rigorous Case Study?,”
gibbert_2008 = 0
#Gimpel, K., Schneider, N., O'Connor, B., Das, D., Mills, D., Eisenstein, J., Heilman, M., Yogatama, D., Flanigan, J., Smith, N., 2011. Part-of-speech tagging for twitter
gimpel = 0
#Gioia, D., Corley, K., and Hamilton, A., 2013. Seeking Qualitative Rigor in Inductive Research: Notes on the Gioia Methodology,”
gioia = 0
#Glaser, B., 1978. Theoretical Sensitivity: Advances in the Methodology of Grounded 
glaser_1978 = 0
#Glaser, B., Strauss, A., 1967. The Discovery of Grounded Theory: Strategies for Qualitative Research
glaser_1967 = 0
#Goetz, J., LeCompte, M., 1984. Ethnography and Qualitative Design in Educational Research
goetz = 0
#Golden, B., 1992. The past is the past – or is it? The use of retrospective accounts of past strategy
golden = 0
#Golsorkhi, D., Rouleau, L., Seidl, D., Vaara, E. (Eds.), 2010. Cambridge Handbook of Strategy as Practice
golsorkhi = 0
#Gregor, S., 2006. The Nature of Theory in Information Systems,”
gregor = 0
#Gregory, R., Beck, R., Keil, M., 2013. Control balancing in information systems development offshoring projects
gregory_2013 = 0
#Gregory, R., Keil, M., 2014. Blending bureaucratic and collaborative management styles to achieve control ambidexterity in IS projects
gregory_2014 = 0
#Gregory, R., Keil, M., Muntermann, J., Mähring, M., 2015. “Paradoxes and the Nature of Ambidexterity in IT Transformation
gregory_2015 = 0
#Guillemette, M., Pare, G., 2012. Transformation of the Information Technology Function in Organizations:
guill = 0
#Huang, J., Pan, S., Liu, J., 2017. Boundary permeability and online-offline hybrid organization: a case study of Suning
huang = 0
#Huber, G., Power, D., 1985. Retrospective Reports of Strategic Level Managers: Guidelines for Increasing Their
huber = 0
#Huberman, A., Miles, M., 1994. Data management and analysis methods
huberman = 0
#Jarzabkowski, P., 2005. Strategy as Practice: An Activity-based Approach
jarz_2005 = 0
#Jarzabkowski, P., 2010. An activity-theory approach to Strategy as Practice
jarz_2010 = 0
#Johns, G., 2006. The Essential Impact of Context on Organizational Behavior,”
johns = 0
#Kaplan, S., 2008. Framing contests: strategy making under uncertainty
kaplan = 0
#Katz, E., Haas, H., Gurevitch, M., 1973. On the Use of the Mass Media for Important Things,”
katz = 0
#Keil, T., 2002. External Corporate Venturing: Strategic Renewal in Rapidly Changing Industries
keil = 0
#Kelly, G., 1955. The Psychology of Personal ConstructsPettigrew, A.M., 1990. Longitudinal field research on change: theory and practice
kelly = 0
#Kirsch, L., 2004. Deploying Common Systems Globally:
kirsch = 0
#Kirsh, D., Maglio, P., 1994. On Distinguishing Epistemic from
kirsh = 0
#Klein, H., Myers, M., 1999. A set of principles for conducting and evaluating interpretive field studies in information systems
klein = 0
#Krippendorff, K., 2004. Content Analysis: An Introduction to Its Methodology
krippe = 0
#Kuzel, A., 1992. Sampling in Qualitative Inquiry,”
kuzel = 0
#Langley, A., 1999. Strategies for theorizing from process data
langley = 0
#Lee, A., 1989. A scientific methodology for MIS case studies
lee_1989 = 0
#Lee, A., Baskerville, R., 2003. Generalizing Generalizability in Information Systems Research,”
lee_2003 = 0
#Lee, J., Berente, N., 2012. Digital Innovation and the Division of Innovative Labor: Digital Controls in the Automotive Industry
lee_2012 = 0
#Leonardi, P., 2013. When does technology use enable network change in organizations? A comparative study of feature use and shared affordance
leonardi = 0
#Lincoln, Y., Guba, E., 1985. Naturalistic Inquiry
lincoln = 0
#Locke, K., Golden-Biddle, K., Feldman, M., 2008. Making Doubt Generative: Rethinking the Role of Doubt in the Research Process
locke = 0
#Mantere, S., Ketokivi, M., 2013. Reasoning in Organization Science,”
mantere = 0
#Markus, M., 1989. Case Selection in a Disconfirmatory Case
markus = 0
#Mays, N., Pope, C., 1995. Rigour and qualitative research
mays = 0
#Miles, M., Huberman, A., 1994. Qualitative Data Analysis: An Expanded Sourcebook
miles_1994 = 0
#Miles, R., Snow, C., Meyer, A., Coleman, H., 1978. Organizational strategy, structure, and process
miles_1978 = 0
#Miller, C., Cardinal, L., Glick, W., 1997. Retrospective Reports in Organizational Research: A Reexamination of
miller = 0
#Mimno, D., McCallum, A., 2007. Mining a digital library for influential authors
mimno = 0
#Mingers, J., 2014. Guidelines for Conducting Semiotic Research in Information Systems,”
mingers = 0
#Mingers, J., Willcocks, L., 2014. An Integrative Semiotic Framework for Information Systems: 
mingers_will_2014 = 0
#Mingers, J., Willcocks, L., 2017. An Integrative Semiotic Methodology for Is Research,”
mingers_will_2017 = 0
#Montealegre, R., 2002. A process model of capability development: lessons from the electronic commerce strategy at Bolsa de Valores de Guayaquil
monte = 0
#Myers, M., 1997. Qualitative research in information systems
myers_1997 = 0
#Myers, M., 2009. Qualitative Research in Business & Management
myers_2009 = 0
#Myers, M., Newman, M., 2007. The qualitative interview in IS research: examining the craft
myers_2007 = 0
#Ngwenyama, O., Klein, S., 2018. Phronesis, argumentation and puzzle solving in IS research: illustrating an approach to phronetic IS research practice
ngwenyama = 0
#Niiniluoto, I., 1999. Defending Abduction
nii = 0
#Pan, S., Tan, B., 2011. Demystifying case research: a structured pragmatic situational (SPS) approach to conducting case studies
pan = 0
#Pare, G., 2004. Investigating information systems with positivist case research
pare = 0
#Patton, M., 1990. Qualitative Evaluation and Research Methods
patton_1990 = 0
#Patton, M., 2002. Qualitative Research and Evaluation Methods
patton_2002 = 0
#Pentland, B., 1999. Building process theory with narrative: from description to explanation
pentland = 0
#Pettigrew, A., 1990. Longitudinal field research on change: theory and practice
pettigrew = 0
#Pickering, A., 1993. The Mangle of Practice: Agency and Emergence
pick = 0
#Poole, M., Lambert, N., Murase, T., Asencio, R., McDonald, J., 2016. Sequential analysis of processes
poole = 0
#Pratt, M., 2008. Fitting oval pegs into round holes: tensions in evaluating and publishing qualitative research in top-tier American journals
pratt = 0
#Reich, B., Benbasat, I., 1996. Measuring the linkage between business and information technology objectives
reich = 0
#Reynolds, T., Gutman, J., 1988. Laddering Theory, Method, Analysis, and Interpretation
reynolds = 0
#Rubin, H., Rubin, I., 2005. Qualitative Interviewing: The Art of Hearing Data
rubin = 0
#Sabherwal, R., Chan, Y., 2001. Alignment between business and IS strategies: a study of prospectors, analyzers, and defenders
sab_2001 = 0
#Sabherwal, R., Robey, D., 1995. Reconciling Variance and Process Strategies for Studying Information System Development,”
sab_1995 = 0
#Sarker, S., Sarker, A., Sahaym, S., Bjørn-Andersen, N., 2012. Exploring value cocreation in relationships between an ERP vendor and its partners: a revelatory case
sarker = 0
#Schultze, U., Avital, M., 2011. Designing Interviews to Generate Rich Data for Information Systems Research
schultze = 0
#Seidel, S., Recker, J., vom Brocke, J., 2013. Sensemaking and sustainable practicing: functional affordances of information systems in green transformations
seidel = 0
#Siggelkow, N., 2007. Persuasion with case studies
siggelkow = 0
#Spradley, J., 1979. The Ethnographic Interview
spradley = 0
#Strauss, A., Corbin, J., 1998. Basics of Qualitative Research: Techniques and Procedures for Developing Grounded Theory
strauss = 0
#Street, C., Ward, K., 2012. Improving validity and reliability in longitudinal case study timelines
street = 0
#Strong, D., Volkoff, O., Johnson, S., Pelletier, L., Tulu, B., Bar-On, I., Trudel, J., Garber, L., 2014. A theory of organization-EHR affordance actualization
strong = 0
#Tan, B., Pan, S., Hackney, R., 2010. The strategic implications of web technologies: a process model of how web technologies enhance organizational
tan = 0
#Tsang, E., 2014. Case studies and generalization in information systems research: A critical realist perspective
tsang = 0
#Urquhart, C., 2013. Grounded Theory for Qualitative Research
ur_2013 = 0
#Urquhart, C., Lehmann, H., Myers, M., 2010. Putting the ‘theory’back into grounded theory: guidelines for grounded theory studies in information systems
ur_2010 = 0
#Uys, J., Du Preez, N., Uys, E., 2008. Leveraging unstructured information using topic modelling
uys = 0
#Van de Ven, A., 1992. Suggestions for Studying Strategy
ven_1992 = 0
#Van de Ven, A., 2007. Engaged Scholarship: A Guide for Organizational and Social Research
ven_2007 = 0
#Van de Ven, A., Poole, M., 1995. Explaining Development and Change in Organizations
ven_1995 = 0
#Venkatesh, V., Brown, S., Bala, H., 2013. Bridging the Qualitative-Quantitative Divide : Guidelines for Conducting
venka = 0
#Voss, C., Tsikriktsis, N., Frohlich, M., 2002. Case research in operations management
voss = 0
#Walsh, I., Holton, J., Bailyn, L., Fernandez, W., Levina, N., and Glaser, B., 2015. What Grounded Theory Is … A Critically
walsh = 0
#Walsham, G., 2006. Doing interpretive research
walsham = 0
#Weick, K., 1995. Sensemaking in Organizations
weick = 0
#Yin, R., 1984. Case Study Research
yin_1984 = 0
#Yin, R., 1994. Applications of Case Study Research
yin_1994a = 0
#Yin, R., 1994. Case Study Research: Design and Methods
yin_1994b = 0
#Yin, R., 2002. Case Study Research: Design and Methods
yin_2002 = 0
#Yin, R., 2003. Case Study Research: Design and Methods
yin_2003 = 0
#Yin, R., 2009. Case Study Research: Design and Methods
yin_2009 = 0
#Yin, R., 2014. Case Study Research: Design and Methods
yin_2014 = 0
#Yin, R., 2015. Qualitative Research from Start to Finish
yin_2015 = 0
#Yoo, Y., Henfridsson, O., Lyytinen, K., 2010. The New Organizing Logics of Digital Innovation: An Agenda for Information Systems Research
yoo = 0


for i in range(0, all_len):       
    if 'Allison' in df['Methodological literature cited DETAIL'].iloc[i]:
        allison += 1
    if 'Alvesson' in df['Methodological literature cited DETAIL'].iloc[i]:
        alvesson += 1
    if 'Bala, H., Venkatesh' in df['Methodological literature cited DETAIL'].iloc[i]:
        bala += 1
    if 'Barley, S., 1996' in df['Methodological literature cited DETAIL'].iloc[i]:
        barley += 1    
    if 'Barrett, M., Oborn' in df['Methodological literature cited DETAIL'].iloc[i]:
        barrett += 1
    if 'Bechky, B., Okhuysen' in df['Methodological literature cited DETAIL'].iloc[i]:
        bechky += 1   
    if 'Benbasat, I., Goldstein, D., Mead, M., 1987' in df['Methodological literature cited DETAIL'].iloc[i]:
        benbasat_1987 += 1    
    if 'Benbasat, I., Goldstein, D., Mead, M., 1997' in df['Methodological literature cited DETAIL'].iloc[i]:
        benbasat_1997 += 1
    if 'Biernacki, P., Waldorf' in df['Methodological literature cited DETAIL'].iloc[i]:
        biernacki += 1    
    if 'Birks, D., Fern' in df['Methodological literature cited DETAIL'].iloc[i]:
        birks += 1
    if 'Blei, D., 2012' in df['Methodological literature cited DETAIL'].iloc[i]:
        blei += 1        
    if 'Bowen, G., 2006' in df['Methodological literature cited DETAIL'].iloc[i]:
        bowen_2006 += 1        
    if 'Bowen, G., 2008' in df['Methodological literature cited DETAIL'].iloc[i]:
        bowen_2008 += 1        
    if 'Boyatzis, R., 1998' in df['Methodological literature cited DETAIL'].iloc[i]:
        boyatzis += 1        
    if 'Bryant, A., Charmaz' in df['Methodological literature cited DETAIL'].iloc[i]:
        bryant += 1        
    if 'Burton-Jones, A., McLean' in df['Methodological literature cited DETAIL'].iloc[i]:
        burton += 1        
    if 'Chan, Y., Reich' in df['Methodological literature cited DETAIL'].iloc[i]:
        chan += 1        
    if 'Charmaz, K., 2006' in df['Methodological literature cited DETAIL'].iloc[i]:
        charmaz_2006 += 1        
    if 'Charmaz, K., 2014' in df['Methodological literature cited DETAIL'].iloc[i]:
        charmaz_2014 += 1        
    if 'Corbin, J., Strauss, A., 1990' in df['Methodological literature cited DETAIL'].iloc[i]:
        corbin_1990 += 1        
    if 'Corbin, J., Strauss, A., 2008' in df['Methodological literature cited DETAIL'].iloc[i]:
        corbin_2008 += 1
    if 'Creswell, J., Clark' in df['Methodological literature cited DETAIL'].iloc[i]:
        creswell += 1
    if 'Darke, P., Shanks' in df['Methodological literature cited DETAIL'].iloc[i]:
        darke += 1 
    if 'Denzin, N., 1997' in df['Methodological literature cited DETAIL'].iloc[i]:
        denzin_1997 += 1 
    if 'Denzin, N., Lincoln' in df['Methodological literature cited DETAIL'].iloc[i]:
        denzin_2000 += 1         
    if 'Dube, L., Pare' in df['Methodological literature cited DETAIL'].iloc[i]:
        dube += 1 
    if 'Easterby-Smith, M., Thorpe' in df['Methodological literature cited DETAIL'].iloc[i]:
        easterby += 1         
    if 'Edmondson, A., McManus' in df['Methodological literature cited DETAIL'].iloc[i]:
        edmonson += 1         
    if 'Eisenhardt, K., 1989' in df['Methodological literature cited DETAIL'].iloc[i]:
        eisen_1989 += 1 
    if 'Eisenhardt, K., 2000' in df['Methodological literature cited DETAIL'].iloc[i]:
        eisen_2000 += 1         
    if 'Eisenhardt, K., Graebner' in df['Methodological literature cited DETAIL'].iloc[i]:
        eisen_2007 += 1         
    if 'Elbanna, A., 2010' in df['Methodological literature cited DETAIL'].iloc[i]:
        elbanna += 1         
    if 'Elsbach, K., Sutton' in df['Methodological literature cited DETAIL'].iloc[i]:
        elsbach += 1         
    if 'Gerring, J., 2006' in df['Methodological literature cited DETAIL'].iloc[i]:
        gerring_2006 += 1         
    if 'Gerring, J., 2007' in df['Methodological literature cited DETAIL'].iloc[i]:
        gerring_2007 += 1         
    if 'Gersick, C., 1991' in df['Methodological literature cited DETAIL'].iloc[i]:
        gersick += 1         
    if 'Ghazawneh, A., Henfridsson' in df['Methodological literature cited DETAIL'].iloc[i]:
        ghazawneh += 1         
    if 'Gibbert, M., Ruigrok, W., 2010' in df['Methodological literature cited DETAIL'].iloc[i]:
        gibbert_2010 += 1         
    if 'Gibbert, M., Ruigrok, W., Wicki, B., 2008' in df['Methodological literature cited DETAIL'].iloc[i]:
        gibbert_2008 += 1         
    if 'Gimpel, K., Schneider' in df['Methodological literature cited DETAIL'].iloc[i]:
        gimpel += 1         
    if 'Gioia, D., Corley' in df['Methodological literature cited DETAIL'].iloc[i]:
        gioia += 1 
    if 'Glaser, B., 1978' in df['Methodological literature cited DETAIL'].iloc[i]:
        glaser_1978 += 1 
    if 'Glaser, B., Strauss' in df['Methodological literature cited DETAIL'].iloc[i]:
        glaser_1967 += 1         
    if 'Goetz, J., LeCompte' in df['Methodological literature cited DETAIL'].iloc[i]:
        goetz += 1         
    if 'Golden, B., 1992' in df['Methodological literature cited DETAIL'].iloc[i]:
        golden += 1         
    if 'Golsorkhi, D., Rouleau' in df['Methodological literature cited DETAIL'].iloc[i]:
        golsorkhi += 1         
    if 'Gregor, S., 2006' in df['Methodological literature cited DETAIL'].iloc[i]:
        gregor += 1         
    if 'Gregory, R., Beck' in df['Methodological literature cited DETAIL'].iloc[i]:
        gregory_2013 += 1         
    if 'Gregory, R., Keil, M., 2014' in df['Methodological literature cited DETAIL'].iloc[i]:
        gregory_2014 += 1         
    if 'Gregory, R., Keil, M., Muntermann' in df['Methodological literature cited DETAIL'].iloc[i]:
        gregory_2015 += 1  
    if 'Guillemette, M., Pare' in df['Methodological literature cited DETAIL'].iloc[i]:
        guill += 1      
    if 'Huang, J., Pan' in df['Methodological literature cited DETAIL'].iloc[i]:
        huang += 1        
    if 'Huber, G., Power' in df['Methodological literature cited DETAIL'].iloc[i]:
        huber += 1      
    if 'Huberman, A., Miles' in df['Methodological literature cited DETAIL'].iloc[i]:
        huberman += 1        
    if 'Jarzabkowski, P., 2005' in df['Methodological literature cited DETAIL'].iloc[i]:
        jarz_2005 += 1      
    if 'Jarzabkowski, P., 2010' in df['Methodological literature cited DETAIL'].iloc[i]:
        jarz_2010 += 1
    if 'Johns, G., 2006' in df['Methodological literature cited DETAIL'].iloc[i]:
        johns += 1      
    if 'Kaplan, S., 2008' in df['Methodological literature cited DETAIL'].iloc[i]:
        kaplan += 1        
    if 'Katz, E., Haas' in df['Methodological literature cited DETAIL'].iloc[i]:
        katz += 1      
    if 'Keil, T., 2002' in df['Methodological literature cited DETAIL'].iloc[i]:
        keil += 1        
    if 'Kelly, G., 1955' in df['Methodological literature cited DETAIL'].iloc[i]:
        kelly += 1      
    if 'Kirsch, L., 2004' in df['Methodological literature cited DETAIL'].iloc[i]:
        kirsch += 1        
    if 'Kirsh, D., Maglio' in df['Methodological literature cited DETAIL'].iloc[i]:
        kirsh += 1      
    if 'Klein, H., Myers' in df['Methodological literature cited DETAIL'].iloc[i]:
        klein += 1        
    if 'Krippendorff, K., 2004' in df['Methodological literature cited DETAIL'].iloc[i]:
        krippe += 1      
    if 'Kuzel, A., 1992' in df['Methodological literature cited DETAIL'].iloc[i]:
        kuzel += 1        
    if 'Langley, A., 1999' in df['Methodological literature cited DETAIL'].iloc[i]:
        langley += 1      
    if 'Lee, A., 1989' in df['Methodological literature cited DETAIL'].iloc[i]:
        lee_1989 += 1        
    if 'Lee, A., Baskerville' in df['Methodological literature cited DETAIL'].iloc[i]:
        lee_2003 += 1      
    if 'Lee, J., Berente' in df['Methodological literature cited DETAIL'].iloc[i]:
        lee_2012 += 1        
    if 'Leonardi, P., 2013' in df['Methodological literature cited DETAIL'].iloc[i]:
        leonardi += 1      
    if 'Lincoln, Y., Guba' in df['Methodological literature cited DETAIL'].iloc[i]:
        lincoln += 1        
    if 'Locke, K., Golden' in df['Methodological literature cited DETAIL'].iloc[i]:
        locke += 1      
    if 'Mantere, S., Ketokivi' in df['Methodological literature cited DETAIL'].iloc[i]:
        mantere += 1        
    if 'Markus, M., 1989' in df['Methodological literature cited DETAIL'].iloc[i]:
        markus += 1      
    if 'Mays, N., Pope' in df['Methodological literature cited DETAIL'].iloc[i]:
        mays += 1         
    if 'Miles, M., Huberman' in df['Methodological literature cited DETAIL'].iloc[i]:
        miles_1994 += 1      
    if 'Miles, R., Snow' in df['Methodological literature cited DETAIL'].iloc[i]:
        miles_1978 += 1         
    if 'Miller, C., Cardinal' in df['Methodological literature cited DETAIL'].iloc[i]:
        miller += 1      
    if 'Mimno, D., McCallum' in df['Methodological literature cited DETAIL'].iloc[i]:
        mimno += 1         
    if 'Mingers, J., 2014' in df['Methodological literature cited DETAIL'].iloc[i]:
        mingers += 1      
    if 'Mingers, J., Willcocks, L., 2014' in df['Methodological literature cited DETAIL'].iloc[i]:
        mingers_will_2014 += 1         
    if 'Mingers, J., Willcocks, L., 2017' in df['Methodological literature cited DETAIL'].iloc[i]:
        mingers_will_2017 += 1      
    if 'Montealegre, R., 2002' in df['Methodological literature cited DETAIL'].iloc[i]:
        monte += 1         
    if 'Myers, M., 1997' in df['Methodological literature cited DETAIL'].iloc[i]:
        myers_1997 += 1      
    if 'Myers, M., 2009' in df['Methodological literature cited DETAIL'].iloc[i]:
        myers_2009 += 1         
    if 'Myers, M., Newman' in df['Methodological literature cited DETAIL'].iloc[i]:
        myers_2007 += 1  
    if 'Ngwenyama, O., Klein' in df['Methodological literature cited DETAIL'].iloc[i]:
        ngwenyama += 1         
    if 'Niiniluoto, I., 1999' in df['Methodological literature cited DETAIL'].iloc[i]:
        nii += 1      
    if 'Pan, S., Tan' in df['Methodological literature cited DETAIL'].iloc[i]:
        pan += 1 
    if 'Pare, G., 2004' in df['Methodological literature cited DETAIL'].iloc[i]:
        pare += 1         
    if 'Patton, M., 1990' in df['Methodological literature cited DETAIL'].iloc[i]:
        patton_1990 += 1      
    if 'Patton, M., 2002' in df['Methodological literature cited DETAIL'].iloc[i]:
        patton_2002 += 1
    if 'Pentland, B., 1999' in df['Methodological literature cited DETAIL'].iloc[i]:
        pentland += 1         
    if 'Pettigrew, A., 1990' in df['Methodological literature cited DETAIL'].iloc[i]:
        pettigrew += 1      
    if 'Pickering, A., 1993' in df['Methodological literature cited DETAIL'].iloc[i]:
        pick += 1
    if 'Poole, M., Lambert' in df['Methodological literature cited DETAIL'].iloc[i]:
        poole += 1         
    if 'Pratt, M., 2008' in df['Methodological literature cited DETAIL'].iloc[i]:
        pratt += 1      
    if 'Reich, B., Benbasat' in df['Methodological literature cited DETAIL'].iloc[i]:
        reich += 1
    if 'Reynolds, T., Gutman' in df['Methodological literature cited DETAIL'].iloc[i]:
        reynolds += 1         
    if 'Rubin, H., Rubin' in df['Methodological literature cited DETAIL'].iloc[i]:
        rubin += 1      
    if 'Sabherwal, R., Chan, Y., 2001' in df['Methodological literature cited DETAIL'].iloc[i]:
        sab_2001 += 1
    if 'Sabherwal, R., Robey, D., 1995' in df['Methodological literature cited DETAIL'].iloc[i]:
        sab_1995 += 1         
    if 'Sarker, S., Sarker' in df['Methodological literature cited DETAIL'].iloc[i]:
        sarker += 1      
    if 'Schultze, U., Avital' in df['Methodological literature cited DETAIL'].iloc[i]:
        schultze += 1
    if 'Seidel, S., Recker' in df['Methodological literature cited DETAIL'].iloc[i]:
        seidel += 1         
    if 'Siggelkow, N., 2007' in df['Methodological literature cited DETAIL'].iloc[i]:
        siggelkow += 1      
    if 'Spradley, J., 1979' in df['Methodological literature cited DETAIL'].iloc[i]:
        spradley += 1
    if 'Strauss, A., Corbin' in df['Methodological literature cited DETAIL'].iloc[i]:
        strauss += 1      
    if 'Street, C., Ward' in df['Methodological literature cited DETAIL'].iloc[i]:
        street += 1
    if 'Strong, D., Volkoff' in df['Methodological literature cited DETAIL'].iloc[i]:
        strong += 1         
    if 'Tan, B., Pan' in df['Methodological literature cited DETAIL'].iloc[i]:
        tan += 1      
    if 'Tsang, E., 2014' in df['Methodological literature cited DETAIL'].iloc[i]:
        tsang += 1
    if 'Urquhart, C., 2013' in df['Methodological literature cited DETAIL'].iloc[i]:
        ur_2013 += 1      
    if 'Urquhart, C., Lehmann' in df['Methodological literature cited DETAIL'].iloc[i]:
        ur_2010 += 1
    if 'Uys, J., Du Preez' in df['Methodological literature cited DETAIL'].iloc[i]:
        uys += 1         
    if 'Van de Ven, A., 1992' in df['Methodological literature cited DETAIL'].iloc[i]:
        ven_1992 += 1      
    if 'Van de Ven, A., 2007' in df['Methodological literature cited DETAIL'].iloc[i]:
        ven_2007 += 1
    if 'Van de Ven, A., Poole' in df['Methodological literature cited DETAIL'].iloc[i]:
        ven_1995 += 1      
    if 'Venkatesh, V., Brown' in df['Methodological literature cited DETAIL'].iloc[i]:
        venka += 1
    if 'Voss, C., Tsikriktsis' in df['Methodological literature cited DETAIL'].iloc[i]:
        voss += 1         
    if 'Walsh, I., Holton' in df['Methodological literature cited DETAIL'].iloc[i]:
        walsh += 1      
    if 'Walsham, G., 2006' in df['Methodological literature cited DETAIL'].iloc[i]:
        walsham += 1
    if 'Weick, K., 1995' in df['Methodological literature cited DETAIL'].iloc[i]:
        weick += 1      
    if 'Yin, R., 1984' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_1984 += 1
    if 'Yin, R., 1994. App' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_1994a += 1 
    if 'Yin, R., 1994. Case' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_1994b += 1  
    if 'Yin, R., 2002. Case' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_2002 += 1      
    if 'Yin, R., 2003. Case' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_2003 += 1
    if 'Yin, R., 2009. Case' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_2009 += 1         
    if 'Yin, R., 2014. Case' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_2014 += 1      
    if 'Yin, R., 2015. Qual' in df['Methodological literature cited DETAIL'].iloc[i]:
        yin_2015 += 1
    if 'Yoo, Y., Henfridsson' in df['Methodological literature cited DETAIL'].iloc[i]:
        yoo += 1

print('Allison: ', allison)
print('Alvesson: ', alvesson)
print('Bala, H., Venkatesh: ', bala)
print('Barley: ', barley)
print('Barrett, M., Oborn: ', barrett)
print('Bechky, B., Okhuysen: ', bechky)
print('Benbasat, I., Goldstein, D., Mead, M., 1987: ', benbasat_1987)
print('Benbasat, I., Goldstein, D., Mead, M., 1997: ', benbasat_1997)
print('Biernacki, P., Waldorf: ', biernacki)
print('Birks, D., Fern: ', birks)
print('Blei, D., 2012: ', blei)
print('Bowen, G., 2006: ', bowen_2006)
print('Bowen, G., 2008: ', bowen_2008)
print('Boyatzis, R., 1998: ', boyatzis)
print('Bryant, A., Charmaz: ', bryant)
print('urton-Jones, A., McLean: ', burton)
print('Chan, Y., Reich: ', chan)
print('Charmaz, K., 2006: ', charmaz_2006)
print('Charmaz, K., 2014: ', charmaz_2014)
print('Corbin, J., Strauss, A., 1990: ', corbin_1990)
print('Corbin, J., Strauss, A., 2008: ', corbin_2008)
print('Creswell, J., Clark: ', creswell)
print('Darke, P., Shanks: ', darke)
print('Denzin, N., 1997: ', denzin_1997)
print('Denzin, N., Lincoln: ', denzin_2000)
print('Dube, L., Pare: ', dube)
print('Easterby-Smith, M., Thorpe: ', easterby)
print('Edmondson, A., McManus: ', edmonson)
print('Eisenhardt, K., 1989: ', eisen_1989)
print('Eisenhardt, K., 2000: ', eisen_2000)
print('Eisenhardt, K., Graebner: ', eisen_2007)
print('Elbanna, A., 2010: ', elbanna)
print('Elsbach, K., Sutton: ', elsbach)
print('Gerring, J., 2006: ', gerring_2006)
print('Gerring, J., 2007: ', gerring_2007)
print('Gersick, C., 1991: ', gersick)
print('Ghazawneh, A., Henfridsson: ', ghazawneh)
print('Gibbert, M., Ruigrok, W., 2010: ', gibbert_2010)
print('Gibbert, M., Ruigrok, W., Wicki, B., 2008: ', gibbert_2008)
print('Gimpel, K., Schneider: ', gimpel)
print('Gioia, D., Corley: ', gioia)
print('Glaser, B., 1978: ', glaser_1978)
print('Glaser, B., Strauss: ', glaser_1967)
print('Goetz, J., LeCompte: ', goetz)
print('Golden, B., 1992: ', golden)
print('Golsorkhi, D., Rouleau: ', golsorkhi)
print('Gregor, S., 2006: ', gregor)
print('Gregory, R., Beck: ', gregory_2013)
print('Gregory, R., Keil, M., 2014: ', gregory_2014)
print('Gregory, R., Keil, M., Muntermann: ', gregory_2015)
print('Guillemette, M., Pare: ', guill)
print('Huang, J., Pan: ', huang)
print('Huber, G., Power: ', huber)
print('Huberman, A., Miles: ', huberman)
print('Jarzabkowski, P., 2005: ', jarz_2005)
print('Jarzabkowski, P., 2010: ', jarz_2010)
print('Johns, G., 2006: ', johns)
print('Kaplan, S., 2008: ', kaplan)
print('Katz, E., Haas: ', katz)
print('Keil, T., 2002: ', keil)
print('Kelly, G., 1955: ', kelly)
print('Kirsch, L., 2004: ', kirsch)
print('Kirsh, D., Maglio: ', kirsh)
print('Klein, H., Myers: ', klein)
print('Krippendorff, K., 2004: ', krippe)
print('Kuzel, A., 1992: ', kuzel)
print('Langley, A., 1999: ', langley)
print('Lee, A., 1989: ', lee_1989)
print('Lee, A., Baskerville: ', lee_2003)
print('Lee, J., Berente: ', lee_2012)
print('Leonardi, P., 2013: ', leonardi)
print('Lincoln, Y., Guba: ', lincoln)
print('Locke, K., Golden: ', locke)
print('Mantere, S., Ketokivi: ', mantere)
print('Markus, M., 1989: ', markus)
print('Mays, N., Pope: ', mays)
print('Miles, M., Huberman: ', miles_1994)
print('Miles, R., Snow: ', miles_1978)
print('Miller, C., Cardinal: ', miller)
print('Mimno, D., McCallum: ', mimno)
print('Mingers, J., 2014: ', mingers)
print('Mingers, J., Willcocks, L., 2014: ', mingers_will_2014)
print('Mingers, J., Willcocks, L., 2017: ', mingers_will_2017)
print('Montealegre, R., 2002: ', monte)
print('Myers, M., 1997: ', myers_1997)
print('Myers, M., 2009: ', myers_2009)
print('Myers, M., Newman: ', myers_2007)
print('Ngwenyama, O., Klein: ', ngwenyama)
print('Niiniluoto, I., 1999: ', nii)
print('Pan, S., Tan: ', pan)
print('Pare, G., 2004: ', pare)
print('Patton, M., 1990: ', patton_1990)
print('Patton, M., 2002: ', patton_2002)
print('Pentland, B., 1999: ', pentland)
print('Pettigrew, A., 1990: ', pettigrew)
print('Pickering, A., 1993: ', pick)
print('Poole, M., Lambert: ', poole)
print('Pratt, M., 2008: ', pratt)
print('Reich, B., Benbasat: ', reich)
print('Reynolds, T., Gutman: ', reynolds)
print('Rubin, H., Rubin: ', rubin)
print('Sabherwal, R., Chan, Y., 2001: ', sab_2001)
print('Sabherwal, R., Robey, D., 1995: ', sab_1995)
print('Sarker, S., Sarker: ', sarker)
print('Schultze, U., Avital: ', schultze)
print('Seidel, S., Recker: ', seidel)
print('Siggelkow, N., 2007: ', siggelkow)
print('Spradley, J., 1979: ', spradley)
print('Strauss, A., Corbin: ', strauss)
print('Street, C., Ward: ', street)
print('Strong, D., Volkoff: ', strong)
print('Tan, B., Pan: ', tan)
print('Tsang, E., 2014: ', tsang)
print('Urquhart, C., 2013: ', ur_2013)
print('Urquhart, C., Lehmann: ', ur_2010)
print('Uys, J., Du Preez: ', uys)
print('Van de Ven, A., 1992: ', ven_1992)
print('Van de Ven, A., 2007: ', ven_2007)
print('Van de Ven, A., Poole: ', ven_1995)
print('Venkatesh, V., Brown: ', venka)
print('Voss, C., Tsikriktsis: ', voss)
print('Walsh, I., Holton: ', walsh)
print('Walsham, G., 2006: ', walsham)
print('Weick, K., 1995: ', weick)
print('Yin, R., 1984: ', yin_1984)
print('Yin, R., 1994. App: ', yin_1994a)
print('Yin, R., 1994. Case: ', yin_1994b)
print('Yin, R., 2002. Case: ', yin_2002)
print('Yin, R., 2003. Case: ', yin_2003)
print('Yin, R., 2009. Case: ', yin_2009)
print('Yin, R., 2014. Case: ', yin_2014)
print('Yin, R., 2015. Qual: ', yin_2015)
print('Yoo, Y., Henfridsson: ', yoo)


Methodological literature cited:  35  =>  1.0
Allison:  1
Alvesson:  1
Bala, H., Venkatesh:  1
Barley:  1
Barrett, M., Oborn:  1
Bechky, B., Okhuysen:  1
Benbasat, I., Goldstein, D., Mead, M., 1987:  5
Benbasat, I., Goldstein, D., Mead, M., 1997:  1
Biernacki, P., Waldorf:  1
Birks, D., Fern:  2
Blei, D., 2012:  1
Bowen, G., 2006:  1
Bowen, G., 2008:  1
Boyatzis, R., 1998:  2
Bryant, A., Charmaz:  1
urton-Jones, A., McLean:  1
Chan, Y., Reich:  1
Charmaz, K., 2006:  5
Charmaz, K., 2014:  1
Corbin, J., Strauss, A., 1990:  6
Corbin, J., Strauss, A., 2008:  1
Creswell, J., Clark:  1
Darke, P., Shanks:  1
Denzin, N., 1997:  1
Denzin, N., Lincoln:  1
Dube, L., Pare:  4
Easterby-Smith, M., Thorpe:  1
Edmondson, A., McManus:  1
Eisenhardt, K., 1989:  11
Eisenhardt, K., 2000:  1
Eisenhardt, K., Graebner:  5
Elbanna, A., 2010:  1
Elsbach, K., Sutton:  1
Gerring, J., 2006:  1
Gerring, J., 2007:  4
Gersick, C., 1991:  1
Ghazawneh, A., Henfridsson:  1
Gibbert, M., Ruigrok, W., 2010:  1
Gibbert, M.

In [96]:
#Other case studies cited

other_cs_all = 0

for i in range(0, all_len):
    if df['Other case studies cited'].iloc[i] == 'yes':
        other_cs_all += 1

print('Other case studies cited: ', other_cs_all, ' => ', other_cs_all/all_len)

Other case studies cited:  23  =>  0.6571428571428571


In [97]:
#Online-Appendix

online_appendix_all = 0

for i in range(0, all_len):
    if df['Online-Appendix'].iloc[i] == 'yes':
        online_appendix_all += 1

print('Online-Appendix: ', online_appendix_all, ' => ', online_appendix_all/all_len)

Online-Appendix:  2  =>  0.05714285714285714
