### Importing Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
import random

## Partial Profile Discrete Choice Experiment

We dropped $Bank\enspace Account$ primarily because:
- We had a PPDCE design for $11$ attributes, each of which has $2$ levels.
- We found that $Maternal\enspace Health$ and $Bank\enspace Account$ are dropped from the new MPI model.
- We saw that $Bank\enspace Account$ has a lower contribution in West Bengal.

In [2]:
# The eleven indicators, in the order used to label the design-file columns.
poverty_indicators = [
  'Nutrition',
  'Years of Schooling',
  'Cooking Fuel',
  'Housing',
  'Sanitation',
  'Maternal Health',
  'School Attendance',
  'Assets',
  'Drinking Water',
  'Electricity',
  'Child & Adolescent Mortality',
]

In [3]:
poverty_indicators

['Nutrition',
 'Years of Schooling',
 'Cooking Fuel',
 'Housing',
 'Sanitation',
 'Maternal Health',
 'School Attendance',
 'Assets',
 'Drinking Water',
 'Electricity',
 'Child & Adolescent Mortality']

In [4]:
len(poverty_indicators)

11

In [5]:
# Design-file level code -> symbol; '0' maps to an empty string, which later cells drop.
map_label_to_emojis = dict(zip(('1', '2', '0'), ('✅', '❌', '')))

In [6]:
# Column labels: the pair id, then one column per indicator for each of the two profiles.
design_columns = (
  ['Pair']
  + [f'{name}1' for name in poverty_indicators]
  + [f'{name}2' for name in poverty_indicators]
)
# header=0 together with names= discards the file's own first row as a header.
design = pd.read_csv('partial-profile-DCE.txt', sep=' ', names=design_columns, header=0)

In [7]:
# Token that the design file uses where this notebook expects level '2':
# U+2212 MINUS SIGN followed by '1'. It is NOT the ASCII string '-1', so a plain
# comparison against '-1' never matches. Spelled out as a constant instead of
# being read back from one particular cell of the design table.
minus_1 = '\u22121'

In [8]:
minus_1

'−1'

In [27]:
# Recode the design table: every cell equal to the minus-one token becomes the level code '2'.
recoded_rows = []
for design_row in design.to_numpy():
  recoded_rows.append(['2' if cell == minus_1 else cell for cell in design_row])
arr = np.array(recoded_rows)

In [28]:
arr

array([['1', '1', '1', ..., '0', '0', '0'],
       ['2', '1', '2', ..., '0', '0', '0'],
       ['3', '1', '1', ..., '0', '0', '0'],
       ...,
       ['118', '0', '0', ..., '1', '1', '2'],
       ['119', '0', '0', ..., '2', '1', '1'],
       ['120', '0', '0', ..., '1', '1', '1']], dtype='<U21')

In [29]:
# Columns 1-11 of arr hold the first profile; translate its level codes into symbols.
first_profile_codes = arr[:, 1:12]
first_pair = pd.DataFrame(np.vectorize(map_label_to_emojis.get)(first_profile_codes))

In [30]:
# Columns 12 onward of arr hold the second profile; translate its level codes into symbols.
second_profile_codes = arr[:, 12:]
second_pair = pd.DataFrame(np.vectorize(map_label_to_emojis.get)(second_profile_codes))

In [31]:
# Label both profile tables with the indicator names.
for profile_table in (first_pair, second_pair):
  profile_table.columns = poverty_indicators

In [32]:
first_pair

Unnamed: 0,Nutrition,Years of Schooling,Cooking Fuel,Housing,Sanitation,Maternal Health,School Attendance,Assets,Drinking Water,Electricity,Child & Adolescent Mortality
0,✅,✅,❌,❌,,,,,,,
1,✅,❌,❌,❌,,,,,,,
2,✅,✅,❌,✅,,,,,,,
3,✅,❌,❌,✅,,,,,,,
4,✅,✅,✅,❌,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
115,,,,,,,✅,,❌,❌,✅
116,,,,,,,✅,,✅,✅,❌
117,,,,,,,✅,,❌,✅,❌
118,,,,,,,✅,,✅,✅,✅


In [33]:
second_pair

Unnamed: 0,Nutrition,Years of Schooling,Cooking Fuel,Housing,Sanitation,Maternal Health,School Attendance,Assets,Drinking Water,Electricity,Child & Adolescent Mortality
0,❌,❌,❌,❌,,,,,,,
1,❌,✅,❌,❌,,,,,,,
2,❌,❌,❌,✅,,,,,,,
3,❌,✅,❌,✅,,,,,,,
4,❌,❌,✅,❌,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
115,,,,,,,❌,,✅,❌,✅
116,,,,,,,❌,,❌,✅,❌
117,,,,,,,❌,,✅,✅,❌
118,,,,,,,❌,,❌,✅,✅


In [269]:
# Write each profile table to its own CSV file in the working directory.
for profile_table, csv_name in ((first_pair, 'first-pair.csv'), (second_pair, 'second-pair.csv')):
  profile_table.to_csv(csv_name)

In [34]:
first_pair.to_numpy()

array([['✅', '✅', '❌', ..., '', '', ''],
       ['✅', '❌', '❌', ..., '', '', ''],
       ['✅', '✅', '❌', ..., '', '', ''],
       ...,
       ['', '', '', ..., '❌', '✅', '❌'],
       ['', '', '', ..., '✅', '✅', '✅'],
       ['', '', '', ..., '❌', '✅', '✅']], dtype=object)

In [35]:
second_pair.to_numpy()

array([['❌', '❌', '❌', ..., '', '', ''],
       ['❌', '✅', '❌', ..., '', '', ''],
       ['❌', '❌', '❌', ..., '', '', ''],
       ...,
       ['', '', '', ..., '✅', '✅', '❌'],
       ['', '', '', ..., '❌', '✅', '✅'],
       ['', '', '', ..., '✅', '✅', '✅']], dtype=object)

In [36]:
# Keep only the odd-positioned design rows (1, 3, 5, ...) of each profile table.
odd_rows = slice(1, None, 2)
N24_fp = first_pair.to_numpy()[odd_rows]
N24_sp = second_pair.to_numpy()[odd_rows]

In [273]:
N24_fp

array([['✅', '❌', '❌', '❌', '', '', '', '', '', '', ''],
       ['✅', '❌', '❌', '✅', '', '', '', '', '', '', ''],
       ['✅', '❌', '✅', '❌', '', '', '', '', '', '', ''],
       ['✅', '❌', '✅', '✅', '', '', '', '', '', '', ''],
       ['✅', '❌', '', '', '❌', '❌', '', '', '', '', ''],
       ['✅', '❌', '', '', '❌', '✅', '', '', '', '', ''],
       ['✅', '✅', '', '', '❌', '❌', '', '', '', '', ''],
       ['✅', '✅', '', '', '❌', '✅', '', '', '', '', ''],
       ['❌', '', '', '', '✅', '', '❌', '❌', '', '', ''],
       ['❌', '', '', '', '✅', '', '❌', '✅', '', '', ''],
       ['✅', '', '', '', '✅', '', '❌', '❌', '', '', ''],
       ['✅', '', '', '', '✅', '', '❌', '✅', '', '', ''],
       ['❌', '❌', '', '', '', '', '', '✅', '❌', '', ''],
       ['❌', '✅', '', '', '', '', '', '✅', '❌', '', ''],
       ['✅', '❌', '', '', '', '', '', '✅', '❌', '', ''],
       ['✅', '✅', '', '', '', '', '', '✅', '❌', '', ''],
       ['✅', '❌', '', '', '', '', '', '', '', '❌', '❌'],
       ['✅', '❌', '', '', '', '

In [274]:
N24_sp

array([['❌', '✅', '❌', '❌', '', '', '', '', '', '', ''],
       ['❌', '✅', '❌', '✅', '', '', '', '', '', '', ''],
       ['❌', '✅', '✅', '❌', '', '', '', '', '', '', ''],
       ['❌', '✅', '✅', '✅', '', '', '', '', '', '', ''],
       ['❌', '❌', '', '', '✅', '❌', '', '', '', '', ''],
       ['❌', '❌', '', '', '✅', '✅', '', '', '', '', ''],
       ['❌', '✅', '', '', '✅', '❌', '', '', '', '', ''],
       ['❌', '✅', '', '', '✅', '✅', '', '', '', '', ''],
       ['❌', '', '', '', '❌', '', '✅', '❌', '', '', ''],
       ['❌', '', '', '', '❌', '', '✅', '✅', '', '', ''],
       ['✅', '', '', '', '❌', '', '✅', '❌', '', '', ''],
       ['✅', '', '', '', '❌', '', '✅', '✅', '', '', ''],
       ['❌', '❌', '', '', '', '', '', '❌', '✅', '', ''],
       ['❌', '✅', '', '', '', '', '', '❌', '✅', '', ''],
       ['✅', '❌', '', '', '', '', '', '❌', '✅', '', ''],
       ['✅', '✅', '', '', '', '', '', '❌', '✅', '', ''],
       ['❌', '✅', '', '', '', '', '', '', '', '❌', '❌'],
       ['❌', '✅', '', '', '', '

In [275]:
# Turn each profile row into a plain Python list of symbols.
N24_fp = [list(profile) for profile in N24_fp]
N24_sp = [list(profile) for profile in N24_sp]

In [276]:
# Pair every symbol with the indicator it belongs to: (symbol, indicator name).
N24_fp = [[(symbol, name) for symbol, name in zip(profile, poverty_indicators)] for profile in N24_fp]
N24_sp = [[(symbol, name) for symbol, name in zip(profile, poverty_indicators)] for profile in N24_sp]

In [277]:
N24_fp

[[('✅', 'Nutrition'),
  ('❌', 'Years of Schooling'),
  ('❌', 'Cooking Fuel'),
  ('❌', 'Housing'),
  ('', 'Sanitation'),
  ('', 'Maternal Health'),
  ('', 'School Attendance'),
  ('', 'Assets'),
  ('', 'Drinking Water'),
  ('', 'Electricity'),
  ('', 'Child & Adolescent Mortality')],
 [('✅', 'Nutrition'),
  ('❌', 'Years of Schooling'),
  ('❌', 'Cooking Fuel'),
  ('✅', 'Housing'),
  ('', 'Sanitation'),
  ('', 'Maternal Health'),
  ('', 'School Attendance'),
  ('', 'Assets'),
  ('', 'Drinking Water'),
  ('', 'Electricity'),
  ('', 'Child & Adolescent Mortality')],
 [('✅', 'Nutrition'),
  ('❌', 'Years of Schooling'),
  ('✅', 'Cooking Fuel'),
  ('❌', 'Housing'),
  ('', 'Sanitation'),
  ('', 'Maternal Health'),
  ('', 'School Attendance'),
  ('', 'Assets'),
  ('', 'Drinking Water'),
  ('', 'Electricity'),
  ('', 'Child & Adolescent Mortality')],
 [('✅', 'Nutrition'),
  ('❌', 'Years of Schooling'),
  ('✅', 'Cooking Fuel'),
  ('✅', 'Housing'),
  ('', 'Sanitation'),
  ('', 'Maternal Health'),
 

In [278]:
def _shown_attributes(profile):
  """Render each (symbol, indicator) pair as 'symbol indicator', skipping pairs whose symbol is ''."""
  return [' '.join(pair) for pair in profile if pair[0] != '']


N24_fp = [_shown_attributes(profile) for profile in N24_fp]
N24_sp = [_shown_attributes(profile) for profile in N24_sp]

In [279]:
N24_fp

[['✅ Nutrition', '❌ Years of Schooling', '❌ Cooking Fuel', '❌ Housing'],
 ['✅ Nutrition', '❌ Years of Schooling', '❌ Cooking Fuel', '✅ Housing'],
 ['✅ Nutrition', '❌ Years of Schooling', '✅ Cooking Fuel', '❌ Housing'],
 ['✅ Nutrition', '❌ Years of Schooling', '✅ Cooking Fuel', '✅ Housing'],
 ['✅ Nutrition', '❌ Years of Schooling', '❌ Sanitation', '❌ Maternal Health'],
 ['✅ Nutrition', '❌ Years of Schooling', '❌ Sanitation', '✅ Maternal Health'],
 ['✅ Nutrition', '✅ Years of Schooling', '❌ Sanitation', '❌ Maternal Health'],
 ['✅ Nutrition', '✅ Years of Schooling', '❌ Sanitation', '✅ Maternal Health'],
 ['❌ Nutrition', '✅ Sanitation', '❌ School Attendance', '❌ Assets'],
 ['❌ Nutrition', '✅ Sanitation', '❌ School Attendance', '✅ Assets'],
 ['✅ Nutrition', '✅ Sanitation', '❌ School Attendance', '❌ Assets'],
 ['✅ Nutrition', '✅ Sanitation', '❌ School Attendance', '✅ Assets'],
 ['❌ Nutrition', '❌ Years of Schooling', '✅ Assets', '❌ Drinking Water'],
 ['❌ Nutrition', '✅ Years of Schooling', '

In [280]:
N24_sp

[['❌ Nutrition', '✅ Years of Schooling', '❌ Cooking Fuel', '❌ Housing'],
 ['❌ Nutrition', '✅ Years of Schooling', '❌ Cooking Fuel', '✅ Housing'],
 ['❌ Nutrition', '✅ Years of Schooling', '✅ Cooking Fuel', '❌ Housing'],
 ['❌ Nutrition', '✅ Years of Schooling', '✅ Cooking Fuel', '✅ Housing'],
 ['❌ Nutrition', '❌ Years of Schooling', '✅ Sanitation', '❌ Maternal Health'],
 ['❌ Nutrition', '❌ Years of Schooling', '✅ Sanitation', '✅ Maternal Health'],
 ['❌ Nutrition', '✅ Years of Schooling', '✅ Sanitation', '❌ Maternal Health'],
 ['❌ Nutrition', '✅ Years of Schooling', '✅ Sanitation', '✅ Maternal Health'],
 ['❌ Nutrition', '❌ Sanitation', '✅ School Attendance', '❌ Assets'],
 ['❌ Nutrition', '❌ Sanitation', '✅ School Attendance', '✅ Assets'],
 ['✅ Nutrition', '❌ Sanitation', '✅ School Attendance', '❌ Assets'],
 ['✅ Nutrition', '❌ Sanitation', '✅ School Attendance', '✅ Assets'],
 ['❌ Nutrition', '❌ Years of Schooling', '❌ Assets', '✅ Drinking Water'],
 ['❌ Nutrition', '✅ Years of Schooling', '

In [281]:
# Collapse each profile into one comma-separated line of text.
N24_fp = list(map(', '.join, N24_fp))
N24_sp = list(map(', '.join, N24_sp))

In [282]:
N24_fp

['✅ Nutrition, ❌ Years of Schooling, ❌ Cooking Fuel, ❌ Housing',
 '✅ Nutrition, ❌ Years of Schooling, ❌ Cooking Fuel, ✅ Housing',
 '✅ Nutrition, ❌ Years of Schooling, ✅ Cooking Fuel, ❌ Housing',
 '✅ Nutrition, ❌ Years of Schooling, ✅ Cooking Fuel, ✅ Housing',
 '✅ Nutrition, ❌ Years of Schooling, ❌ Sanitation, ❌ Maternal Health',
 '✅ Nutrition, ❌ Years of Schooling, ❌ Sanitation, ✅ Maternal Health',
 '✅ Nutrition, ✅ Years of Schooling, ❌ Sanitation, ❌ Maternal Health',
 '✅ Nutrition, ✅ Years of Schooling, ❌ Sanitation, ✅ Maternal Health',
 '❌ Nutrition, ✅ Sanitation, ❌ School Attendance, ❌ Assets',
 '❌ Nutrition, ✅ Sanitation, ❌ School Attendance, ✅ Assets',
 '✅ Nutrition, ✅ Sanitation, ❌ School Attendance, ❌ Assets',
 '✅ Nutrition, ✅ Sanitation, ❌ School Attendance, ✅ Assets',
 '❌ Nutrition, ❌ Years of Schooling, ✅ Assets, ❌ Drinking Water',
 '❌ Nutrition, ✅ Years of Schooling, ✅ Assets, ❌ Drinking Water',
 '✅ Nutrition, ❌ Years of Schooling, ✅ Assets, ❌ Drinking Water',
 '✅ Nutrition,

In [283]:
N24_sp

['❌ Nutrition, ✅ Years of Schooling, ❌ Cooking Fuel, ❌ Housing',
 '❌ Nutrition, ✅ Years of Schooling, ❌ Cooking Fuel, ✅ Housing',
 '❌ Nutrition, ✅ Years of Schooling, ✅ Cooking Fuel, ❌ Housing',
 '❌ Nutrition, ✅ Years of Schooling, ✅ Cooking Fuel, ✅ Housing',
 '❌ Nutrition, ❌ Years of Schooling, ✅ Sanitation, ❌ Maternal Health',
 '❌ Nutrition, ❌ Years of Schooling, ✅ Sanitation, ✅ Maternal Health',
 '❌ Nutrition, ✅ Years of Schooling, ✅ Sanitation, ❌ Maternal Health',
 '❌ Nutrition, ✅ Years of Schooling, ✅ Sanitation, ✅ Maternal Health',
 '❌ Nutrition, ❌ Sanitation, ✅ School Attendance, ❌ Assets',
 '❌ Nutrition, ❌ Sanitation, ✅ School Attendance, ✅ Assets',
 '✅ Nutrition, ❌ Sanitation, ✅ School Attendance, ❌ Assets',
 '✅ Nutrition, ❌ Sanitation, ✅ School Attendance, ✅ Assets',
 '❌ Nutrition, ❌ Years of Schooling, ❌ Assets, ✅ Drinking Water',
 '❌ Nutrition, ✅ Years of Schooling, ❌ Assets, ✅ Drinking Water',
 '✅ Nutrition, ❌ Years of Schooling, ❌ Assets, ✅ Drinking Water',
 '✅ Nutrition,

In [284]:
# One choice set = (first profile text, second profile text).
N24_CC = [(first_text, second_text) for first_text, second_text in zip(N24_fp, N24_sp)]

In [285]:
N24_CC

[('✅ Nutrition, ❌ Years of Schooling, ❌ Cooking Fuel, ❌ Housing',
  '❌ Nutrition, ✅ Years of Schooling, ❌ Cooking Fuel, ❌ Housing'),
 ('✅ Nutrition, ❌ Years of Schooling, ❌ Cooking Fuel, ✅ Housing',
  '❌ Nutrition, ✅ Years of Schooling, ❌ Cooking Fuel, ✅ Housing'),
 ('✅ Nutrition, ❌ Years of Schooling, ✅ Cooking Fuel, ❌ Housing',
  '❌ Nutrition, ✅ Years of Schooling, ✅ Cooking Fuel, ❌ Housing'),
 ('✅ Nutrition, ❌ Years of Schooling, ✅ Cooking Fuel, ✅ Housing',
  '❌ Nutrition, ✅ Years of Schooling, ✅ Cooking Fuel, ✅ Housing'),
 ('✅ Nutrition, ❌ Years of Schooling, ❌ Sanitation, ❌ Maternal Health',
  '❌ Nutrition, ❌ Years of Schooling, ✅ Sanitation, ❌ Maternal Health'),
 ('✅ Nutrition, ❌ Years of Schooling, ❌ Sanitation, ✅ Maternal Health',
  '❌ Nutrition, ❌ Years of Schooling, ✅ Sanitation, ✅ Maternal Health'),
 ('✅ Nutrition, ✅ Years of Schooling, ❌ Sanitation, ❌ Maternal Health',
  '❌ Nutrition, ✅ Years of Schooling, ✅ Sanitation, ❌ Maternal Health'),
 ('✅ Nutrition, ✅ Years of School

In [286]:
# Each choice set becomes a two-line string: first profile on line one, second on line two.
N24_CC_fin = list(map('\n'.join, N24_CC))

In [287]:
print(N24_CC_fin[0])

✅ Nutrition, ❌ Years of Schooling, ❌ Cooking Fuel, ❌ Housing
❌ Nutrition, ✅ Years of Schooling, ❌ Cooking Fuel, ❌ Housing


In [289]:
# With 12 questions per person, this many people are needed to cover every choice set once.
people_needed = len(N24_CC_fin) // 12
print(f'Number of people who will span the entire choice sets i.e. 60 is {people_needed}')

Number of people who will span the entire choice sets i.e. 60 is 5


In [290]:
# Print one shuffled questionnaire per candidate. Candidate i receives every
# n_candidates-th choice set starting at offset i, so together the candidates
# cover each choice set exactly once.
n_candidates = len(N24_CC_fin) // 12  # 12 questions per candidate
# Dedicated, seeded generator: re-running the cell reproduces the same question
# order and the global `random` state is left untouched.
shuffler = random.Random(2023)
for i in range(n_candidates):
  print()
  print('***********')
  print(f'Candidate {i+1}')
  print('***********')
  print()
  questions_to_ask = N24_CC_fin[i::n_candidates]  # a slice is already a new list
  shuffler.shuffle(questions_to_ask)
  for qsn_no, question in enumerate(questions_to_ask, start=1):
    print()
    print(f'Question-{qsn_no}:')
    print(question)
  


***********
Candidate 1
***********


Question-1:
✅ Housing, ❌ Sanitation, ❌ School Attendance, ✅ Drinking Water
❌ Housing, ✅ Sanitation, ❌ School Attendance, ✅ Drinking Water

Question-2:
✅ Cooking Fuel, ✅ Assets, ✅ Drinking Water, ❌ Electricity
❌ Cooking Fuel, ✅ Assets, ✅ Drinking Water, ✅ Electricity

Question-3:
✅ Housing, ❌ Maternal Health, ❌ Assets, ❌ Child & Adolescent Mortality
❌ Housing, ✅ Maternal Health, ❌ Assets, ❌ Child & Adolescent Mortality

Question-4:
✅ Cooking Fuel, ❌ Housing, ❌ Sanitation, ✅ Maternal Health
❌ Cooking Fuel, ✅ Housing, ❌ Sanitation, ✅ Maternal Health

Question-5:
✅ Years of Schooling, ❌ Maternal Health, ❌ School Attendance, ❌ Drinking Water
❌ Years of Schooling, ❌ Maternal Health, ❌ School Attendance, ✅ Drinking Water

Question-6:
✅ Maternal Health, ✅ School Attendance, ❌ Electricity, ❌ Child & Adolescent Mortality
❌ Maternal Health, ✅ School Attendance, ❌ Electricity, ✅ Child & Adolescent Mortality

Question-7:
✅ Maternal Health, ❌ Assets, ✅ Drinking

In [295]:
# List, for each of the five candidates, which choice-set numbers (1-60) they were given.
all_question_numbers = list(range(1, 61))
for candidate_no in range(1, 6):
  print(f'The questions asked to Candidate-{candidate_no}') # shuffled
  print(all_question_numbers[candidate_no - 1::5])
  print()

The questions asked to Candidate-1
[1, 6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56]

The questions asked to Candidate-2
[2, 7, 12, 17, 22, 27, 32, 37, 42, 47, 52, 57]

The questions asked to Candidate-3
[3, 8, 13, 18, 23, 28, 33, 38, 43, 48, 53, 58]

The questions asked to Candidate-4
[4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59]

The questions asked to Candidate-5
[5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]



### Processing Response Data

In [175]:
# Load the raw survey export. Later cells slice it purely by column position:
# 0-5, 6-65, 66-71 and 72 onward.
d = pd.read_excel('raw_dataset.xlsx')

#### Contrastive Comparison

In [176]:
# Zero-filled cube: one vector of len(poverty_indicators) per (row of d, column 6-65).
n_respondents, n_question_cols = d.iloc[:, 6:66].to_numpy().shape
des = np.zeros((n_respondents, n_question_cols, len(poverty_indicators)))

In [177]:
# Rename the first indicator in place.
# NOTE(review): presumably the response strings say 'Nutritious Meal' rather than
# 'Nutrition' -- confirm against the raw data. This mutates the list that earlier
# cells use, so re-running those cells afterwards picks up the new name.
poverty_indicators[0] = 'Nutritious Meal'

In [178]:
poverty_indicators

['Nutritious Meal',
 'Years of Schooling',
 'Cooking Fuel',
 'Housing',
 'Sanitation',
 'Maternal Health',
 'School Attendance',
 'Assets',
 'Drinking Water',
 'Electricity',
 'Child & Adolescent Mortality']

In [179]:
s = '❌ Housing, ✅ Sanitation, ❌ School Attendance, ✅ Drinking Water'

In [180]:
'❌ Housing'.split(' ')

['❌', 'Housing']

In [181]:
# Symbol -> integer code; the empty string maps to 0.
map_emojis_to_label = dict([('✅', 1), ('❌', -1), ('', 0)])

In [182]:
[(1 if choice == '✅' else -1, poverty_indicators.index(indicator)) for choice, indicator in [tuple([x for x in ch_ind.split(' ', 1)]) for ch_ind in s.split(', ')]]

[(-1, 3), (1, 4), (-1, 6), (1, 8)]

In [183]:
def conv_str_to_row(s):
  """Encode one chosen-profile string as a vector over `poverty_indicators`.

  `s` looks like '❌ Housing, ✅ Sanitation': comma-separated items, each a
  symbol, one space, then an indicator name. A listed indicator gets 1 when its
  symbol is '✅' and -1 for any other symbol; unlisted indicators stay 0.
  A missing value (NaN / None) or an empty string gives the all-zero vector.

  Returns a one-element pandas Series whose single value is the list; the
  downstream loop reads it back with `.to_numpy()[0]`.

  Raises ValueError if an item has no space or names an unknown indicator.
  """
  ddd = [0] * len(poverty_indicators)
  # pd.isna recognises every NaN/None, whereas the former identity test
  # (`s is np.nan`) only matched the np.nan singleton object.
  if pd.isna(s) or s == '':
    return pd.Series([ddd])
  for ch_ind in s.split(', '):
    # split only on the first space: indicator names themselves contain spaces
    choice, indicator = ch_ind.split(' ', 1)
    ddd[poverty_indicators.index(indicator)] = 1 if choice == '✅' else -1
  return pd.Series([ddd])

In [184]:
conv_str_to_row('')

0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
dtype: object

In [185]:
# changing the strings to respective lists
# Each cell of columns 6-65 is replaced by the one-element Series that conv_str_to_row returns.
# NOTE(review): this overwrites `d` in place, so the cell is not safe to run twice, and it
# must run before conv_str_to_row is redefined further down the notebook.
d.iloc[:,6:66] = d.iloc[:,6:66].apply(np.vectorize(conv_str_to_row))

In [186]:
# Columns 6-65 of d as a NumPy array (one row per row of d).
raw_cc_data = d.iloc[:,6:66].to_numpy()

In [187]:
raw_cc_data

array([[0    [0, 0, 0, -1, 1, 0, -1, 0, 1, 0, 0]
        dtype: object                           ,
        0    [0, 0, -1, 0, 0, 0, 0, 1, 1, 1, 0]
        dtype: object                          ,
        0    [0, 0, 0, -1, 0, 1, 0, -1, 0, 0, -1]
        dtype: object                            , ...,
        0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        dtype: object                         ,
        0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        dtype: object                         ,
        0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        dtype: object                         ],
       [0    [0, 0, 0, -1, 1, 0, -1, 0, 1, 0, 0]
        dtype: object                           ,
        0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        dtype: object                         ,
        0    [0, 0, 0, -1, 0, 1, 0, -1, 0, 0, -1]
        dtype: object                            , ...,
        0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        dtype: object                         ,
        0    [0, 

In [193]:
# Unwrap every cell (a one-element Series holding the indicator list) into its slot of des.
n_rows, n_cols = raw_cc_data.shape[0], raw_cc_data.shape[1]
for i, j in np.ndindex(n_rows, n_cols):
  des[i, j] = np.array(raw_cc_data[i, j].to_numpy()[0])

In [201]:
# des was created by np.zeros, i.e. as floats; convert it to integers.
des = des.astype(int)

In [202]:
des.shape

(271, 60, 11)

In [203]:
des[0]

array([[ 0,  0,  0, -1,  1,  0, -1,  0,  1,  0,  0],
       [ 0,  0, -1,  0,  0,  0,  0,  1,  1,  1,  0],
       [ 0,  0,  0, -1,  0,  1,  0, -1,  0,  0, -1],
       [ 0,  0, -1,  1, -1,  1,  0,  0,  0,  0,  0],
       [ 0,  1,  0,  0,  0, -1, -1,  0, -1,  0,  0],
       [ 0,  0,  0,  0,  0, -1,  1,  0,  0, -1,  1],
       [ 0,  0,  0,  0,  0, -1,  0,  1,  1,  0,  1],
       [ 0,  0, -1,  1,  0,  0, -1,  0,  0,  1,  0],
       [ 1,  1,  0,  0,  0,  0,  0,  1, -1,  0,  0],
       [-1,  1, -1, -1,  0,  0,  0,  0,  0,  0,  0],
       [-1, -1,  0,  0,  1,  1,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  0, -1,  0,  1, -1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0

In [219]:
def f(x):
  """Map a nested collection of symbols to an integer array via map_emojis_to_label."""
  encoded_rows = []
  for y in x:
    encoded_rows.append([map_emojis_to_label[z] for z in y])
  return np.array(encoded_rows)

In [221]:
# Integer-coded alternatives for the odd-positioned design rows.
# Derived directly from first_pair / second_pair: in notebook order N24_fp and
# N24_sp have already been rebuilt into joined display strings by the
# questionnaire cells, and f raises KeyError on those (it would look up
# individual characters such as ' ' in map_emojis_to_label).
N24_fp_int = f(first_pair.to_numpy()[1::2])
N24_sp_int = f(second_pair.to_numpy()[1::2])

In [297]:
# Build one (12, 2n+2) block per respondent. Row layout:
# candidate_id (starts from 1),s1,....,s11,t1,...,t11,choice (0-first alternative chosen, 1-second alternative chosen)
# a row with s1,.....,t11,choice full of only zeroes means missing response
n = len(poverty_indicators)
candidates = []
for candix, responses in enumerate(des):
  resp = np.zeros((12, 2 * n + 2))
  resp[:, 0] = candix + 1

  k = 0  # next row of resp to fill
  for row in responses:
    if not row.any():
      # All-zero vector. After the first stored answer and before the last
      # slot it still consumes a slot (guard against missing response);
      # otherwise it is skipped without advancing k.
      if 0 < k < 11:
        k += 1
      continue

    # Does the chosen profile equal the first alternative of some choice set?
    first_hits = np.flatnonzero((N24_fp_int == row).all(axis=1))
    if first_hits.size > 0:
      resp[k, 1:n + 1] = row
      resp[k, n + 1:-1] = N24_sp_int[first_hits[0]]
      resp[k, -1] = 0
    else:
      # Otherwise it has to equal a second alternative; indexing [0] raises
      # IndexError when it matches neither.
      second_hit = np.flatnonzero((N24_sp_int == row).all(axis=1))[0]
      resp[k, -1] = 1
      resp[k, n + 1:-1] = row
      resp[k, 1:n + 1] = N24_fp_int[second_hit]
    k += 1

  candidates.append(resp)

In [299]:
candidates[1][1][0], candidates[1][1][1:(n+1)], candidates[1][1][(n+1):-1], candidates[1][1][-1]

(2.0,
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 0.0)

In [300]:
candidates[1][2][0], candidates[1][2][1:(n+1)], candidates[1][2][(n+1):-1], candidates[1][2][-1]

(2.0,
 array([ 0.,  0.,  0.,  1.,  0., -1.,  0., -1.,  0.,  0., -1.]),
 array([ 0.,  0.,  0., -1.,  0.,  1.,  0., -1.,  0.,  0., -1.]),
 1.0)

In [301]:
candidates[0].shape

(12, 24)

In [302]:
candidates[-1]

array([[271.,   1.,  -1.,   0.,   0.,  -1.,  -1.,   0.,   0.,   0.,   0.,
          0.,  -1.,  -1.,   0.,   0.,   1.,  -1.,   0.,   0.,   0.,   0.,
          0.,   0.],
       [271.,   0.,   0.,   0.,   1.,  -1.,   0.,  -1.,   0.,  -1.,   0.,
          0.,   0.,   0.,   0.,  -1.,   1.,   0.,  -1.,   0.,  -1.,   0.,
          0.,   0.],
       [271.,   0.,   0.,   1.,  -1.,  -1.,  -1.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,  -1.,   1.,  -1.,  -1.,   0.,   0.,   0.,   0.,
          0.,   1.],
       [271.,   0.,   0.,   0.,   0.,   0.,   0.,   1.,   0.,  -1.,   1.,
          1.,   0.,   0.,   0.,   0.,   0.,   0.,  -1.,   0.,   1.,   1.,
          1.,   1.],
       [271.,   1.,  -1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.,
          1.,  -1.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.,
          1.,   0.],
       [271.,   0.,   0.,   0.,   0.,   0.,   1.,  -1.,   0.,   0.,   1.,
         -1.,   0.,   0.,   0.,   0.,   0.,  -1.,  -1.,   0.,   0.,   1.,
       

In [303]:
# Stack the per-respondent blocks on top of each other (np.concatenate joins along axis 0 by default).
response_data = np.concatenate(candidates)

In [304]:
response_data.shape

(3252, 24)

In [305]:
d.iloc[:,:6]

Unnamed: 0,Timestamp,Age (It must be greater than 18),Gender,Profession,Education level,Sample number is of the form?
0,2023-03-11 09:33:17.461,21,Female,Student,12th Pass,5k+1
1,2023-03-11 10:04:35.075,52,Male,Service,10th pass,5k+1
2,2023-03-11 10:06:10.084,36,Male,Business,Below 10th,5k+1
3,2023-03-11 10:18:48.844,29,Male,Tuition teacher,Post Graduate,5k+2
4,2023-03-11 10:19:13.034,21,Female,Housewife,10th pass,5k
...,...,...,...,...,...,...
266,2023-03-23 11:00:49.689,36,Female,Housewife,12th Pass,5k+4
267,2023-03-23 11:04:39.825,68,Male,Panchayat Leader,12th Pass,5k+2
268,2023-03-23 11:06:58.784,37,Male,Clerk,10th pass,5k+3
269,2023-03-23 11:09:08.933,57,Female,Housewife,Below 10th,5k+1


In [306]:
d.iloc[:,:6].loc[d.index.repeat(12)].to_numpy().shape

(3252, 6)

In [307]:
pd.DataFrame(response_data)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,1.0,0.0,0.0,0.0,1.0,-1.0,0.0,-1.0,0.0,1.0,...,0.0,-1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,1.0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,...,-1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0
2,1.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,-1.0,0.0,...,0.0,-1.0,0.0,1.0,0.0,-1.0,0.0,0.0,-1.0,1.0
3,1.0,0.0,0.0,1.0,-1.0,-1.0,1.0,0.0,0.0,0.0,...,-1.0,1.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,-1.0,...,0.0,0.0,0.0,-1.0,-1.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3247,271.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,...,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,1.0
3248,271.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0
3249,271.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,-1.0,...,-1.0,0.0,0.0,0.0,0.0,1.0,-1.0,1.0,0.0,0.0
3250,271.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,0.0,0.0,-1.0,0.0,1.0,1.0,0.0,-1.0,1.0


#### Candidate Details, Additional Questions and Surveyor's Section

In [309]:
# Category label -> integer code, grouped by the survey field the labels belong to.
_education_order = ['Below 10th', '10th pass', '12th Pass', 'Graduate', 'Post Graduate', 'PhD']
map_cats_to_levels = {
  **{'Female': 0, 'Male': 1},
  **{label: code for code, label in enumerate(_education_order)},
  **{'5k+1': 1, '5k+2': 2, '5k+3': 3, '5k+4': 4, '5k': 5},
  **{'No': 0, 'Yes': 1},
  **{'Not Poor': 0, 'Poor': 1},
}

In [437]:
def _encoded_per_question(columns):
  """Repeat every row of `columns` 12 times and replace category labels by their codes."""
  return columns.loc[d.index.repeat(12)].replace(map_cats_to_levels).to_numpy()


candidate_details = [
  _encoded_per_question(d.iloc[:, :6]),
  _encoded_per_question(d.iloc[:, 72:]),
]

#### Perturbation Performance

In [371]:
# Columns 66-71 of d: the six multi-select answer columns processed in this section.
pp_data = d.iloc[:,66:72]

In [372]:
pp_data

Unnamed: 0,Education,Nutrition,Housing,Assets,Health,Drinking Water
0,"Attended school upto class 10, Attended school...","Get food twice in a day, Get food thrice in a ...","Living in a sustainable kuccha house, Living i...","Has telephone but neither TV nor computer, Has...",Nearby health center is not available but has ...,Has water connection in the household and gets...
1,,,,,,
2,"Attended school upto class 8, Attended school ...","Get food twice in a day, Get food thrice in a ...","Living in a sustainable kuccha house, Living i...","Has TV, telephone but not computer, Has teleph...","Neither there is any nearby health center, nor...",Has water connection in the household and gets...
3,"Attended school upto class 8, Attended school ...","Get food twice in a day, Get food thrice in a ...","Living in a sustainable kuccha house, Living i...","Has TV, telephone but not computer, Has teleph...",Nearby health center is not available but has ...,Has water connection in the household and gets...
4,"Attended school upto class 10, Attended school...","Get food twice in a day, Get food thrice in a ...",Living in an unsustainable pucca house,"Has telephone but neither TV nor computer, Has...",Nearby health center is not available but has ...,Has to walk less than 30 minutes for getting w...
...,...,...,...,...,...,...
266,"Attended school upto class 12, Graduated","Get food thrice in a day, Get food four times ...","Living in a less secured tenant, but a sustain...","Has telephone, computer but not TV",Nearby health center is not available but has ...,Has water connection in the household and gets...
267,Graduated,Get food four times a day,Living in a sustainable kuccha house,"Has telephone, computer but not TV",There are nearby health center(s) and can affo...,Has water connection in the household and gets...
268,"Attended school upto class 12, Graduated","Get food thrice in a day, Get food four times ...","Living in an unsustainable pucca house, Living...","Has TV, telephone but not computer, Has teleph...",Nearby health center is not available but has ...,Has water connection in the household but does...
269,"Attended school upto class 12, Graduated","Get food thrice in a day, Get food four times ...","Living in a sustainable kuccha house, Living i...","Has TV, telephone but not computer, Has teleph...",Nearby health center is not available but has ...,Has water connection in the household but does...


In [373]:
pp_data.shape

(271, 6)

In [374]:
# Zero-filled cube with a trailing axis of length 4 appended to pp_data's shape.
pp_des = np.zeros(pp_data.to_numpy().shape + (4,))

In [375]:
pp_des.shape

(271, 6, 4)

In [376]:
# Answer options of each question, listed in level order 0..3.
_pp_options_by_question = [
  [
    'Attended school upto class 8',
    'Attended school upto class 10',
    'Attended school upto class 12',
    'Graduated',
  ],
  [
    'Get food once in a day',
    'Get food twice in a day',
    'Get food thrice in a day',
    'Get food four times a day',
  ],
  [
    'Has no shelter',
    'Living in a sustainable kuccha house',
    'Living in an unsustainable pucca house',
    'Living in a less secured tenant, but a sustainable pucca house',
  ],
  [
    'Has neither TV nor telephone nor computer',
    'Has telephone but neither TV nor computer',
    'Has TV, telephone but not computer',
    'Has telephone, computer but not TV',
  ],
  [
    'Neither there is any nearby health center, nor has money to afford medicines',
    'Nearby health center is not available but has money to afford medicines',
    'There are nearby health center(s) but cannot afford medicines',
    'There are nearby health center(s) and can afford medicines',
  ],
  [
    'Has to walk more than 30 minutes for getting water',
    'Has to walk less than 30 minutes for getting water',
    'Has water connection in the household but does not get adequate water',
    'Has water connection in the household and gets adequate water',
  ],
]
# Option text -> its level (position within its question).
pp_levels = {
  option: level
  for options in _pp_options_by_question
  for level, option in enumerate(options)
}

In [382]:
s = 'Attended school upto class 10, Attended school upto class 12, Graduated'

In [378]:
s.split(', ')

['Attended school upto class 10', 'Attended school upto class 12', 'Graduated']

In [397]:
# Prototype of the answer parser that conv_str_to_row (defined further down) wraps:
# split `s` on ', ', drop the fragments produced by options that themselves
# contain a comma, then append those options back whole when they occur in `s`,
# and look every surviving option up in pp_levels.
# NOTE(review): the 'sustainable pucca' substring filter also matches
# 'Living in an unsustainable pucca house', and that option is never appended
# back -- confirm whether this is intended.
[
  pp_levels[choice] for choice in [x for x in s.split(', ') 
                                   if ('tenant' not in x) 
                                   if ('sustainable pucca' not in x)
                                   if x != 'Has TV'
                                   if x != 'telephone but not computer'
                                   if x != 'Has telephone'
                                   if x != 'computer but not TV'
                                   if x != 'Neither there is any nearby health center'
                                   if x != 'nor has money to afford medicines'
                                  ] 
  + (
    ['Living in a less secured tenant, but a sustainable pucca house'] 
    if 'tenant' in s else []
  ) +
  (
    ['Has TV, telephone but not computer'] 
    if 'Has TV, telephone but not computer' in s else []
  ) +
  (
    ['Has telephone, computer but not TV'] 
    if 'Has telephone, computer but not TV' in s else []
  ) + 
  (
    ['Neither there is any nearby health center, nor has money to afford medicines'] 
    if 'Neither there is any nearby health center, nor has money to afford medicines' in s else []
  )
]

[1, 2, 3]

In [398]:
def conv_str_to_row(s):
  """Convert one comma-separated answer string into a 4-slot 0/1 indicator row.

  Each option label found in `s` is looked up in the module-level `pp_levels`
  dict and the slot at the returned position is set to 1.

  Parameters
  ----------
  s : str or missing value
    Option labels joined by ', '. Any missing-value marker (np.nan,
    float('nan'), None, pd.NA) or the empty string yields an all-zero row.

  Returns
  -------
  pd.Series
    A one-element Series whose single value is the list of four 0/1 flags.

  Raises
  ------
  KeyError
    If a label that survives the fragment filter is not a key of `pp_levels`.
  """
  ddd = [0] * 4
  # pd.isna matches every missing-value marker, not just the np.nan singleton
  # that an identity test (`s is np.nan`) would recognise; it is checked first
  # so that pd.NA never reaches the `==` comparison.
  if pd.isna(s) or s == '':
    return pd.Series([ddd])

  # Labels that themselves contain ', ' get torn apart by the split below.
  # Each entry pairs the text whose presence in `s` signals the label with
  # the full label to look up.
  comma_labels = (
    ('tenant',
     'Living in a less secured tenant, but a sustainable pucca house'),
    ('Has TV, telephone but not computer',
     'Has TV, telephone but not computer'),
    ('Has telephone, computer but not TV',
     'Has telephone, computer but not TV'),
    ('Neither there is any nearby health center, nor has money to afford medicines',
     'Neither there is any nearby health center, nor has money to afford medicines'),
  )
  # Pieces left behind when the comma-containing labels are split.
  # NOTE(review): an answer that is exactly one of these pieces on its own
  # (e.g. a standalone 'Has TV') would be dropped as well - confirm no such
  # label exists in pp_levels.
  torn_pieces = (
    'Has TV',
    'telephone but not computer',
    'Has telephone',
    'computer but not TV',
    'Neither there is any nearby health center',
    'nor has money to afford medicines',
  )

  choices = [
    x for x in s.split(', ')
    if 'tenant' not in x
    and 'sustainable pucca' not in x
    and x not in torn_pieces
  ]
  choices += [label for marker, label in comma_labels if marker in s]

  for choice in choices:
    ddd[pp_levels[choice]] = 1
  return pd.Series([ddd])

In [399]:
# Sanity check on the sample string: the result is a one-element Series holding the 4-slot indicator list.
conv_str_to_row(s)

0    [0, 1, 1, 1]
dtype: object

In [400]:
# Convert every answer string in pp_data into its 4-slot 0/1 indicator list.
# Each resulting cell holds the one-element pd.Series returned by conv_str_to_row.
# NOTE(review): this rebinds pp_data, so re-running the cell would presumably feed
# already-converted cells back into conv_str_to_row - confirm before re-executing.
pp_data = pp_data.apply(np.vectorize(conv_str_to_row))

In [401]:
# Rebind pp_data to a NumPy array so its cells can be addressed as pp_data[i, j] in the unwrap loop below.
pp_data = pp_data.to_numpy()

In [402]:
# Inspect the converted array: every cell is a one-element Series wrapping an indicator list.
pp_data

array([[0    [0, 1, 1, 1]
        dtype: object    , 0    [0, 1, 1, 1]
                           dtype: object    , 0    [0, 1, 0, 1]
                                              dtype: object    ,
        0    [0, 1, 1, 1]
        dtype: object    , 0    [0, 1, 0, 1]
                           dtype: object    , 0    [0, 0, 0, 1]
                                              dtype: object    ],
       [0    [0, 0, 0, 0]
        dtype: object    , 0    [0, 0, 0, 0]
                           dtype: object    , 0    [0, 0, 0, 0]
                                              dtype: object    ,
        0    [0, 0, 0, 0]
        dtype: object    , 0    [0, 0, 0, 0]
                           dtype: object    , 0    [0, 0, 0, 0]
                                              dtype: object    ],
       [0    [1, 1, 1, 1]
        dtype: object    , 0    [0, 1, 1, 1]
                           dtype: object    , 0    [0, 1, 0, 1]
                                              dtype: object    

In [403]:
# Unwrap each one-element Series stored in pp_data and write its indicator
# list into the matching (row, column) position of pp_des.
for i, j in np.ndindex(pp_data.shape[0], pp_data.shape[1]):
  pp_des[i, j] = np.array(pp_data[i, j].iloc[0])

In [404]:
# Make sure the indicator array has an integer dtype.
pp_des = pp_des.astype(int)

In [405]:
# Check the dimensions of the indicator array before it is flattened.
pp_des.shape

(271, 6, 4)

In [406]:
# Inspect the integer indicator array.
pp_des

array([[0, 1, 1, 1],
       [0, 1, 1, 1],
       [0, 1, 0, 1],
       [0, 1, 1, 1],
       [0, 1, 0, 1],
       [0, 0, 0, 1]])

In [415]:
# Flatten the trailing axes so that each row of pp_data becomes one flat row of
# 0/1 indicators; the columns are later labelled with pp_levels.keys().
# NOTE(review): presumably one row per candidate - confirm against the survey layout.
pp_des = pp_des.reshape(pp_data.shape[0], -1)

In [416]:
# Verify the flattened shape.
pp_des.shape

(271, 24)

In [418]:
# Rebuild pp_data as a DataFrame over the flattened indicators, naming each
# column after the corresponding pp_levels key, then display it.
pp_data = pd.DataFrame(pp_des, columns=list(pp_levels.keys()))
pp_data

Unnamed: 0,Attended school upto class 8,Attended school upto class 10,Attended school upto class 12,Graduated,Get food once in a day,Get food twice in a day,Get food thrice in a day,Get food four times a day,Has no shelter,Living in a sustainable kuccha house,...,"Has TV, telephone but not computer","Has telephone, computer but not TV","Neither there is any nearby health center, nor has money to afford medicines",Nearby health center is not available but has money to afford medicines,There are nearby health center(s) but cannot afford medicines,There are nearby health center(s) and can afford medicines,Has to walk more than 30 minutes for getting water,Has to walk less than 30 minutes for getting water,Has water connection in the household but does not get adequate water,Has water connection in the household and gets adequate water
0,0,1,1,1,0,1,1,1,0,1,...,1,1,0,1,0,1,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,1,1,1,0,1,1,1,0,1,...,1,1,1,1,0,1,0,0,0,1
3,1,1,1,1,0,1,1,1,0,1,...,1,1,0,1,0,1,0,0,0,1
4,0,1,1,1,0,1,1,1,0,0,...,1,1,0,1,1,1,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,0,0,1,1,0,0,1,1,0,0,...,0,1,0,1,0,1,0,0,0,1
267,0,0,0,1,0,0,0,1,0,1,...,0,1,0,0,0,1,0,0,0,1
268,0,0,1,1,0,0,1,1,0,0,...,1,1,0,1,0,1,0,0,1,1
269,0,0,1,1,0,0,1,1,0,1,...,1,1,0,1,0,1,0,0,1,1


In [422]:
# Repeat every row of pp_data 12 times in place (row 0 twelve times, then row 1, ...)
# and keep the result as a plain NumPy array.
# Assumes pp_data carries the default unique index it was created with.
perturbation_performance = np.repeat(pp_data.to_numpy(), 12, axis=0)

In [423]:
# Inspect the row-repeated indicator matrix.
perturbation_performance

array([[0, 1, 1, ..., 0, 0, 1],
       [0, 1, 1, ..., 0, 0, 1],
       [0, 1, 1, ..., 0, 0, 1],
       ...,
       [0, 0, 1, ..., 0, 0, 1],
       [0, 0, 1, ..., 0, 0, 1],
       [0, 0, 1, ..., 0, 0, 1]])

#### Merging Everything

In [438]:
# Shape check before merging: first block of candidate_details.
candidate_details[0].shape

(3252, 6)

In [439]:
# Shape check before merging: response_data.
response_data.shape

(3252, 24)

In [440]:
# Shape check before merging: perturbation_performance.
perturbation_performance.shape

(3252, 24)

In [441]:
# Shape check before merging: second block of candidate_details.
candidate_details[1].shape

(3252, 7)

In [442]:
# Place the four row-aligned blocks side by side (column-wise) to form the merged table.
final_data = np.concatenate(
  (
    candidate_details[0],
    response_data,
    perturbation_performance,
    candidate_details[1],
  ),
  axis=1,
)

In [443]:
# The column count should equal the sum of the widths of the four concatenated blocks.
final_data.shape

(3252, 61)

In [444]:
# Wrap the merged array in a DataFrame; the columns still carry default integer labels at this point.
fin_data = pd.DataFrame(final_data)

In [447]:
# Name the merged columns, in order: respondent details and identifiers, the two
# profiles of each pair (one column per poverty indicator, suffixed 1 and 2),
# the recorded choice, one column per pp_levels key, and the closing questions.
fin_data.columns = (
  ['Timestamp', 'Age', 'Gender', 'Profession', 'Education', 'SampleID', 'CandidateID']
  + [f'{indicator}1' for indicator in poverty_indicators]
  + [f'{indicator}2' for indicator in poverty_indicators]
  + ['Choice']
  + list(pp_levels.keys())
  + [
    'Without Bank Account means Poor',
    'Without Internet means Poor',
    'Time Distribution for Leisure Paid and Unpaid Work',
    'IsCandidatePoor',
    'Location',
    'IsLocalityPoor',
    'LocalityLacking',
  ]
)

In [449]:
# Inspect the last block of candidate_details.
candidate_details[-1]

array([[1.0, 1.0, nan, ..., 'Medinipur ', 0.0, 0],
       [1.0, 1.0, nan, ..., 'Medinipur ', 0.0, 0],
       [1.0, 1.0, nan, ..., 'Medinipur ', 0.0, 0],
       ...,
       [nan, nan, nan, ..., 'Paschim Midnapore ', 0.0, nan],
       [nan, nan, nan, ..., 'Paschim Midnapore ', 0.0, nan],
       [nan, nan, nan, ..., 'Paschim Midnapore ', 0.0, nan]], dtype=object)

In [451]:
# Persist the merged table to a CSV file in the current working directory.
# The DataFrame index is written as the first, unnamed column because index=False is not passed.
# NOTE(review): readers of this file presumably need index_col=0 to avoid an extra 'Unnamed: 0' column - confirm with downstream notebooks.
fin_data.to_csv('Response-Data-Cleaned-Ver1.csv')

Unnamed: 0,Timestamp,Age,Gender,Profession,Education,SampleID,CandidateID,Nutritious Meal1,Years of Schooling1,Cooking Fuel1,...,Has to walk less than 30 minutes for getting water,Has water connection in the household but does not get adequate water,Has water connection in the household and gets adequate water,Without Bank Account means Poor,Without Internet means Poor,Time Distribution for Leisure Paid and Unpaid Work,IsCandidatePoor,Location,IsLocalityPoor,LocalityLacking
0,2023-03-11 09:33:17.461,21,0,Student,2,1,1.0,0.0,0.0,0.0,...,0,0,1,1.0,1.0,,Not poor,Medinipur,0.0,0
1,2023-03-11 09:33:17.461,21,0,Student,2,1,1.0,0.0,0.0,1.0,...,0,0,1,1.0,1.0,,Not poor,Medinipur,0.0,0
2,2023-03-11 09:33:17.461,21,0,Student,2,1,1.0,0.0,0.0,0.0,...,0,0,1,1.0,1.0,,Not poor,Medinipur,0.0,0
3,2023-03-11 09:33:17.461,21,0,Student,2,1,1.0,0.0,0.0,1.0,...,0,0,1,1.0,1.0,,Not poor,Medinipur,0.0,0
4,2023-03-11 09:33:17.461,21,0,Student,2,1,1.0,0.0,1.0,0.0,...,0,0,1,1.0,1.0,,Not poor,Medinipur,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3247,2023-03-23 11:11:51.632,31,1,Small Businessman,1,5,271.0,1.0,-1.0,0.0,...,0,0,1,,,,Not poor,Paschim Midnapore,0.0,
3248,2023-03-23 11:11:51.632,31,1,Small Businessman,1,5,271.0,0.0,0.0,1.0,...,0,0,1,,,,Not poor,Paschim Midnapore,0.0,
3249,2023-03-23 11:11:51.632,31,1,Small Businessman,1,5,271.0,0.0,0.0,1.0,...,0,0,1,,,,Not poor,Paschim Midnapore,0.0,
3250,2023-03-23 11:11:51.632,31,1,Small Businessman,1,5,271.0,0.0,0.0,0.0,...,0,0,1,,,,Not poor,Paschim Midnapore,0.0,
