In [2]:
# Read the variable maps back in and work on cleaning up/sorting:
import json

with open('variable-maps.json', 'r') as fp:
    var_dict = json.load(fp)

In [3]:
print(json.dumps(var_dict, indent=4))

{
    "V5": {
        "99": "NA"
    },
    "V6": {
        "99": "NA"
    },
    "V7": {
        "90": "-",
        "91": "-",
        "99": "NA"
    },
    "V8": {
        "99": "NA"
    },
    "V9": {
        "99": "NA"
    },
    "V10": {
        "90": "-",
        "91": "-",
        "99": "NA"
    },
    "V11": {
        "999": "NA"
    },
    "V12": {
        "99": "NA"
    },
    "V13": {
        "1": "MALE",
        "2": "FEMALE",
        "9": "NA"
    },
    "V14": {
        "1": "R HAS SPOUSE IN HHL",
        "2": "R HAS PARTNER IN HHL",
        "5": "ALL OTHERS",
        "9": "NA"
    },
    "V15": {
        "99": "NA"
    },
    "V101": {
        "1": "EXCELLENT",
        "2": "VERY GOOD",
        "3": "GOOD",
        "4": "FAIR",
        "5": "POOR",
        "8": "DK",
        "9": "NA"
    },
    "V102": {
        "1": "EXCELLENT",
        "2": "VERY GOOD",
        "3": "GOOD",
        "4": "FAIR",
        "5": "POOR",
        "8": "DK",
        "9": "NA"
    },
    "V103

In [4]:
import numpy as np

In [5]:
def try_to_classify_variables(variables_dict):
    na = set(['NA'])
    dk_na = set(['DK', 'NA'])
    inap_dk_na = set(['INAP', 'DK', 'NA'])
    yes_or_no = set(['YES', 'NO', 'INAP', 'DK', 'NA'])
    history = set(['PAST', 'AGO'])
    checked = set(['CHECKED'])
    number_words = set(['ONE', '95'])
    
    variable_type = {
        'yes_or_no': [],
        'checked': [],
        'history': [],
        'inap_dk_na': [],
        'maybe_numerical': [],
        'unsure': [],
    }
    
    new_variable_dict = {}

    for variable in variables_dict.keys():
        v_dict = variables_dict[variable]
        # remove any dashes from values
        reduced_dict = {int(key):value for key, value in v_dict.items() if value != "-"}
        
        # is the dictionary empty after reduction?
        if not reduced_dict:
            variable_type['maybe_numerical'].append(variable)
        else:
            value_words = {word for value in reduced_dict.values() for word in value.split()}
            print(variable, value_words)
            if value_words == na:
                variable_type['maybe_numerical'].append(variable)
            elif value_words == dk_na:
                variable_type['maybe_numerical'].append(variable)
            elif value_words == inap_dk_na:
                variable_type['inap_dk_na'].append(variable)
            elif value_words == yes_or_no:
                variable_type['yes_or_no'].append(variable)
            elif not checked.isdisjoint(value_words):
                # print("in checked")
                variable_type['checked'].append(variable)
            elif not history.isdisjoint(value_words):
                # print("in history")
                variable_type['history'].append(variable)
            elif not number_words.isdisjoint(value_words):
                variable_type['maybe_numerical'].append(variable)
            else:
                variable_type['unsure'].append(variable)
        
        new_variable_dict[variable] = reduced_dict
    
    return new_variable_dict, variable_type   
            

In [6]:
new_variable_dict, var_types = try_to_classify_variables(var_dict)

V5 {'NA'}
V6 {'NA'}
V7 {'NA'}
V8 {'NA'}
V9 {'NA'}
V10 {'NA'}
V11 {'NA'}
V12 {'NA'}
V13 {'NA', 'FEMALE', 'MALE'}
V14 {'HHL', 'HAS', 'R', 'ALL', 'OTHERS', 'PARTNER', 'IN', 'NA', 'SPOUSE'}
V15 {'NA'}
V101 {'FAIR', 'DK', 'POOR', 'NA', 'EXCELLENT', 'VERY', 'GOOD'}
V102 {'FAIR', 'DK', 'POOR', 'NA', 'EXCELLENT', 'VERY', 'GOOD'}
V103 {'BETTER', 'DK', 'IF', 'VOL.', 'NA', 'WORSE'}
V104 {'DK', 'TIME', 'THE', 'OF', 'RARELY', 'NEVER', 'NA', 'ALWAYS', 'MOST', 'SOMETIMES'}
V105 {'NA', 'DK'}
V106 {'DK', 'OCCASIONALLY', 'REGULARLY', 'SELDOM', 'NEVER', 'NA'}
V107 {'HHL', 'SAMPLE', 'IW', 'R', 'MENTIONED', 'ALL', 'OTHERS', 'MAIN', 'IN', 'SPOUSE', 'INAP,'}
V108 {'MARRIED;', 'WIDOWED', 'DK', 'SEPARATED', '107', 'IN', 'NEVER', 'NA', 'DIVORCED', '1'}
V109 {'NO', 'DK', 'INAP', 'NA', 'YES'}
V110 {'TWO', 'H', 'CURRENTLY', 'LIVES', 'TIME', '107', 'SOME', 'HAS', 'INAP', 'ELSE', 'IN', 'OF', 'RESIDENCE--PART', 'PLACES', 'HERE;', '1', 'PLACE'}
V111 {'OTHER', '(SPECIFY)', 'DK', 'FORCES', 'OUT', 'INAP', 'JAIL/PRISON', 

V2337 {'NA', 'INAP', 'DK'}
V2338 {'CHECKED;', 'IF', 'INAP', 'LEFT', 'NA', 'BLANK', 'NOT', 'A-J', 'CHECKED'}
V2339 {'NA', 'INAP', 'DK'}
V2340 {'MONTH', '6', 'YEAR', 'DK', 'MONTHS', 'THAN', 'INAP', 'NA', 'A', 'MORE'}
V2341 {'INAP'}
V2401 {'NO', 'DK', 'INAP', 'NA', 'YES'}
V2402 {'CHECKED;', 'IF', 'INAP', 'LEFT', 'NA', 'BLANK', 'NOT', 'A-J', 'CHECKED'}
V2403 {'NA', 'INAP', 'DK'}
V2404 {'MONTH', '6', 'YEAR', 'DK', 'MONTHS', 'THAN', 'INAP', 'NA', 'A', 'MORE'}
V2405 {'NA', 'INAP', 'DK'}
V2406 {'CHECKED;', 'IF', 'INAP', 'LEFT', 'NA', 'BLANK', 'NOT', 'A-J', 'CHECKED'}
V2407 {'NA', 'INAP', 'DK'}
V2408 {'MONTH', '6', 'YEAR', 'DK', 'MONTHS', 'THAN', 'INAP', 'NA', 'A', 'MORE'}
V2409 {'NA', 'INAP', 'DK'}
V2410 {'CHECKED;', 'IF', 'INAP', 'LEFT', 'NA', 'BLANK', 'NOT', 'A-J', 'CHECKED'}
V2411 {'NA', 'INAP', 'DK'}
V2412 {'MONTH', '6', 'YEAR', 'DK', 'MONTHS', 'THAN', 'INAP', 'NA', 'A', 'MORE'}
V2413 {'NA', 'INAP', 'DK'}
V2414 {'CHECKED;', 'IF', 'INAP', 'LEFT', 'NA', 'BLANK', 'NOT', 'A-J', 'CHECKED'}
V241

V4901 {'TO', 'CURRENTLY', '--', 'R', 'ALL', 'OTHERS', 'P', 'IS', 'ENTIRELY', 'SKIPPED', 'S', 'NA', 'FOR', 'PAY', 'GO', 'WORKING', 'SECTION'}
V4902 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4903 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4904 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4905 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4906 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4907 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4908 {'DK', 'OFTEN', 'IF', 'INAP', 'VOL.', 'NA', 'RARELY', 'SOMETIMES'}
V4909 {'CURRENTLY', 'SPOUSE/PARTNER', 'INAP', 'R', 'ALL', 'OTHERS', 'NA', 'LIVING', 'WITH'}
V4910 {'DK', 'INAP', 'ALL', 'SOMEWHAT', 'AT', 'WILLING', 'NOT', 'NA', 'VERY'}
V4911 {'MOR', 'DK', 'LOT', 'SPENDS', 'IF', 'R', 'INAP', 'VOL.', 'SOMEWHAT', 'NA', 'LITTLE', 'A', 'HUSBAND/WIFE/PARTNER', 'MORE'}
V4912 {'DK', 'LOT', 'SOME', 'INAP', 'ALL', 'AT', 

In [7]:
len(new_variable_dict)

2945

In [8]:
for key, value in var_types.items():
    print(key, len(value))

yes_or_no 601
checked 535
history 52
inap_dk_na 710
maybe_numerical 74
unsure 973


In [9]:
var_types['unsure']

['V13',
 'V14',
 'V101',
 'V102',
 'V103',
 'V104',
 'V106',
 'V107',
 'V108',
 'V110',
 'V111',
 'V113',
 'V114',
 'V201',
 'V202',
 'V203',
 'V204',
 'V205',
 'V206',
 'V207',
 'V208',
 'V209',
 'V210',
 'V211',
 'V212',
 'V213',
 'V214',
 'V215',
 'V216',
 'V217',
 'V218',
 'V219',
 'V220',
 'V221',
 'V222',
 'V223',
 'V224',
 'V225',
 'V226',
 'V227',
 'V228',
 'V229',
 'V230',
 'V231',
 'V232',
 'V233',
 'V234',
 'V235',
 'V236',
 'V237',
 'V238',
 'V239',
 'V245',
 'V246',
 'V247',
 'V248',
 'V249',
 'V250',
 'V301',
 'V302',
 'V304',
 'V305',
 'V306',
 'V307',
 'V308',
 'V310',
 'V312',
 'V313',
 'V314',
 'V320',
 'V326',
 'V336',
 'V337',
 'V338',
 'V345',
 'V347',
 'V349',
 'V401',
 'V402',
 'V403',
 'V404',
 'V405',
 'V406',
 'V407',
 'V418',
 'V419',
 'V420',
 'V431',
 'V433',
 'V435',
 'V501',
 'V502',
 'V503',
 'V504',
 'V505',
 'V506',
 'V507',
 'V508',
 'V509',
 'V510',
 'V511',
 'V522',
 'V523',
 'V524',
 'V534',
 'V536',
 'V538',
 'V607',
 'V608',
 'V628',
 'V637',
 'V

In [11]:
new_variable_dict[var_types['yes_or_no'][0]]

{1: 'YES', 5: 'NO', 0: 'INAP', 8: 'DK', 9: 'NA'}

In [12]:
new_variable_dict[var_types['unsure'][0]]

{1: 'MALE', 2: 'FEMALE', 9: 'NA'}

In [13]:
new_variable_dict[var_types['maybe_numerical'][0]]

{99: 'NA'}

In [15]:
new_variable_dict[var_types['history'][0]]

{1: 'PAST MONTH',
 2: 'PAST SIX MONTHS',
 3: 'PAST YEAR',
 4: 'MORE THAN A YEAR AGO',
 0: 'INAP',
 8: 'DK',
 9: 'NA'}

In [16]:
def convert_df_labels (df, types, maps):
    
    new_df = df.copy()
    
    for key in types.keys():
        if key != 'maybe_numerical':
            for var_name in types[key]:
                var_map = maps[var_name]
                if type(df[var_name][0]) != str:
                    new_map = {int(key):value for key,value in var_map.items()}
                    new_df[var_name] = df[var_name].map(new_map)
                else:
                    new_map = {key:value for key,value in var_map.items()}
                    new_df[var_name] = df[var_name].map(new_map, na_action=None)

                #new_df[var_name] = df[var_name].map(new_map)
    
    return new_df

In [17]:
import pandas as pd

In [18]:
df = pd.read_csv('06693-0001-Data.tsv', delim_whitespace=True)

In [19]:
new_df = convert_df_labels(df, var_types, new_variable_dict)

In [20]:
df.shape

(8098, 2954)

In [21]:
new_df.shape

(8098, 2954)

In [22]:
new_df.head()

Unnamed: 0,CASEID,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V7437,V7438,V7439,V7440,V7441,V7442,P1FWT,P2WTV3,TOBACWT,P2TOBWT
0,10001,60080192,1,8128,9,28,90,9,28,90,...,,,,,,,0.5135,0.5883,,
1,10002,16030048,1,3941,10,1,90,10,1,90,...,,,,,,,1.8968,2.173,,
2,10003,74070592,1,7813,10,5,90,10,5,90,...,,,,,,,0.41,0.4697,,
3,10004,74030720,1,7813,10,10,90,10,10,90,...,,,,,,,0.3093,0.3543,,
4,10005,4180080,1,9018,10,10,90,10,10,90,...,,,,,,,0.779,0.8924,,


In [23]:
DS1_suicide_factors = ['V12', 'V13', 'V4515', 'V5114', 'V5118', 'V5113', 
                      'V5115', 'V5112', 'V4428', 'V4433', 'V6305', 'V6301', 
                      'V5225', 'V6649', 'V6749', 'V5918', 'V6143', 'V6126', 
                      'V6114', 'V101', 'V102', 'V6215']

In [25]:
for factor in DS1_suicide_factors:
    print(new_df[factor].value_counts())

30    298
33    297
31    284
32    282
27    266
34    265
35    258
36    250
37    249
28    245
29    240
40    239
26    230
39    226
38    226
25    219
41    212
24    209
42    200
23    197
44    197
20    197
43    185
21    180
22    179
19    168
16    167
46    167
17    160
47    160
18    159
15    152
50    151
48    149
45    148
52    146
53    145
49    141
54    136
51    110
55      4
59      2
58      1
61      1
99      1
Name: V12, dtype: int64
FEMALE    4263
MALE      3835
Name: V13, dtype: int64
INAP                            3244
NA                              2132
R CURRENTLY MARRIED             1921
R CURRENTLY DIVORCED/ANNULED     576
R CURRENTLY SEPARATED            192
R CURRENTLY WIDOWED               33
Name: V4515, dtype: int64
A LITTLE      2579
NA            2171
SOMEWHAT      1548
NOT AT ALL    1340
VERY           391
INAP            67
DK               2
Name: V5114, dtype: int64
NOT AT ALL    2871
A LITTLE      2217
NA            2180
SOMEWHAT

In [27]:
new_variable_dict['V6215']

{1: 'ONLY ONE YES RESPONSE IN U1-U12',
 2: 'TWO OR MORE YES RESPONSES IN U1-U12',
 3: 'ALL OTHERS',
 0: 'INAP',
 9: 'NA'}

In [28]:
new_df.to_csv("ncs1-ds1.csv")

In [30]:
for col in new_df.columns:
    print(new_df[col].value_counts())

20478    1
21840    1
11575    1
21816    1
23865    1
25918    1
11583    1
21824    1
23873    1
25926    1
11591    1
21832    1
23881    1
25934    1
11599    1
23889    1
23857    1
25942    1
11607    1
21848    1
23897    1
25950    1
11615    1
21856    1
23905    1
11623    1
21864    1
23913    1
11631    1
21872    1
        ..
10936    1
21175    1
10880    1
25217    1
23174    1
21127    1
10888    1
25225    1
23182    1
21135    1
10896    1
25233    1
23190    1
21143    1
10904    1
25241    1
23198    1
21151    1
10912    1
25249    1
23206    1
21159    1
10920    1
25257    1
23214    1
21167    1
10928    1
25265    1
23222    1
20482    1
Name: CASEID, Length: 8098, dtype: int64
81060224    4
74090368    4
80060032    4
80120064    4
80040064    4
80060096    3
73040128    3
84100480    3
74070016    3
64060160    3
82060224    3
74090176    3
68050240    3
80040448    3
70080448    3
80120128    3
80120192    3
81020288    3
70080576    3
80121152    3
84101248

Name: V341, dtype: int64
INAP    7991
DK         3
NA         2
Name: V342, dtype: int64
INAP                    7540
MORE THAN A YEAR AGO     233
PAST MONTH               162
PAST SIX MONTHS          106
PAST YEAR                 52
NA                         5
Name: V343, dtype: int64
INAP    7865
DK         1
Name: V344, dtype: int64
INAP       7520
NO          473
YES          99
IF VOL.       4
NA            2
Name: V345, dtype: int64
INAP    7999
NO        59
YES       40
Name: V346, dtype: int64
INAP                 7999
FEAR(S)                65
DRINKING/DRUG USE      24
IF VOL.                10
Name: V347, dtype: int64
INAP    7524
NO       437
YES      135
NA         2
Name: V348, dtype: int64
INAP       7963
YES          72
NO           43
IF VOL.      20
Name: V349, dtype: int64
NO     5743
YES    2354
NA        1
Name: V401, dtype: int64
NO     7588
YES     509
NA        1
Name: V402, dtype: int64
NO     7883
YES     214
NA        1
Name: V403, dtype: int64
NO     7006
YE

Name: V707, dtype: int64
INAP    7597
NO       306
YES      192
NA         3
Name: V708, dtype: int64
INAP    7906
Name: V709, dtype: int64
INAP    7597
NO       396
YES      101
NA         4
Name: V710, dtype: int64
INAP    7997
Name: V711, dtype: int64
INAP    7597
NO       305
YES      193
NA         3
Name: V712, dtype: int64
INAP    7905
Name: V713, dtype: int64
INAP          7597
A LOT          188
SOME           161
A LITTLE       101
NOT AT ALL      45
NA               6
Name: V714, dtype: int64
INAP                                 7597
YES RESPONSE IN B83 OR B86 OR B87     335
ALL OTHERS                            166
Name: V715, dtype: int64
INAP                                       7763
R MENTIONS STRESS/NERVES/MENTAL ILLNESS     116
R MENTIONS PANIC/ANXIETY                     97
ALL OTHERS                                   72
R MENTIONS PHYSICAL ILLNESS OR INJURY        34
R MENTIONS MEDICATIONS/DRUGS/ALCOHOL         16
Name: V716, dtype: int64
INAP    7860
NO       207
Y

0     6703
10     376
20     185
15     184
99      80
30      79
25      63
5       59
40      43
12      40
50      27
8       26
35      22
6       22
7       14
11      13
3       13
98      12
60      10
14      10
45       9
4        9
13       9
17       8
95       8
18       7
2        7
22       6
70       6
9        6
80       5
23       4
16       3
28       2
44       2
75       2
43       2
27       2
37       2
67       2
21       1
32       1
55       1
90       1
82       1
66       1
54       1
26       1
36       1
29       1
81       1
68       1
72       1
1        1
33       1
85       1
Name: V1107, dtype: int64
INAP    3511
NO      2523
YES     2061
NA         2
DK         1
Name: V1108, dtype: int64
INAP    6037
YES     1385
NO       666
NA         8
DK         2
Name: V1109, dtype: int64
INAP    3511
NO      2870
YES     1713
NA         3
DK         1
Name: V1110, dtype: int64
INAP    6385
YES     1370
NO       337
NA         6
Name: V1111, dtype: int64
INAP   

0     6108
18      88
16      85
20      84
15      82
21      82
25      81
30      81
17      76
22      75
23      72
19      68
28      68
27      68
35      58
29      57
24      56
14      56
32      52
26      51
31      50
95      46
34      43
36      42
13      42
37      39
38      39
33      37
12      30
40      30
39      24
42      23
99      21
41      21
45      20
47      17
43      17
48      14
46      11
44      10
11       9
10       8
50       8
98       8
9        7
49       7
8        7
51       5
53       4
5        3
52       3
7        2
6        2
54       1
Name: V1348, dtype: int64
INAP    8052
YES       27
NO        19
Name: V1349, dtype: int64
INAP    8052
Name: V1350, dtype: int64
INAP    6108
YES     1801
NO       166
NA        23
Name: V1351, dtype: int64
NOT CHECKED; INAP            7126
CHECKED                       948
NA IF ALL VARS LEFT BLANK      24
Name: V1401, dtype: int64
NOT CHECKED; INAP            7649
CHECKED                       427
NA

Name: V1630, dtype: int64
INAP    7882
NA         1
Name: V1631, dtype: int64
INAP    6947
NO      1021
YES      125
NA         5
Name: V1632, dtype: int64
INAP    7973
Name: V1633, dtype: int64
INAP    6947
NO       976
YES      170
NA         5
Name: V1634, dtype: int64
INAP    7928
Name: V1635, dtype: int64
INAP          6947
SOME           390
A LITTLE       375
A LOT          211
NOT AT ALL     169
NA               6
Name: V1636, dtype: int64
INAP    6947
NO      1062
YES       84
NA         5
Name: V1637, dtype: int64
INAP    8014
Name: V1638, dtype: int64
INAP                                 6947
ALL OTHERS                            572
1+ RESPONSES IN E16-E23 SERIES        345
CHECKPOINT DELETED BY INTERVIEWER     234
Name: V1639, dtype: int64
INAP                    7519
MORE THAN A YEAR AGO     468
PAST YEAR                 41
PAST MONTH                33
PAST SIX MONTHS           30
NA                         6
DK                         1
Name: V1640, dtype: int64
INAP    

Name: V1915, dtype: int64
INAP                  6690
MORE THAN YEAR AGO    1203
PAST SIX MONTHS         87
PAST MONTH              65
PAST YEAR               46
NA                       7
Name: V1916, dtype: int64
INAP                                        7900
Z. 1 OR 2 DAYS IN PAST 12 MONTHS              55
Y. 3 TO 5 DAYS IN PAST 12 MONTHS              38
W. 1 TO 2 TIMES A MONTH (12 TO 24 DAYS A      34
X. EVERY OTHER MONTH OR SO (6 TO 11 DAYS      26
V. SEVERAL TIMES A MONTH (25 TO 51 DAYS       11
S. ALMOST DAILY (3 TO 6 TIMES A WEEK)         10
R. DAILY                                      10
T. 1 OR 2 DAYS A WEEK                          8
NA                                             6
Name: V1917, dtype: int64
INAP    6895
DK         2
NA         2
Name: V1918, dtype: int64
NO      7097
YES      991
NA         7
INAP       3
Name: V1919, dtype: int64
INAP    7107
NA         1
Name: V1920, dtype: int64
INAP                    7107
A. 1 OR 2 TIMES          307
B. 3 TO 5 TIMES  

Name: V2244, dtype: int64
INAP    8046
NA         2
Name: V2245, dtype: int64
INAP    8043
YES       37
NO        17
NA         1
Name: V2246, dtype: int64
NOT CHECKED; INAP       8070
CHECKED                   27
NA IF A-J LEFT BLANK       1
Name: V2247, dtype: int64
INAP    8071
Name: V2248, dtype: int64
INAP                8071
MORE THAN A YEAR      26
MONTH                  1
Name: V2249, dtype: int64
INAP    8072
Name: V2250, dtype: int64
INAP    8071
YES       19
NO         7
NA         1
Name: V2251, dtype: int64
NO      5323
INAP    2541
YES      203
NA        31
Name: V2301, dtype: int64
NOT CHECKED; INAP    7970
CHECKED               128
Name: V2302, dtype: int64
INAP    7970
Name: V2303, dtype: int64
INAP                7970
MORE THAN A YEAR     114
6 MONTHS              11
YEAR                   2
MONTH                  1
Name: V2304, dtype: int64
INAP    7984
Name: V2305, dtype: int64
NOT CHECKED; INAP    8090
CHECKED                 8
Name: V2306, dtype: int64
INAP    809

Name: V2610, dtype: int64
INAP    8082
Name: V2611, dtype: int64
INAP                8082
MORE THAN A YEAR      11
6 MONTHS               2
MONTH                  2
YEAR                   1
Name: V2612, dtype: int64
INAP    8087
Name: V2613, dtype: int64
NOT CHECKED; INAP       8068
CHECKED                   29
NA IF A-J LEFT BLANK       1
Name: V2614, dtype: int64
INAP    8069
Name: V2615, dtype: int64
INAP                8069
MORE THAN A YEAR      25
6 MONTHS               2
YEAR                   1
MONTH                  1
Name: V2616, dtype: int64
INAP    8073
Name: V2617, dtype: int64
NOT CHECKED; INAP       8084
CHECKED                   13
NA IF A-J LEFT BLANK       1
Name: V2618, dtype: int64
INAP    8085
Name: V2619, dtype: int64
INAP                8085
MORE THAN A YEAR      10
6 MONTHS               2
MONTH                  1
Name: V2620, dtype: int64
INAP    8088
Name: V2621, dtype: int64
NOT CHECKED; INAP       8092
CHECKED                    5
NA IF A-J LEFT BLANK       1

Name: V2903, dtype: int64
INAP                7688
MORE THAN A YEAR     246
MONTH                 70
6 MONTHS              59
YEAR                  35
Name: V2904, dtype: int64
INAP    7852
NA         1
Name: V2905, dtype: int64
NOT CHECKED; INAP    8081
CHECKED                17
Name: V2906, dtype: int64
INAP    8081
Name: V2907, dtype: int64
INAP                8081
MORE THAN A YEAR      15
6 MONTHS               1
MONTH                  1
Name: V2908, dtype: int64
INAP    8083
Name: V2909, dtype: int64
NOT CHECKED; INAP    8069
CHECKED                29
Name: V2910, dtype: int64
INAP    8069
Name: V2911, dtype: int64
INAP                8069
MORE THAN A YEAR      22
6 MONTHS               4
MONTH                  3
Name: V2912, dtype: int64
INAP    8076
Name: V2913, dtype: int64
NOT CHECKED; INAP    8042
CHECKED                56
Name: V2914, dtype: int64
INAP    8042
NA         1
Name: V2915, dtype: int64
INAP                8042
MORE THAN A YEAR      44
YEAR                   5
6 

Name: V3235, dtype: int64
INAP                8055
MORE THAN A YEAR      38
6 MONTHS               3
NA                     1
MONTH                  1
Name: V3236, dtype: int64
INAP    8060
Name: V3237, dtype: int64
NOT CHECKED; INAP    8078
CHECKED                20
Name: V3238, dtype: int64
INAP    8078
Name: V3239, dtype: int64
INAP                8078
MORE THAN A YEAR      19
MONTH                  1
Name: V3240, dtype: int64
INAP    8079
Name: V3241, dtype: int64
INAP    3804
NO      2801
YES     1468
NA        25
Name: V3301, dtype: int64
NOT CHECKED; INAP    6890
CHECKED              1208
Name: V3302, dtype: int64
INAP    6890
DK         2
NA         2
Name: V3303, dtype: int64
INAP                6890
MORE THAN A YEAR     771
MONTH                177
6 MONTHS             173
YEAR                  85
NA                     2
Name: V3304, dtype: int64
INAP    7327
NA         3
DK         2
Name: V3305, dtype: int64
NOT CHECKED; INAP    8063
CHECKED                35
Name: V3306, 

Name: V3613, dtype: int64
NOT CHECKED; INAP    7996
CHECKED               102
Name: V3614, dtype: int64
INAP    7996
NA         1
Name: V3615, dtype: int64
INAP                7996
MORE THAN A YEAR      91
6 MONTHS               4
YEAR                   3
MONTH                  3
NA                     1
Name: V3616, dtype: int64
INAP    8007
Name: V3617, dtype: int64
NOT CHECKED; INAP    8065
CHECKED                33
Name: V3618, dtype: int64
INAP    8065
Name: V3619, dtype: int64
INAP                8065
MORE THAN A YEAR      20
6 MONTHS               9
MONTH                  3
YEAR                   1
Name: V3620, dtype: int64
INAP    8078
Name: V3621, dtype: int64
NOT CHECKED; INAP    8089
CHECKED                 9
Name: V3622, dtype: int64
INAP    8089
Name: V3623, dtype: int64
INAP                8089
MORE THAN A YEAR       7
6 MONTHS               1
MONTH                  1
Name: V3624, dtype: int64
INAP    8091
Name: V3625, dtype: int64
NOT CHECKED; INAP    7964
CHECKED       

0    5892
2     566
4     520
3     497
1     316
5     206
6      65
7      26
8       7
9       3
Name: V4000, dtype: int64
INAP    5892
R       2206
Name: V4001, dtype: int64
INAP      5892
FEMALE    1206
MALE      1000
Name: V4002, dtype: int64
INAP    5892
NA         1
Name: V4003, dtype: int64
Spouse of R                                 1299
Child (incl. step/adopted children); fos     277
Parent                                       145
Partner of R                                  81
Sibling                                       32
Roommate                                      31
Grandchild (incl. great)                       3
Aunt/uncle (incl. great)                       3
Niece/nephew                                   2
Cousin                                         2
Grandparent (incl. great)                      1
Name: V4004, dtype: int64
INAP      6208
MALE      1103
FEMALE     787
Name: V4005, dtype: int64
INAP    6208
NA         6
DK         1
Name: V4006, dtype: int6

Name: V4217, dtype: int64
NOT CHECKED; INAP    5953
NA                   2142
CHECKED                 3
Name: V4218, dtype: int64
NOT CHECKED; INAP    5944
NA                   2142
CHECKED                12
Name: V4219, dtype: int64
NOT CHECKED; INAP    5955
NA                   2142
CHECKED                 1
Name: V4220, dtype: int64
NOT CHECKED; INAP    5948
NA                   2142
CHECKED                 8
Name: V4221, dtype: int64
NOT CHECKED; INAP    5956
NA                   2142
Name: V4222, dtype: int64
NOT CHECKED; INAP    5956
NA                   2142
Name: V4223, dtype: int64
NOT CHECKED; INAP    5949
NA                   2142
CHECKED                 7
Name: V4224, dtype: int64
NOT CHECKED; INAP    5955
NA                   2142
CHECKED                 1
Name: V4225, dtype: int64
NOT CHECKED; INAP    5947
NA                   2142
CHECKED                 9
Name: V4226, dtype: int64
NOT CHECKED; INAP    5956
NA                   2142
Name: V4227, dtype: int64
NOT CHECKED;

Name: V4607, dtype: int64
INAP         3052
NA           2144
NEVER        1607
RARELY        820
SOMETIMES     393
OFTEN          81
DK              1
Name: V4608, dtype: int64
INAP         3052
NA           2146
NEVER        1144
RARELY       1100
SOMETIMES     521
OFTEN         135
Name: V4609, dtype: int64
INAP         3052
NEVER        2260
NA           2143
RARELY        408
SOMETIMES     173
OFTEN          62
Name: V4610, dtype: int64
INAP         3052
NA           2147
NEVER        2058
RARELY        558
SOMETIMES     190
OFTEN          93
Name: V4611, dtype: int64
INAP         3052
NEVER        2231
NA           2142
RARELY        450
SOMETIMES     142
OFTEN          81
Name: V4612, dtype: int64
INAP         3052
NA           2141
NEVER        1654
RARELY        765
SOMETIMES     375
OFTEN         111
Name: V4613, dtype: int64
INAP                                 3052
NA                                   2136
BOTH EQUAL                           1276
R HAS A LITTLE            

0     5740
99    2143
1      185
2       26
3        2
5        1
4        1
Name: V5049, dtype: int64
INAP    5770
NA      2145
DK         1
Name: V5050, dtype: int64
INAP    5939
NA      2142
Name: V5051, dtype: int64
INAP    5925
NA      2143
Name: V5052, dtype: int64
INAP    5925
NA      2143
DK         2
Name: V5053, dtype: int64
INAP    5955
NA      2142
Name: V5054, dtype: int64
1+ CHILDREN LISTED IN Q4    3372
ALL OTHERS                  2524
NA                          2142
INAP                          60
Name: V5055, dtype: int64
INAP                                        2584
NA                                          2156
EXCELLENT                                   1961
GOOD                                        1138
FAIR                                         178
POOR                                          59
NO RELATIONSHIP WITH CHILD(REN); NEVER S      22
Name: V5056, dtype: int64
INAP         2606
NA           2151
NEVER        1216
RARELY       1201
SOMETIMES   

NO      5449
NA      2150
YES      434
INAP      65
Name: V5406, dtype: int64
0     5522
99    2142
1      273
2       68
3       39
4       14
5       10
6        7
10       4
15       3
12       3
8        3
14       2
98       2
7        2
9        1
30       1
50       1
20       1
Name: V5407, dtype: int64
INAP                    5683
NA                      2142
MORE THAN A YEAR AGO     249
PAST SIX MONTHS           12
PAST YEAR                  8
PAST MONTH                 4
Name: V5408, dtype: int64
INAP    5707
NA      2142
Name: V5409, dtype: int64
INAP    5932
NA      2142
Name: V5410, dtype: int64
INAP      5932
NA        2145
DAYS        17
WEEKS        3
MONTHS       1
Name: V5411, dtype: int64
INAP    5683
NA      2142
YES      228
NO        45
Name: V5412, dtype: int64
INAP    5683
NA      2144
NO       178
YES       93
Name: V5413, dtype: int64
INAP    5683
NA      2143
NO       204
YES       68
Name: V5414, dtype: int64
INAP    5795
NA      2143
Name: V5415, dtype: in

Name: V5813, dtype: int64
INAP    5451
NA      2157
NO       444
YES       46
Name: V5814, dtype: int64
INAP    5451
NA      2156
NO       447
YES       44
Name: V5815, dtype: int64
INAP    5451
NA      2159
NO       425
YES       63
Name: V5816, dtype: int64
0     5001
99    2156
1      270
2      160
3       95
5       85
30      78
4       62
7       50
14      22
10      21
15      19
6       17
20      14
21      12
12       7
8        6
9        4
25       4
17       2
28       2
24       2
16       2
26       2
11       1
23       1
18       1
22       1
29       1
Name: V5901, dtype: int64
INAP    5686
NA      2142
NO       214
YES       56
Name: V5902, dtype: int64
NONE; INAP    5773
NA            2155
Name: V5903, dtype: int64
0     4234
99    2158
2      324
3      236
1      225
5      224
4      134
30     111
10     105
7       81
15      61
20      45
14      33
6       29
25      26
8       13
28      12
12       6
27       6
21       6
26       5
23       4
9        4


0     5590
99    2143
1      247
2       60
3       32
4       16
5        8
7        1
10       1
Name: V6310, dtype: int64
INAP    5837
NA      2143
Name: V6311, dtype: int64
INAP                                        5837
NA                                          2143
(C) MY ATTEMPT WAS A CRY FOR HELP, I DID      53
(A) I MADE A SERIOUS ATTEMPT TO KILL MYS      49
(B) I TRIED TO KILL MYSELF, BUT I KNEW T      15
DK                                             1
Name: V6312, dtype: int64
INAP                    5590
NA                      2144
MORE THAN A YEAR AGO     318
PAST SIX MONTHS           24
PAST YEAR                 15
PAST MONTH                 7
Name: V6313, dtype: int64
INAP    5638
NA      2142
Name: V6314, dtype: int64
INAP    5908
NA      2145
NO        23
YES       22
Name: V6315, dtype: int64
INAP    5934
NA      2142
YES       15
NO         7
Name: V6316, dtype: int64
INAP    5941
NA      2142
YES       10
NO         5
Name: V6317, dtype: int64
INAP             

Name: V6707, dtype: int64
INAP    3914
NA      2144
YES     1176
NO       701
DK       163
Name: V6708, dtype: int64
INAP    3914
NA      2145
YES     1229
NO       492
DK       318
Name: V6709, dtype: int64
INAP    3914
NA      2147
YES      995
NO       850
DK       192
Name: V6710, dtype: int64
INAP    3914
NA      2146
YES     1074
NO       813
DK       151
Name: V6711, dtype: int64
INAP    3914
NA      2145
NO      1149
YES      737
DK       153
Name: V6712, dtype: int64
INAP    3914
NA      2144
YES     1065
NO       720
DK       255
Name: V6713, dtype: int64
INAP    3914
NA      2145
NO      1637
YES      319
DK        83
Name: V6714, dtype: int64
INAP                                      3914
NA                                        2142
TWO OR MORE YES RESPONSES IN X29A-X29J    1786
ALL OTHERS                                 256
Name: V6715, dtype: int64
INAP    4170
NA      2147
NO      1083
YES      644
DK        54
Name: V6716, dtype: int64
INAP          5312
NA           

Name: V7102, dtype: int64
SOMEWHAT IMPORTANT      2273
NA                      2171
VERY IMPORTANT          2133
NOT VERY IMPORTANT       976
NOT AT ALL IMPORTANT     477
INAP                      68
Name: V7103, dtype: int64
ALL OTHERS                              5207
NA                                      2142
INAP                                     545
R IS OTHER NON-CHRISTIAN (NO AT Y1B)     131
R IS JEWISH                               73
Name: V7104, dtype: int64
NO      3214
NA      2174
YES     1954
INAP     749
DK         7
Name: V7105, dtype: int64
NO      3158
NA      2171
YES     2019
INAP     749
DK         1
Name: V7106, dtype: int64
AGREE       2577
DISAGREE    2538
NA          2211
INAP         749
DK            23
Name: V7107, dtype: int64
NA                        2166
LESS THAN ONCE A MONTH    1981
NEVER                     1392
ABOUT ONCE A WEEK         1060
1 TO 3 TIMES A MONTH       930
MORE THAN ONCE A WEEK      501
INAP                        68
Name: V7108, 

5.6710    123
0.6290     73
0.5981     71
0.6022     69
0.6247     63
0.4835     61
0.5579     52
0.5541     52
1.0227     48
0.9365     47
0.9706     46
0.4669     45
1.0158     45
0.7671     41
0.5270     39
0.7619     37
0.9640     34
0.7680     33
0.6527     33
0.6267     30
0.5806     29
1.2465     28
1.0606     28
0.3148     28
0.6546     27
0.3169     27
0.8312     27
0.1536     26
0.6483     26
0.6820     25
         ... 
2.8118      1
0.3891      1
0.6296      1
0.4899      1
0.3773      1
0.9421      1
0.5046      1
0.2088      1
4.7107      1
0.4711      1
0.8240      1
1.1600      1
3.4113      1
0.2967      1
0.4562      1
0.5316      1
1.8169      1
1.0163      1
0.1957      1
0.4678      1
0.3782      1
0.8112      1
0.6359      1
0.7794      1
1.1066      1
3.1743      1
0.7662      1
0.3007      1
2.5628      1
3.2834      1
Name: P1FWT, Length: 2858, dtype: int64
0.2128    450
5.4941    262
2.1066    207
0.2144    129
2.1222    121
0.5539     41
6.4967     35
0.7206  

In [36]:
new_df.query('V418 == "INAP" | V417 == "INAP"')

Unnamed: 0,CASEID,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V7437,V7438,V7439,V7440,V7441,V7442,P1FWT,P2WTV3,TOBACWT,P2TOBWT
0,10001,60080192,1,8128,9,28,90,9,28,90,...,,,,,,,0.5135,0.5883,,
1,10002,16030048,1,3941,10,1,90,10,1,90,...,,,,,,,1.8968,2.1730,,
2,10003,74070592,1,7813,10,5,90,10,5,90,...,,,,,,,0.4100,0.4697,,
3,10004,74030720,1,7813,10,10,90,10,10,90,...,,,,,,,0.3093,0.3543,,
4,10005,4180080,1,9018,10,10,90,10,10,90,...,,,,,,,0.7790,0.8924,,
5,10006,4180270,1,9018,10,15,90,10,15,90,...,,,,,,,0.2511,0.2877,,
6,10007,28110144,1,6897,10,16,90,10,16,90,...,,,,,,,0.2911,0.3335,,
7,10008,28110064,1,6897,10,11,90,10,11,90,...,,,,,,,0.3174,0.3636,,
8,10009,42069984,1,9212,10,23,90,10,23,90,...,,,,,,,0.8107,0.9287,,
9,10010,3330260,1,9158,10,21,90,10,21,90,...,,,,,,,0.9023,1.0337,,


In [39]:
DS1_suicide_factors[3:]

['V5114',
 'V5118',
 'V5113',
 'V5115',
 'V5112',
 'V4428',
 'V4433',
 'V6305',
 'V6301',
 'V5225',
 'V6649',
 'V6749',
 'V5918',
 'V6143',
 'V6126',
 'V6114',
 'V101',
 'V102',
 'V6215']

In [44]:
new_df['V6215'] = new_df['V6215'].map(new_variable_dict['V6215'])

In [45]:
new_df['V6215'].value_counts()

ALL OTHERS                             2188
NA                                     2142
TWO OR MORE YES RESPONSES IN U1-U12    2113
ONLY ONE YES RESPONSE IN U1-U12        1575
INAP                                     80
Name: V6215, dtype: int64

In [64]:
new_df.to_csv("ncs1-ds1.csv", index=False)

In [56]:
def build_inap_query_string(df, factors):
    sub_factors = factors[3:]
    
    query_str = f'{sub_factors[0]} != "INAP"'
    for factor in sub_factors:
        query_str += f' & {factor} != "INAP"'
        
    print(query_str)
    return df.query(query_str)

In [57]:
all_inaps_df = build_inap_query_string(new_df, DS1_suicide_factors)

V5114 != "INAP" & V5114 != "INAP" & V5118 != "INAP" & V5113 != "INAP" & V5115 != "INAP" & V5112 != "INAP" & V4428 != "INAP" & V4433 != "INAP" & V6305 != "INAP" & V6301 != "INAP" & V5225 != "INAP" & V6649 != "INAP" & V6749 != "INAP" & V5918 != "INAP" & V6143 != "INAP" & V6126 != "INAP" & V6114 != "INAP" & V101 != "INAP" & V102 != "INAP" & V6215 != "INAP"


In [58]:
all_inaps_df.shape

(3831, 2954)

In [63]:
all_inaps_df[DS1_suicide_factors].to_csv("removed_inaps.csv", index=False)

In [76]:
df.iloc[:,1577].value_counts()

0     4708
1     1763
2      921
3      427
4      170
5       68
6       23
7        8
99       7
66       3
Name: V3801, dtype: int64

In [77]:
new_variable_dict['V3801']

{66: 'SECTION H SKIPPED ENTIRELY', 98: 'DK', 99: 'NA'}

In [78]:
var_dict['V3801']

{'0': '-',
 '1': '-',
 '2': '-',
 '3': '-',
 '4': '-',
 '5': '-',
 '6': '-',
 '7': '-',
 '66': 'SECTION H SKIPPED ENTIRELY',
 '98': 'DK',
 '99': 'NA'}

In [79]:
new_df['V3801'].value_counts()

NA                            7
SECTION H SKIPPED ENTIRELY    3
Name: V3801, dtype: int64

In [80]:
mixed_types = (1577,1613,1633,1634,1637,1640,1643,1646,1649,1652,1655,1684,1685,1686,1694,1696,1697,1699,1700,1701,1702,1703,1704,1705,1706,1707,1708,1709,1710,1711,1712,1713,1714,1715,1716,1717,1718,1719,1720,1721,1722,1723,1724,1725,1726,1727,1728,1729,1730,1731,1732,1733,1734,1735,1736,1737,1738,1739,1740,1741,1742,1743,1744,1745,1746,1747,1748,1749,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,1760,1761,1762,1763,1764,1765,1766,1767,1768,1769,1770,1771,1772,1773,1774,1775,1776,1777,1778,1779,1780,1781,1782,1783,1784,1785,1787,1788,1789,1790,1791,1792,1793,1794,1795,1796,1797,1798,1799,1800,1801,1802,1803,1804,1805,1806,1807,1808,1809,1810,1811,1812,1813,1814,1815,1816,1817,1818,1819,1820,1821,1822,1823,1824,1825,1826,1827,1828,1829,1830,1831,1832,1833,1834,1835,1836,1837,1838,1839,1840,1841,1842,1843,1844,1845,1846,1847,1848,1849,1850,1851,1852,1853,1854,1855,1856,1857,1858,1859,1860,1861,1862,1863,1864,1865,1866,1867,1868,1869,1870,1871,1872,1873,1874,1875,1876,1877,1878,1879,1880,1881,1882,1883,1884,1885,1886,1887,1888,1889,1890,1891,1892,1893,1894,1895,1896,1897,1898,1899,1900,1901,1902,1903,1904,1905,1906,1907,1908,1909,1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2017,2018,2019,2020,2021,2022,2024,2025,2026,2027,2028,2030,2031,2032,2033,2034,2035,2036,2037,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047,2048,2049,2051,2052,2053,2054,2056,2057,2058,2059,2060,2061,2062,2063,2064,2065,2066,2067,2068,2069,2070,2071,2072,2073,2074,2075,2076,2077,2078,2079,2080,2081,2082,2083,2084,2085,2086,2087,2088,2089,2090,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100,2101,2102,2103,2104,2105,2106,2107,2108,2109,2110,2111,2112,2113,2114,2115,2116,2117,2118,2119,2120,2121,2122,2123,2124,2125,2126,2127,2128,2129,2130,2131,2132,2133,2134,2135,2136,2137,2138,2139,2140,2141,2143,2144,2145,2146,2147,2148,2149,2150,2151,2152,2153,2154,2155,2156,2157,2158,2159,2160,2161,2162,2163,2164,2165,2166,2167,2168,2169,2170,2171,2172,2173,2174,2175,2176,2177,2178,2180,2181,2182,2183,2184,2185,2186,2187,2188,2189,2190,2191,2192,2193,2194,2195,2196,2197,2198,2199,2200,2201,2202,2203,2204,2205,2207,2208,2209,2210,2211,2212,2213,2214,2215,2216,2217,2218,2219,2220,2221,2222,2223,2224,2225,2226,2227,2228,2229,2230,2231,2232,2233,2234,2235,2236,2237,2238,2239,2240,2241,2242,2243,2244,2245,2246,2247,2248,2249,2250,2251,2252,2253,2254,2255,2256,2257,2258,2259,2260,2261,2262,2263,2264,2265,2266,2267,2268,2269,2270,2271,2272,2273,2274,2275,2276,2277,2278,2279,2280,2281,2282,2283,2284,2285,2286,2287,2288,2289,2290,2291,2292,2293,2294,2295,2296,2297,2298,2299,2300,2301,2302,2303,2304,2305,2306,2307,2308,2309,2310,2311,2312,2313,2314,2315,2316,2317,2318,2319,2320,2321,2322,2323,2324,2325,2326,2327,2328,2329,2330,2331,2332,2333,2334,2335,2336,2337,2338,2339,2341,2342,2344,2345,2347,2348,2349,2350,2351,2352,2353,2354,2355,2356,2357,2358,2359,2360,2361,2362,2363,2364,2369,2370,2371,2372,2373,2377,2378,2379,2380,2381,2382,2383,2384,2385,2386,2387,2388,2389,2390,2391,2392,2393,2394,2395,2396,2397,2398,2399,2400,2401,2402,2403,2404,2405,2406,2407,2408,2409,2410,2411,2412,2413,2414,2415,2416,2417,2418,2419,2420,2421,2422,2423,2424,2425,2426,2427,2428,2429,2430,2431,2432,2433,2434,2435,2436,2437,2438,2439,2440,2441,2442,2443,2444,2445,2446,2447,2448,2449,2450,2451,2452,2453,2454,2455,2456,2457,2458,2459,2460,2461,2462,2463,2464,2465,2466,2467,2468,2469,2470,2471,2472,2473,2474,2475,2476,2477,2478,2479,2480,2481,2482,2483,2484,2485,2486,2487,2488,2489,2490,2491,2492,2493,2494,2495,2496,2497,2498,2500,2501,2502,2503,2504,2505,2506,2507,2508,2509,2510,2511,2512,2513,2514,2515,2516,2517,2518,2519,2520,2521,2522,2523,2524,2525,2526,2527,2528,2529,2530,2531,2532,2533,2534,2535,2536,2537,2538,2539,2540,2541,2542,2543,2544,2545,2546,2547,2548,2549,2550,2551,2552,2553,2554,2555,2556,2557,2558,2559,2560,2561,2562,2563,2564,2565,2566,2567,2568,2569,2570,2571,2572,2573,2574,2575,2576,2577,2578,2579,2580,2583,2584,2585,2586,2587,2588,2589,2590,2591,2592,2593,2594,2595,2596,2597,2598,2599,2600,2601,2602,2603,2604,2605,2606,2607,2608,2609,2610,2611,2612,2613,2614,2615,2616,2617,2618,2619,2620,2621,2622,2623,2624,2625,2626,2627,2628,2629,2630,2631,2632,2633,2634,2635,2636,2637,2638,2639,2640,2641,2642,2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658,2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674,2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690,2691,2692,2693,2694,2695,2696,2697,2698,2699,2700,2701,2702,2703,2704,2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2720,2721,2722,2723,2724,2725,2726,2727,2728,2729,2730,2731,2732,2733,2734,2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750,2751,2752,2753,2754,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765,2766,2767,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779,2780,2781,2782,2783,2784,2785,2786,2787,2788,2789,2790,2791,2792,2793,2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2805,2806,2807,2808,2809,2810,2811,2812,2813,2814,2815,2816,2817,2819,2820,2821,2822,2823,2824,2825,2826,2827,2828,2830,2831,2833,2834,2835,2836,2837,2838,2839,2840,2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856,2857,2858,2859,2860,2861,2862,2863,2864,2865,2866,2867,2868,2869,2870,2871,2872,2875,2876,2877,2878,2879,2880,2881,2882,2883,2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899,2900,2901,2902,2903,2904,2905,2906,2907,2910,2911,2912,2913,2914,2915,2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,2931,2932,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,2944,2945,2946,2947,2948,2949)

In [81]:
len(mixed_types)

1226

In [82]:
DS1_suicide_factors in mixed_types

False

In [85]:
mixed_types_names = [df.columns[idx] for idx in mixed_types]

In [86]:
mixed_types_names

['V3801',
 'V3920',
 'V4003',
 'V4004',
 'V4007',
 'V4010',
 'V4013',
 'V4016',
 'V4019',
 'V4022',
 'V4025',
 'V4086',
 'V4087',
 'V4088',
 'V4096',
 'V4098',
 'V4099',
 'V4101',
 'V4102',
 'V4103',
 'V4104',
 'V4105',
 'V4106',
 'V4107',
 'V4108',
 'V4109',
 'V4110',
 'V4111',
 'V4112',
 'V4113',
 'V4114',
 'V4115',
 'V4116',
 'V4117',
 'V4118',
 'V4119',
 'V4120',
 'V4121',
 'V4122',
 'V4123',
 'V4124',
 'V4125',
 'V4126',
 'V4127',
 'V4128',
 'V4129',
 'V4130',
 'V4131',
 'V4132',
 'V4133',
 'V4134',
 'V4135',
 'V4136',
 'V4137',
 'V4201',
 'V4202',
 'V4203',
 'V4204',
 'V4205',
 'V4206',
 'V4207',
 'V4208',
 'V4209',
 'V4210',
 'V4211',
 'V4212',
 'V4213',
 'V4214',
 'V4215',
 'V4216',
 'V4217',
 'V4218',
 'V4219',
 'V4220',
 'V4221',
 'V4222',
 'V4223',
 'V4224',
 'V4225',
 'V4226',
 'V4227',
 'V4228',
 'V4229',
 'V4230',
 'V4231',
 'V4232',
 'V4233',
 'V4234',
 'V4235',
 'V4236',
 'V4301',
 'V4302',
 'V4303',
 'V4304',
 'V4305',
 'V4306',
 'V4307',
 'V4308',
 'V4309',
 'V4310',


In [87]:
DS1_suicide_factors in mixed_types_names

False

In [88]:
new_df['V6827'].value_counts()

INAP    5822
NA      2144
DK         1
Name: V6827, dtype: int64

In [89]:
var_types['maybe_numerical'] in mixed_types_names

False