In [1]:
# Code adapted from https://github.com/Quartz/vital-mortality/blob/master/mortality.py

In [2]:
#!/usr/bin/env python

from collections import defaultdict
import csv

'''
Mortality data is from the CDC/NVSS
Downloads: https://www.cdc.gov/nchs/data_access/VitalStatsOnline.htm#Mortality_Multiple
Documentation: https://www.cdc.gov/nchs/nvss/mortality_public_use_data.htm
'''

# Input files, in the order they are processed. The names follow a
# two-digit-year pattern except for the 2018 file, which is listed explicitly.
FILENAMES = (
    ['VS%02dMORT.DUSMCPUB' % yy for yy in range(9, 18)]
    + ['Mort2018USPubUse.txt', 'VS19MORT.DUSMCPUB']
)

# Single-character race flag -> label written to the output rows.
# Flags '1'-'5' all collapse to 'Hispanic'; '9' and blank are both 'Unknown'.
RACE_MAP = {
    **dict.fromkeys('12345', 'Hispanic'),
    '6': 'White',
    '7': 'Black',
    '8': 'Other',
    **dict.fromkeys('9 ', 'Unknown'),
}

# Single-character education flag -> label written to the output rows.
# Flags '1' and '2' share one label; '9' and blank are both 'Unknown'.
EDUCATION_MAP = {
    **dict.fromkeys('12', 'Less than High School'),
    '3': 'High School degree or GED',
    '4': 'Some College but no degree',
    '5': 'Associate',
    '6': 'BA',
    '7': 'Masters',
    '8': 'Doctorate or Professional',
    **dict.fromkeys('9 ', 'Unknown'),
}

# Two-character age code -> age-band label written to the output rows.
AGE_MAP = {
    # Codes 28-43: consecutive five-year bands, '10-14' through '85-89'.
    **{
        str(code): '%d-%d' % (low, low + 4)
        for code, low in zip(range(28, 44), range(10, 90, 5))
    },
    # Codes 44-51 are all reported as a single open-ended band.
    **{str(code): '90+' for code in range(44, 52)},
    '52': 'Unknown',
}


# Single-character manner-of-death flag -> label written to the output rows.
# The flag characters and the labels below are paired positionally; the last
# flag is a blank.
MANNER_MAP = dict(zip(
    '1234567 ',
    (
        'Accident',
        'Suicide',
        'Homicide',
        'Pending Investigation',
        'Could not determine',
        'Self-Inflicted',
        'Natural',
        'Unknown',
    ),
))

In [3]:
# Build `data`: one row per kept record, read from every fixed-width file in
# FILENAMES. Each row is [year, gender, age_key, race_key, marital_status,
# ed_key, manner_of_death, ICD_Code]. `data` is reset here, so re-running the
# cell does not duplicate rows.
# NOTE(review): the slice offsets below are 0-based column positions,
# presumably taken from the CDC record layout linked at the top of the
# notebook -- confirm they hold for every year listed in FILENAMES.
data = []

for filename in FILENAMES:
    print(filename)

    # latin-1 maps each byte to exactly one character, so the column offsets
    # stay aligned regardless of the platform's default text encoding.
    with open('data/%s' % filename, encoding='latin-1') as f:
        n = 0  # rows kept from this file (used only for progress output)

        for line in f:
            # Blank lines hold no record; without this guard the int() call
            # below raises ValueError on the empty age slice.
            if not line.strip():
                continue

            # DEMOGRAPHICS
            year = line[101:105]
            gender = line[68]
            age_flag = line[74:76]
            marital_status = line[83]

            # Exclude those under 15 years of age: AGE_MAP code '29' is the
            # 15-19 band and '28' is 10-14; lower codes are presumably
            # younger still.
            if int(age_flag) < 29:
                continue
            age_key = AGE_MAP[age_flag]

            race_flag = line[487]
            race_key = RACE_MAP[race_flag]

            ed_flag = line[62]
            ed_key = EDUCATION_MAP[ed_flag]

            # DEATH STATS
            manner_key = line[106]
            manner_of_death = MANNER_MAP[manner_key]

            ICD_Code = line[145:149]

            # APPEND DATA
            data.append([year, gender, age_key, race_key, marital_status, ed_key,
                         manner_of_death, ICD_Code])
            n += 1

            # Progress report every 100,000 kept rows.
            if n % 100000 == 0:
                print(n)

VS09MORT.DUSMCPUB


FileNotFoundError: [Errno 2] No such file or directory: 'data/VS09MORT.DUSMCPUB'

In [None]:
# Write the parsed rows to mortality.csv, preceded by a header row.
# The header lists exactly the eight fields of each `data` row, in the order
# they are appended, so every value lines up with its column name.
# newline='' hands line-ending control to the csv module (otherwise extra
# blank rows can appear between records on Windows).
with open('mortality.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['year', 'gender', 'age_key', 'race_key', 'marital_status', 'ed_key',
                     'manner_of_death', 'ICD_Code'])
    writer.writerows(data)