In [1]:
f = open("US_births_1994-2003_CDC_NCHS.csv")
births_str = f.read()
births_data = births_str.split('\n')
print(births_data[:10])


['year,month,date_of_month,day_of_week,births', '1994,1,1,6,8096', '1994,1,2,7,7772', '1994,1,3,1,10142', '1994,1,4,2,11248', '1994,1,5,3,11053', '1994,1,6,4,11406', '1994,1,7,5,11251', '1994,1,8,6,8653', '1994,1,9,7,7910']


In [2]:
def read_csv(f):
    """Converts a csv file to a headerless list of lists with int elements."""
    file = open(f,"r")
    content = file.read()
    split_content = content.split('\n')
    headerless_split_content = split_content[1:]
    string_list = headerless_split_content
    final_list = []
    
    for row in string_list:
        int_fields = []
        string_fields = row.split(',')
        for e in string_fields:
            int_fields.append(int(e))
        final_list.append(int_fields)

    return final_list

cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
print(len(cdc_list))
print(cdc_list[len(cdc_list)-10:])

3652
[[2003, 12, 22, 1, 12967], [2003, 12, 23, 2, 12598], [2003, 12, 24, 3, 9096], [2003, 12, 25, 4, 6628], [2003, 12, 26, 5, 10218], [2003, 12, 27, 6, 8646], [2003, 12, 28, 7, 7645], [2003, 12, 29, 1, 12823], [2003, 12, 30, 2, 14438], [2003, 12, 31, 3, 12374]]


In [3]:
ssa_list = read_csv("US_births_2000-2014_SSA.csv")
print(len(ssa_list))
print(ssa_list[:10])

5479
[[2000, 1, 1, 6, 9083], [2000, 1, 2, 7, 8006], [2000, 1, 3, 1, 11363], [2000, 1, 4, 2, 13032], [2000, 1, 5, 3, 12558], [2000, 1, 6, 4, 12466], [2000, 1, 7, 5, 12516], [2000, 1, 8, 6, 8934], [2000, 1, 9, 7, 7949], [2000, 1, 10, 1, 11668]]


In [4]:
def combine_lists(l_one, l_two):
    combined_list = []
    for row in l_one:        
        combined_list.append(row)
    for row in l_two:
        row_year = row[0]
        if row_year > 2003:
            combined_list.append(row)
    return combined_list

In [5]:
cdc_ssa_list = combine_lists(cdc_list, ssa_list)
print(len(cdc_ssa_list))

7670


In [6]:
def date_births(data, year, month, date_of_month):
    """Returns a list with the births (int) for given date in a dataset. Multiple elements in the list imply duplicate dates in the data.
    
    First argument, data, must be a list of lists (all int) with the following format:
    index 0: year
    index 1: month
    index 2: date_of_month
    index 3: day_of_week
    index 4: births
    
    3 required arguments constrain the output: year, month, and date_of_month.
    """
    births_list = []
    for row in data:
        row_year = row[0]
        row_month = row[1]
        row_date_of_month = row[2]
        if row_year == year:
            if row_month == month:
                if row_date_of_month == date_of_month:
                    births_list.append(row[4])
    return births_list

In [7]:
help(date_births)

Help on function date_births in module __main__:

date_births(data, year, month, date_of_month)
    Returns a list with the births (int) for given date in a dataset. Multiple elements in the list imply duplicate dates in the data.
    
    First argument, data, must be a list of lists (all int) with the following format:
    index 0: year
    index 1: month
    index 2: date_of_month
    index 3: day_of_week
    index 4: births
    
    3 required arguments constrain the output: year, month, and date_of_month.



In [8]:
date_births(cdc_list, 2000, 1, 1)

[8843]

In [9]:
date_births(ssa_list, 2000, 1, 1)

[9083]

In [10]:
date_births(cdc_ssa_list, 2000, 1, 1)

[8843]

In [11]:
def month_births(births_list):
    """Returns a dict with month (int) keys and births values.
    Input must be a list of lists with month at index 1 and births at index 4.
    """
    births_per_month = {}
    for row in births_list:
        month = row[1]
        births = row[4]
        if month in births_per_month:
            births_per_month[month] += births
        else:
            births_per_month[month] = births
    
    return births_per_month

cdc_month_births = month_births(cdc_list)

In [12]:
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [13]:
def dow_births(births_list):
    """Returns a dict with day of week (int) keys and births values.
    Input must be a list of lists with day of week at index 3 and births at index 4.
    """
    births_per_day = {}
    for row in births_list:
        day = row[3]
        births = row[4]
        if day in births_per_day:
            births_per_day[day] += births
        else:
            births_per_day[day] = births
    return births_per_day

cdc_day_births = dow_births(cdc_list)

In [14]:
cdc_day_births

{6: 4562111,
 7: 4079723,
 1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657}

In [15]:
def calc_counts(data, column):
    """Returns a dict with unique column elements as keys and births as values.
    Input must be a list of lists.
    """
    births_per_column = {}
    for row in data:
        column_num = row[column]
        births = row[4]
        if column_num in births_per_column:
            births_per_column[column_num] += births
        else:
            births_per_column[column_num] = births
    return births_per_column

cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list,3)

In [16]:
cdc_year_births, cdc_month_births, cdc_dom_births, cdc_dow_births

({1994: 3952767,
  1995: 3899589,
  1996: 3891494,
  1997: 3880894,
  1998: 3941553,
  1999: 3959417,
  2000: 4058814,
  2001: 4025933,
  2002: 4021726,
  2003: 4089950},
 {1: 3232517,
  2: 3018140,
  3: 3322069,
  4: 3185314,
  5: 3350907,
  6: 3296530,
  7: 3498783,
  8: 3525858,
  9: 3439698,
  10: 3378814,
  11: 3171647,
  12: 3301860},
 {1: 1276557,
  2: 1288739,
  3: 1304499,
  4: 1288154,
  5: 1299953,
  6: 1304474,
  7: 1310459,
  8: 1312297,
  9: 1303292,
  10: 1320764,
  11: 1314361,
  12: 1318437,
  13: 1277684,
  14: 1320153,
  15: 1319171,
  16: 1315192,
  17: 1324953,
  18: 1326855,
  19: 1318727,
  20: 1324821,
  21: 1322897,
  22: 1317381,
  23: 1293290,
  24: 1288083,
  25: 1272116,
  26: 1284796,
  27: 1294395,
  28: 1307685,
  29: 1223161,
  30: 1202095,
  31: 746696},
 {6: 4562111,
  7: 4079723,
  1: 5789166,
  2: 6446196,
  3: 6322855,
  4: 6288429,
  5: 6233657})

In [17]:
def dict_min_max(d):
    """Returns the min and max values of a dictionary.
    Min and max are returned as tuples with key and value at index 0 and 1, respectively.
    """
    min_output = None
    max_output = None
    for key, value in d.items():
        if min_output == None and max_output == None:
            min_output = (key, value)
            max_output = (key, value)
        else:
            if value < min_output[1]:
                min_output = (key, value)
            if value > max_output[1]:
                max_output = (key, value)
    return min_output, max_output

In [18]:
dict_min_max(cdc_year_births)

((1997, 3880894), (2003, 4089950))

In [19]:
def year_changes(births_list, month=None, date_of_month=None, day_of_week=None):
    """Returns a dict with year (int) as keys and births (int) as values.
    
    First argument, births_list, must be a list of lists (all int) with the following format:
    index 0: year
    index 1: month
    index 2: date_of_month
    index 3: day_of_week
    index 4: births
    
    3 optional arguments constrain the output: month, date_of_month, and day_of_week.
    """
    output_dict = {}
    for row in births_list:
        row_year = row[0]
        row_month = row[1]
        row_date = row[2]
        row_dow = row[3]
        row_births = row[4]
        if month == None or month == row_month:
            if date_of_month == None or date_of_month == row_date:
                if day_of_week == None or day_of_week == row_dow:
                    if row_year in output_dict:
                        output_dict[row_year] += row_births
                    else:
                        output_dict[row_year] = row_births
    return output_dict
            

In [20]:
year_changes(cdc_list, month=1, day_of_week=1)

{1994: 53097,
 1995: 49889,
 1996: 48606,
 1997: 41694,
 1998: 42205,
 1999: 42693,
 2000: 55208,
 2001: 51884,
 2002: 44595,
 2003: 44310}

In [21]:
help(year_changes)

Help on function year_changes in module __main__:

year_changes(births_list, month=None, date_of_month=None, day_of_week=None)
    Returns a dict with year (int) as keys and births (int) as values.
    
    First argument, births_list, must be a list of lists (all int) with the following format:
    index 0: year
    index 1: month
    index 2: date_of_month
    index 3: day_of_week
    index 4: births
    
    3 optional arguments constrain the output: month, date_of_month, and day_of_week.

