## Introduction To The Dataset

In [1]:
# Load the raw file as a list of text lines. The context manager closes the
# file handle as soon as the contents have been read instead of leaking it.
with open("US_births_1994-2003_CDC_NCHS.csv") as csv_file:
    csv_list = csv_file.read().split("\n")

In [2]:
# Preview the header line and the first nine data rows of the raw text.
csv_list[0:10]

['year,month,date_of_month,day_of_week,births',
 '1994,1,1,6,8096',
 '1994,1,2,7,7772',
 '1994,1,3,1,10142',
 '1994,1,4,2,11248',
 '1994,1,5,3,11053',
 '1994,1,6,4,11406',
 '1994,1,7,5,11251',
 '1994,1,8,6,8653',
 '1994,1,9,7,7910']

## Converting Data Into A List Of Lists

In [3]:
def read_csv(filename):
    """Read a headered CSV of integers into a list of integer lists.

    The first line of the file is treated as a header and discarded. Every
    remaining non-blank line is split on commas and each field is converted
    with ``int``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per data row; each entry is the list of ints
        found on that row, in column order. An empty or header-only file
        yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field on a data row is not a valid integer.
    """
    final_list = []

    # The context manager guarantees the handle is closed even if parsing fails.
    with open(filename) as csv_file:
        next(csv_file, None)  # discard the header line (no-op on an empty file)
        for row in csv_file:
            row = row.strip()
            # A trailing newline or stray blank line would otherwise produce a
            # single empty field and make int("") raise ValueError.
            if not row:
                continue
            final_list.append([int(value) for value in row.split(",")])

    return final_list
        
# Parse the births file into integer rows of
# [year, month, date_of_month, day_of_week, births].
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")

In [4]:
# Show the first ten parsed rows to confirm the fields are now integers.
cdc_list[0:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

## Calculating Number Of Births Each Month

In [5]:
def month_births(double_list):
    """Total the births recorded for each month.

    Every row of ``double_list`` is read at index 1 (the month) and index 4
    (the birth count). The result maps each month to the sum of the birth
    counts of all rows carrying that month, in first-seen order.
    """
    totals = {}
    for record in double_list:
        month = record[1]
        births = record[4]
        if month not in totals:
            # First row seen for this month: start its running total.
            totals[month] = births
            continue
        totals[month] += births
    return totals

# Aggregate the parsed rows into total births per month.
cdc_month_births=month_births(cdc_list)




            

In [6]:
# Display the total births for each month.
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

## Calculating Number Of Births Each Day Of Week

In [7]:
def dow_births(double_list):
    """Total the births recorded for each day of the week.

    Every row of ``double_list`` is read at index 3 (the day of week) and
    index 4 (the birth count). The result maps each day of week to the sum of
    the birth counts of all rows carrying that day, in first-seen order.
    """
    totals = {}
    for record in double_list:
        weekday = record[3]
        births = record[4]
        if weekday not in totals:
            # First row seen for this weekday: start its running total.
            totals[weekday] = births
            continue
        totals[weekday] += births
    return totals

# Aggregate the parsed rows into total births per day of the week.
cdc_day_births=dow_births(cdc_list)



                
            

In [8]:
# Display the total births for each day of the week.
cdc_day_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

## Creating A More General Function

In [9]:
def calc_counts(double_list, column, value_column=4):
    """Sum one column of a table, grouped by the entries of another column.

    Args:
        double_list: Iterable of rows, each an indexable sequence.
        column: Index of the field to group by.
        value_column: Index of the numeric field to total within each group.
            Defaults to 4, the ``births`` column of the CDC file, so existing
            two-argument calls behave exactly as before.

    Returns:
        A dict mapping each distinct value found at ``column`` to the sum of
        ``value_column`` over the rows that carry it, in first-seen order.
    """
    gen_dict = {}

    for each_list in double_list:
        key = each_list[column]
        # dict.get supplies 0 for a key seen for the first time, replacing the
        # explicit membership test.
        gen_dict[key] = gen_dict.get(key, 0) + each_list[value_column]

    return gen_dict

# Total births grouped by each date-related column of the parsed rows.
# Column 0: year.
cdc_year_births=calc_counts(cdc_list,0)
# Column 1: month.
cdc_month_births=calc_counts(cdc_list,1)
# Column 2: day of the month.
cdc_dom_births=calc_counts(cdc_list,2)
# Column 3: day of the week.
cdc_dow_births=calc_counts(cdc_list,3)        
            

In [10]:
# Display the total births for each year.
cdc_year_births

{1994: 3952767,
 1995: 3899589,
 1996: 3891494,
 1997: 3880894,
 1998: 3941553,
 1999: 3959417,
 2000: 4058814,
 2001: 4025933,
 2002: 4021726,
 2003: 4089950}

In [11]:
# Display the total births for each month.
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [12]:
# Display the total births for each day of the month.
cdc_dom_births

{1: 1276557,
 2: 1288739,
 3: 1304499,
 4: 1288154,
 5: 1299953,
 6: 1304474,
 7: 1310459,
 8: 1312297,
 9: 1303292,
 10: 1320764,
 11: 1314361,
 12: 1318437,
 13: 1277684,
 14: 1320153,
 15: 1319171,
 16: 1315192,
 17: 1324953,
 18: 1326855,
 19: 1318727,
 20: 1324821,
 21: 1322897,
 22: 1317381,
 23: 1293290,
 24: 1288083,
 25: 1272116,
 26: 1284796,
 27: 1294395,
 28: 1307685,
 29: 1223161,
 30: 1202095,
 31: 746696}

In [13]:
# Display the total births for each day of the week.
cdc_dow_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

## Calculating Min And Max For Each Dictionary

In [14]:
def cal_minmax(dictionary,number=1):
    """Find the entries holding the smallest and largest values of a dictionary.

    Args:
        dictionary: Mapping of key -> mutually comparable values.
        number: Unused. It previously had to name an existing key to seed the
            search; it is retained only so that existing calls such as
            ``cal_minmax(d, 1994)`` keep working.

    Returns:
        A dict ``{min_key: min_value, max_key: max_value}`` with the minimum
        entry inserted first. When several keys share an extreme value, the
        first one in iteration order is reported. If one key holds both
        extremes the result has a single entry, and an empty input yields an
        empty dict.
    """
    if not dictionary:
        return {}

    # min()/max() examine every key, so the extreme keys are always bound,
    # even when the first entry is itself the minimum or maximum, and no
    # seed key has to exist in the dictionary.
    min_column = min(dictionary, key=dictionary.get)
    max_column = max(dictionary, key=dictionary.get)

    minmax = {min_column: dictionary[min_column], max_column: dictionary[max_column]}

    return minmax

# Find the lowest and highest totals in each aggregate. The year dictionary is
# called with an explicit second argument, 1994, which is one of its own keys.
year_minmax=cal_minmax(cdc_year_births,1994)
month_minmax=cal_minmax(cdc_month_births)
dom_minmax=cal_minmax(cdc_dom_births)
dow_minmax=cal_minmax(cdc_dow_births)



            
        
        

In [15]:
# Display the years with the fewest and the most births.
year_minmax

{1997: 3880894, 2003: 4089950}

In [16]:
# Display the months with the fewest and the most births.
month_minmax

{2: 3018140, 8: 3525858}

In [17]:
# Display the days of the month with the fewest and the most births.
dom_minmax

{18: 1326855, 31: 746696}

In [18]:
# Display the days of the week with the fewest and the most births.
dow_minmax

{2: 6446196, 7: 4079723}