#Introduction To Dataset 

In [1]:
# Read the raw CSV text and split it into one string per line (header first).
# The context manager closes the file handle as soon as the read finishes.
with open("US_births_1994-2003_CDC_NCHS.csv") as csv_file:
    csv_list = csv_file.read().split("\n")

In [2]:
csv_list[0:10]

['year,month,date_of_month,day_of_week,births',
 '1994,1,1,6,8096',
 '1994,1,2,7,7772',
 '1994,1,3,1,10142',
 '1994,1,4,2,11248',
 '1994,1,5,3,11053',
 '1994,1,6,4,11406',
 '1994,1,7,5,11251',
 '1994,1,8,6,8653',
 '1994,1,9,7,7910']

#Converting Data Into A List Of Lists

In [4]:
def read_csv(filename):
    """Read a comma-separated file of integers into a list of lists.

    The first line is treated as a header and skipped. Every remaining
    non-blank line is split on commas and each field is converted to int.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list containing one list of ints per data row, in file order.

    Raises:
        ValueError: If a field of a data row is not a valid integer.
    """
    # The context manager closes the file even if reading fails.
    with open(filename) as csv_file:
        string_list = csv_file.read().split("\n")[1:]

    final_list = []
    for row in string_list:
        # A trailing newline leaves an empty last element; int("") would raise.
        if not row.strip():
            continue
        final_list.append([int(value) for value in row.split(",")])
    return final_list
        
# Parse the CDC births file into a list of integer rows (header line dropped).
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")

In [5]:
cdc_list[0:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

#Calculating Number of Births Each Month

In [6]:
def month_births(data):
    """Total the births recorded for each month.

    Every row of ``data`` is read positionally: index 1 is used as the
    month key and index 4 as the birth count to accumulate.

    Returns a dict mapping each month value to the sum of its births.
    """
    totals = {}
    for record in data:
        key, count = record[1], record[4]
        try:
            totals[key] = totals[key] + count
        except KeyError:
            # First time this month is seen: start its running total.
            totals[key] = count
    return totals
    
# Births summed per month (column 1) across every row of the dataset.
cdc_month_births = month_births(cdc_list)

In [8]:
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

#Calculating Number Of Births Each Day Of Week

In [9]:
def dow_births(data):
    """Total the births recorded for each day of the week.

    Every row of ``data`` is read positionally: index 3 is used as the
    day-of-week key and index 4 as the birth count to accumulate.

    Returns a dict mapping each day-of-week value to the sum of its births.
    """
    per_day = {}
    for entry in data:
        weekday = entry[3]
        n_births = entry[4]
        if weekday not in per_day:
            # First row for this weekday seeds the running total.
            per_day[weekday] = n_births
            continue
        per_day[weekday] = per_day[weekday] + n_births
    return per_day
    
# Births summed per day of week (column 3) across every row of the dataset.
cdc_dow_births = dow_births(cdc_list)

In [10]:
cdc_dow_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

#Creating A More General Function 

In [11]:
def calc_counts(data, column, value_column=4):
    """Sum one column of ``data`` grouped by the values of another column.

    Args:
        data: Iterable of rows; each row is indexed positionally.
        column: Index of the column whose distinct values become the keys.
        value_column: Index of the column to sum for each key. Defaults to 4,
            the births column, so existing two-argument calls are unchanged.

    Returns:
        A dict mapping each distinct value found in ``column`` to the sum of
        ``value_column`` over the rows that hold that value.
    """
    sums_dict = {}
    for row in data:
        col_value = row[column]
        sums_dict[col_value] = sums_dict.get(col_value, 0) + row[value_column]
    return sums_dict

# Column indexes follow the CSV header: 0=year, 1=month, 2=date_of_month,
# 3=day_of_week. cdc_month_births and cdc_dow_births are rebound here to the
# results of the general function.
cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list, 3)

In [12]:
cdc_year_births

{1994: 3952767,
 1995: 3899589,
 1996: 3891494,
 1997: 3880894,
 1998: 3941553,
 1999: 3959417,
 2000: 4058814,
 2001: 4025933,
 2002: 4021726,
 2003: 4089950}

In [13]:
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [14]:
cdc_dom_births

{1: 1276557,
 2: 1288739,
 3: 1304499,
 4: 1288154,
 5: 1299953,
 6: 1304474,
 7: 1310459,
 8: 1312297,
 9: 1303292,
 10: 1320764,
 11: 1314361,
 12: 1318437,
 13: 1277684,
 14: 1320153,
 15: 1319171,
 16: 1315192,
 17: 1324953,
 18: 1326855,
 19: 1318727,
 20: 1324821,
 21: 1322897,
 22: 1317381,
 23: 1293290,
 24: 1288083,
 25: 1272116,
 26: 1284796,
 27: 1294395,
 28: 1307685,
 29: 1223161,
 30: 1202095,
 31: 746696}

In [15]:
cdc_dow_births


{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

#Calculating Max & Min Births

In [21]:
def max_min_function(data):
    """Return the smallest and largest value stored in a dict.

    Args:
        data: Dict whose values are mutually comparable (e.g. birth totals).

    Returns:
        A dict ``{"min": smallest, "max": largest}``. Both entries are
        ``None`` when ``data`` is empty.
    """
    values = list(data.values())
    # default=None keeps the empty-dict result instead of raising ValueError.
    return {"min": min(values, default=None), "max": max(values, default=None)}


# Smallest and largest birth totals for each grouping computed above.
min_max_year_births=max_min_function(cdc_year_births)
min_max_month_births=max_min_function(cdc_month_births)
min_max_dom_births=max_min_function(cdc_dom_births)
min_max_dow_births = max_min_function(cdc_dow_births)



In [17]:
min_max_dow_births

{'max': 6446196, 'min': 4079723}

In [19]:
min_max_month_births

{'max': 3525858, 'min': 3018140}

In [22]:
min_max_dom_births

{'max': 1326855, 'min': 746696}

In [23]:
min_max_dow_births

{'max': 6446196, 'min': 4079723}

#Extracting Year-Over-Year Differences In Births For A Given Column Value

In [62]:
def extract(data, column, wanted_value, label=None):
    """Print and return year-over-year changes in births for one column value.

    Rows whose ``column`` entry equals ``wanted_value`` are summed per year
    (row index 0 is the year, row index 4 the birth count). Each pair of
    adjacent years found in the data is then compared, oldest first.

    Args:
        data: Iterable of rows, each indexed positionally.
        column: Index of the column to filter on.
        wanted_value: Value that ``column`` must equal for a row to count.
        label: Text used in the printed message to describe the selection.
            When omitted, the English month name is used if ``column`` is 1
            and ``wanted_value`` is 1..12; otherwise a generic description
            of the filter is used.

    Returns:
        A dict mapping each later year to its births minus the births of the
        preceding year present in the data. Empty when fewer than two years
        match.
    """
    month_names = (
        "January", "February", "March", "April", "May", "June", "July",
        "August", "September", "October", "November", "December",
    )

    births_by_year = {}
    for row in data:
        if row[column] == wanted_value:
            year = row[0]
            births_by_year[year] = births_by_year.get(year, 0) + row[4]

    if label is None:
        if column == 1 and isinstance(wanted_value, int) and 1 <= wanted_value <= 12:
            label = month_names[wanted_value - 1]
        else:
            label = "rows where column {} is {}".format(column, wanted_value)

    differences = {}
    # Walk the years actually present rather than a fixed 1994..2003 range,
    # so other year spans neither raise KeyError nor get silently truncated.
    years = sorted(births_by_year)
    for earlier, later in zip(years, years[1:]):
        change = births_by_year[later] - births_by_year[earlier]
        print("difference between births in", label, "between", later, "and", earlier, "is")
        print(change)
        differences[later] = change
    return differences


In [None]:
#Extracting Differences In Births In January Across Years

In [63]:
# Filter on column 1 (month) equal to 1, i.e. January, and print the change in
# births from each year to the next.
November = extract(cdc_list,1,1)

difference between births in January between 1995 and 1994 is
-4692
difference between births in January between 1996 and 1995 is
-1730
difference between births in January between 1997 and 1996 is
2928
difference between births in January between 1998 and 1997 is
2129
difference between births in January between 1999 and 1998 is
-158
difference between births in January between 2000 and 1999 is
10926
difference between births in January between 2001 and 2000 is
5090
difference between births in January between 2002 and 2001 is
-4524
difference between births in January between 2003 and 2002 is
-871


In [64]:
def merge_births_average(list1, list2):
    """Merge two birth lists, averaging the counts of rows present in both.

    Rows are matched on their first four fields (indexes 0-3). For every row
    of ``list1`` that has a match in ``list2``, the birth count at index 4
    becomes the mean of the two counts. Rows of ``list1`` without a match
    are kept as they are; rows found only in ``list2`` are not added.

    Args:
        list1: Rows that form the base of the result.
        list2: Rows whose counts are averaged into matching ``list1`` rows.

    Returns:
        A new list of new row lists, in ``list1`` order. Neither input is
        modified.
    """
    births_in_list2 = {}
    for row in list2:
        # setdefault keeps the first occurrence when list2 repeats a key.
        births_in_list2.setdefault(tuple(row[:4]), row[4])

    merged = []
    for row in list1:
        new_row = list(row)  # copy so the caller's rows are left untouched
        key = tuple(row[:4])
        if key in births_in_list2:
            # Mean of both counts; adding half of the second would not be one.
            new_row[4] = (row[4] + births_in_list2[key]) / 2
        merged.append(new_row)
    return merged
