
# U.S. Medical Insurance Costs

## Overall Project Goals
The goal of this project is to draw conclusions about the relationships between region, bmi, number of children, and other statistics listed in the csv file, insurance.csv. 

### Individual Tasks
1. Add clarifying markdown cells before each code cell to explain what that code cell is doing
2. Format data into initial dictionary
3. Determine differences in average numbers of children in different regions
4. Determine average age for each of the four regions
5. Format age and region relationships into a dictionary
6. Find relationships (if any) between gender and region and number of children. 
7. Determine impact of having children on smoker status


#### Tells the computer to bring in the csv module so that I can use csv methods on the insurance file

In [6]:
import csv


#### Uses _with_  and _as_  to help _open_  the insurance.csv file and temporarily rename it for easier use. Then creates a python dictionary called _insurance_dict_ withs keys for each of the categories in the (now named) _insurance_csv_ file. Finally _appends_ data from the different insurance categories to separate lists. 

In [7]:
insurance_age_list = []
insurance_sex_list = []
insurance_bmi_list = []
insurance_num_children_list = []
insurance_smoker_list = []
insurance_region_list = []
insurance_charges_list = []
with open("insurance.csv") as insurance_csv:
    insurance_dict = csv.DictReader(insurance_csv)   
    for row in insurance_dict:
        insurance_age_list.append(row["age"])
        insurance_sex_list.append(row["sex"])
        insurance_bmi_list.append(row["bmi"])
        insurance_num_children_list.append(row["children"])
        insurance_smoker_list.append(row["smoker"])
        insurance_region_list.append(row["region"])
        insurance_charges_list.append(row["charges"])


#### Function for counting number of people in the data set from each region  listed in the csv file

In [8]:
def region_count(insurance_list, region_list):
    ne_count = 0
    nw_count = 0
    se_count = 0
    sw_count = 0
    for index in range(len(insurance_list)):
        if region_list[index] == "northeast":
            ne_count += 1
        elif region_list[index] == "northwest":
            nw_count += 1
        elif region_list[index] == "southeast":
            se_count += 1
        else:
            sw_count += 1
    return ne_count, nw_count, se_count, sw_count
regional_count = region_count(insurance_age_list, insurance_region_list)
print(regional_count)

(324, 325, 364, 325)


#### Determines the total number of children in each region

In [9]:
def region_children_count(region_list, children_list):
    ne_children_count = 0
    nw_children_count = 0
    se_children_count = 0
    sw_children_count = 0
    for index in range(len(insurance_age_list)):
        if region_list[index] == "northeast":
            ne_children_count += int(insurance_num_children_list[index])
        elif region_list[index] == "northwest":
            nw_children_count += int(insurance_num_children_list[index])            
        elif region_list[index] == "southeast":
            se_children_count += int(insurance_num_children_list[index])           
        elif region_list[index] == "southwest":
            sw_children_count += int(insurance_num_children_list[index])
    return ne_children_count, nw_children_count, se_children_count, sw_children_count
regional_children_count_list = region_children_count(insurance_region_list, insurance_num_children_list)
print(regional_children_count_list)

(339, 373, 382, 371)



#### Finds the average number of children per person in each region, and ranks those averages by region

In [10]:
def average_children_regionIndex(regionIndex):
    return regional_children_count_list[regionIndex] / regional_count[regionIndex]

ne_average_children = average_children_regionIndex(0)
nw_average_children = average_children_regionIndex(1)
se_average_children = average_children_regionIndex(2)
sw_average_children = average_children_regionIndex(3)

average_children_list = [ne_average_children, nw_average_children, se_average_children, sw_average_children]

average_children_ranked_list = sorted(average_children_list)

average_children_dict = {"northeast": ne_average_children, "southeast": se_average_children, "southwest": sw_average_children, "northwest": nw_average_children}
print(average_children_dict)

{'northeast': 1.0462962962962963, 'southeast': 1.0494505494505495, 'southwest': 1.1415384615384616, 'northwest': 1.1476923076923078}



#### Determines the combined age in each region

In [11]:
def region_age_count(region_list, age):
    ne_age_count = 0
    nw_age_count = 0
    se_age_count = 0
    sw_age_count = 0
    for index in range(len(insurance_age_list)):
        if region_list[index] == "northeast":
            ne_age_count += int(insurance_age_list[index])
        elif region_list[index] == "northwest":
            nw_age_count += int(insurance_age_list[index])            
        elif region_list[index] == "southeast":
            se_age_count += int(insurance_age_list[index])           
        elif region_list[index] == "southwest":
            sw_age_count += int(insurance_age_list[index])
    return ne_age_count, nw_age_count, se_age_count, sw_age_count
regional_age_count_list = region_age_count(insurance_region_list, insurance_age_list)
print(regional_age_count_list)

(12723, 12739, 14174, 12823)



#### Determines the average age for each region

In [12]:
def average_age_regionIndex(regionIndex):
    return regional_age_count_list[regionIndex] / regional_count[regionIndex]

ne_average_age = average_age_regionIndex(0)
nw_average_age = average_age_regionIndex(1)
se_average_age = average_age_regionIndex(2)
sw_average_age = average_age_regionIndex(3)

average_age_list = [ne_average_age, nw_average_age, se_average_age, sw_average_age]

average_age_ranked_list = sorted(average_age_list)

average_age_dict = {"southeast": se_average_age, "northwest": nw_average_age, "northeast": ne_average_age, "southwest": sw_average_age}
print(average_age_dict)

{'southeast': 38.93956043956044, 'northwest': 39.19692307692308, 'northeast': 39.26851851851852, 'southwest': 39.45538461538462}



#### Calculates the total number of males and females in each of the four regions. 

In [13]:
def region_sex_totals(sex_list, region_list):
    ne_m_count = 0
    ne_f_count = 0
    nw_m_count = 0
    nw_f_count = 0
    se_m_count = 0
    se_f_count = 0
    sw_m_count = 0
    sw_f_count = 0
    for index in range(len(insurance_age_list)):
        if region_list[index] == "northeast" and sex_list[index] == "male":
            ne_m_count += 1
        elif region_list[index] == "northeast" and sex_list[index] == "female":
            ne_f_count += 1
        elif region_list[index] == "northwest" and sex_list[index] == "male":
            nw_m_count += 1
        elif region_list[index] == "northwest" and sex_list[index] == "female":
            nw_f_count += 1
        elif region_list[index] == "southeast" and sex_list[index] == "male":
            se_m_count += 1
        elif region_list[index] == "southeast" and sex_list[index] == "female":
            se_f_count += 1
        elif region_list[index] == "southwest" and sex_list[index] == "male":
            sw_m_count += 1
        elif region_list[index] == "southwest" and sex_list[index] == "female":
            sw_f_count += 1
    return ne_m_count, ne_f_count, nw_m_count, nw_f_count, se_m_count, se_f_count, sw_m_count, sw_f_count
region_sex_count = region_sex_totals(insurance_sex_list, insurance_region_list)
region_sex_totals_dict = {"northeast": {"male": region_sex_count[0], "female": region_sex_count[1]}, 
                          "northwest": {"male": region_sex_count[2], "female": region_sex_count[3]},
                          "southeast": {"male": region_sex_count[4], "female": region_sex_count[5]},
                          "southwest": {"male": region_sex_count[6], "female": region_sex_count[7]}}
print(region_sex_totals_dict)


{'northeast': {'male': 163, 'female': 161}, 'northwest': {'male': 161, 'female': 164}, 'southeast': {'male': 189, 'female': 175}, 'southwest': {'male': 163, 'female': 162}}



#### Calculates the total number of children for each gender within each region

In [14]:
def region_sex_total_children(sex_list, region_list, region, sex):
    total = 0
    for index in range(len(region_list)):
        if region_list[index] == region and sex_list[index] == sex:
            total += int(insurance_num_children_list[index])
    return total
region_sex_total_children_dict = {}
unique_regions = ["northeast", "northwest", "southeast", "southwest"]
listed_sexes = ["male", "female"]

for region in unique_regions:
    for sex in listed_sexes:
        if sex not in region_sex_total_children_dict.keys():
            region_sex_total_children_dict.update({sex: {region: region_sex_total_children(insurance_sex_list, insurance_region_list, region, sex)}})
        elif sex in region_sex_total_children_dict.keys():
            region_sex_total_children_dict[sex].update({region: region_sex_total_children(insurance_sex_list, insurance_region_list, region, sex)})
print(region_sex_total_children_dict)
            

{'male': {'northeast': 177, 'northwest': 190, 'southeast': 198, 'southwest': 189}, 'female': {'northeast': 162, 'northwest': 183, 'southeast': 184, 'southwest': 182}}



#### Finds the average number of children per person, separated by region and sex

In [19]:
def averages_dictionary(children_dict, people_dict, sex):
    average_dict = {}
    average = 0
    for region in unique_regions:
        average = region_sex_total_children_dict[sex][region] / region_sex_totals_dict[region][sex]
        if sex not in average_dict.keys():
            average_dict.update({sex: {region: average}})
        if sex in average_dict.keys():
            average_dict[sex].update({region: average})
    return average_dict
        
average_children_male = averages_dictionary(region_sex_total_children_dict, region_sex_totals_dict, "male")
average_children_female = averages_dictionary(region_sex_total_children_dict, region_sex_totals_dict, "female")
    
print(average_children_male)
print(average_children_female)

{'male': {'northeast': 1.0858895705521472, 'northwest': 1.1801242236024845, 'southeast': 1.0476190476190477, 'southwest': 1.1595092024539877}}
{'female': {'northeast': 1.0062111801242235, 'northwest': 1.1158536585365855, 'southeast': 1.0514285714285714, 'southwest': 1.123456790123457}}



#### Finds average number of children, separated only by sex

In [27]:
male_children_total = 0
female_children_total = 0

for region in unique_regions:
    male_children_total += average_children_male["male"][region]
    female_children_total += average_children_female["female"][region]
    
male_children_average = male_children_total / 4
female_children_average = female_children_total / 4

print("The average number of children that the men listed in the csv file is " + str(male_children_average))
print("The average number of children that the women listed in the csv file is " + str(female_children_average))
print("On average, the men listed in the csv file have " + str(male_children_average - female_children_average) + " more children than the women listed in the csv file.")

The average number of children that the men listed in the csv file is 1.1182855110569168
The average number of children that the women listed in the csv file is 1.0742375500532093
On average, the men listed in the csv file have 0.04404796100370745 more children than the women listed in the csv file.



#### Finds total number of children for the smokers and  non smokers in the csv file

In [34]:
def smoker_total_children(smoker_list, children_list):
    smoker_children_total = 0
    n_smoker_children_total = 0
    for index in range(len(smoker_list)):
        if smoker_list[index] == "yes":
            smoker_children_total += int(children_list[index])
        elif smoker_list[index] == "no":
            n_smoker_children_total += int(children_list[index])
    return smoker_children_total, n_smoker_children_total 

smoker_children_total, n_smoker_children_total = smoker_total_children(insurance_smoker_list, insurance_num_children_list)
print(smoker_children_total)
print(n_smoker_children_total)

305
1160



#### Counts number of smokers and non smokers

In [38]:
def smoker_count(smoker_list):
    smoker_count = 0
    n_smoker_count = 0
    for smoker in smoker_list:
        if smoker == "yes":
            smoker_count += 1
        elif smoker == "no":
            n_smoker_count += 1
    return smoker_count, n_smoker_count

smoker_count, n_smoker_count = smoker_count(insurance_smoker_list)
print(smoker_count)
print(n_smoker_count)

274
1064



#### Finds the average number of children for the smokers and nonsmoker in the csv file

In [39]:
average_children_smoker = smoker_children_total / smoker_count
average_children_n_smoker = n_smoker_children_total / n_smoker_count
print(average_children_smoker)
print(average_children_n_smoker)

1.1131386861313868
1.0902255639097744
