# U.S. Medical Insurance Costs

In [None]:
import csv

#import insurance data to list
def import_csv(file):
    """Read a CSV file into a list of row dictionaries.

    Args:
        file: Path of the CSV file to read. Its first line is used as the
            header row that supplies the dictionary keys.

    Returns:
        A list with one dict per data row, mapping each column name to the
        raw string value found in the file.
    """
    # Open the path the caller passed in (not a fixed file name).
    # newline='' is what the csv module expects so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file, newline='') as insurance_data:
        return list(csv.DictReader(insurance_data))

# Load every row of the data set once; the cells below all read from this list.
insurance_info = import_csv('insurance.csv') 


## Classifying Customers

The code block below sorts the customers in the data set into categories based on the adult BMI classifications published by the U.S. Centers for Disease Control and Prevention (CDC).
<hr>

In [None]:
# Customer records grouped by BMI category. Each band includes its lower
# bound and excludes its upper bound, so a BMI of exactly 18.5, 25, 30 or 35
# lands in the band that starts at that value instead of falling through to
# the last category.
underweight_BMIs = []   # BMI below 18.5
healthy_BMIs = []       # 18.5 up to (not including) 25
overweight_BMIs = []    # 25 up to (not including) 30
obese_1_BMIs = []       # 30 up to (not including) 35
obese_2_BMIs = []       # 35 up to (not including) 40
obese_3_BMIs = []       # 40 and above

for customer in insurance_info:
    # The 'bmi' value is converted with float() once per record.
    bmi = float(customer['bmi'])
    if bmi < 18.5:
        underweight_BMIs.append(customer)
    elif bmi < 25:
        healthy_BMIs.append(customer)
    elif bmi < 30:
        overweight_BMIs.append(customer)
    elif bmi < 35:
        obese_1_BMIs.append(customer)
    elif bmi < 40:
        obese_2_BMIs.append(customer)
    else:
        obese_3_BMIs.append(customer)

<hr>

## Charges By Class 

The section below determines and prints the average charges per CDC classification, rounded to two decimal places for ease of reading. After determining the average for each classification, it determines which classification has the highest overall average and the lowest overall average.

<hr>

In [None]:
def average_cost(classification):
    """Return the mean insurance charge for a group of customers.

    Args:
        classification: Sequence of customer records, each a mapping whose
            'charges' value can be converted with float().

    Returns:
        The mean of the 'charges' values rounded to two decimal places, or
        0.0 when the group is empty.
    """
    # An empty group has no meaningful average; report 0.0 rather than
    # letting the division below raise ZeroDivisionError.
    if not classification:
        return 0.0
    total_charges = sum(float(customer['charges']) for customer in classification)
    return round(total_charges / len(classification), 2)

# Average charge for each BMI category, keyed by the label used in the report.
charges_by_class = {
    'Underweight': average_cost(underweight_BMIs),
    'Healthy': average_cost(healthy_BMIs),
    'Overweight': average_cost(overweight_BMIs),
    'Obese Class 1': average_cost(obese_1_BMIs),
    'Obese Class 2': average_cost(obese_2_BMIs),
    'Obese Class 3 - Severe Obesity': average_cost(obese_3_BMIs),
}

for classification, class_average in charges_by_class.items():
    print(f"The average charge for customers with a BMI classified as {classification} is ${class_average}.\n")

# The values compared here are dollar amounts, so the summaries talk about
# the average charge (not BMI) and name the correct extreme.
lowest_charge = min(charges_by_class.values())
# Kept as a list so that ties are all recorded; the first match is reported.
lowest_class = [key for key, value in charges_by_class.items() if value == lowest_charge]

print(f"\n\nThe {lowest_class[0]} class has the lowest average charge at ${lowest_charge}.")

highest_charge = max(charges_by_class.values())
highest_class = [key for key, value in charges_by_class.items() if value == highest_charge]

print(f"\n\nThe {highest_class[0]} class has the highest average charge at ${highest_charge}.")

<hr>

## BMI By Region

The section below determines and prints the average BMI for each residential region in the data set. After determining the average for each region, the code will determine which has the highest overall average and the lowest overall average. 

<hr>

In [None]:
# Running BMI total and customer count per region, keyed by the title-cased
# region name.
bmi_by_region = {}
average_counter = {}
for customer in insurance_info:
    # Build the key once and use the same key for the lookup and the store.
    # Testing the raw region string against title-cased keys never matches
    # when the two differ in case, which would reset the totals on every row.
    region = customer['region'].title()
    bmi_by_region[region] = bmi_by_region.get(region, 0.0) + float(customer['bmi'])
    average_counter[region] = average_counter.get(region, 0) + 1

# Only existing keys are reassigned below, so updating the dict while
# iterating over it is safe (its size never changes).
for region, bmi_total in bmi_by_region.items():
    average_bmi = bmi_total / average_counter[region]
    bmi_by_region[region] = average_bmi  # the total is replaced by the average
    bmi = round(average_bmi, 2)

    # Each band includes its lower bound, so an average of exactly
    # 18.5, 25, 30 or 35 is not pushed into the last category.
    if average_bmi < 18.5:
        classification = "underweight"
    elif average_bmi < 25:
        classification = "healthy"
    elif average_bmi < 30:
        classification = "overweight"
    elif average_bmi < 35:
        classification = "obese (class 1)"
    elif average_bmi < 40:
        classification = "obese (class 2)"
    else:
        classification = "obese (class 3 - severe)"

    BMI_summary = f"The {region}, on average, is {classification} with a BMI of {bmi}.\n"
    print(BMI_summary)

highest_BMI = max(bmi_by_region.values())
# Kept as a list so that ties are all recorded; the first match is reported.
highest_region = [key for key, value in bmi_by_region.items() if value == highest_BMI]

# Rounded for display only; highest_BMI itself keeps full precision.
print("\n\nThe " + highest_region[0] + " has the highest average BMI at " + str(round(highest_BMI, 2)) + ".")

lowest_BMI = min(bmi_by_region.values())
lowest_region = [key for key, value in bmi_by_region.items() if value == lowest_BMI]

print("\n\nThe " + lowest_region[0] + " has the lowest average BMI at " + str(round(lowest_BMI, 2)) + ".")

<hr>

## Charges by Region
The section below determines the average insurance charges for each region, then determines which region has the highest average charges and which has the lowest average charges.

<hr>

In [None]:
# Running charge total and customer count per region, keyed by the
# title-cased region name.
charges_by_region = {}
average_counter = {}
for customer in insurance_info:
    # Build the key once and use the same key for the lookup and the store.
    # Testing the raw region string against title-cased keys never matches
    # when the two differ in case, which would reset the totals on every row.
    region = customer['region'].title()
    charges_by_region[region] = charges_by_region.get(region, 0.0) + float(customer['charges'])
    average_counter[region] = average_counter.get(region, 0) + 1

# Only existing keys are reassigned below, so updating the dict while
# iterating over it is safe (its size never changes).
for region, charges_total in charges_by_region.items():
    charges_by_region[region] = charges_total / average_counter[region]
    charges = round(charges_by_region[region], 2)

    charges_summary = f"The {region} has an average charge of ${charges}.\n"
    print(charges_summary)

highest_charges = max(charges_by_region.values())
# Kept as a list so that ties are all recorded; the first match is reported.
highest_region_charges = [key for key, value in charges_by_region.items() if value == highest_charges]

print("\n\nThe " + highest_region_charges[0] + " has the highest average charges with $" + str(round(highest_charges, 2)) + ".")

lowest_charges = min(charges_by_region.values())
lowest_region_charges = [key for key, value in charges_by_region.items() if value == lowest_charges]

print("\n\nThe " + lowest_region_charges[0] + " has the lowest average charges with $" + str(round(lowest_charges, 2)) + ".")

In [None]:
# Number of customers whose 'smoker' field is "yes", per title-cased region.
smokers_by_region = {}
for customer in insurance_info:
    region = customer['region'].title()
    # Track membership in this dict itself, using the same title-cased key
    # that is stored. setdefault also keeps regions that have no smokers in
    # the result with a count of 0.
    smokers_by_region.setdefault(region, 0)
    if customer['smoker'] == "yes":
        smokers_by_region[region] += 1

print(smokers_by_region)