# U.S. Medical Insurance Costs

In [69]:
import csv

In [71]:
## List of columns

In [73]:
# Read every row of 'insurance.csv' up front; csv.DictReader yields one dict per row,
# keyed by the column names taken from the file's header line
with open('insurance.csv', newline='') as insurance_csv:
    insurance_rows = list(csv.DictReader(insurance_csv))

# Build one list per CSV column; the same position in every list refers to the same row
age = [int(entry['age']) for entry in insurance_rows]            # ages parsed as integers
sex = [entry['sex'] for entry in insurance_rows]                 # kept as text
bmi = [float(entry['bmi']) for entry in insurance_rows]          # BMI parsed as floats
children = [entry['children'] for entry in insurance_rows]       # left as text; callers convert with int()
smoker = [entry['smoker'] for entry in insurance_rows]           # kept as text
region = [entry['region'] for entry in insurance_rows]           # kept as text
charges = [float(entry['charges']) for entry in insurance_rows]  # charges parsed as floats

In [75]:
## Average insurance cost by region

In [77]:
# Number of records in the dataset; used as the loop bound below
size = len(charges)

# Running charge total and record count for every region, keyed by region name
charge_by_region_dict = {}
region_repeats = {}

for idx in range(size):
    region_name = region[idx]
    # dict.get supplies 0 the first time a region is encountered
    region_repeats[region_name] = region_repeats.get(region_name, 0) + 1
    charge_by_region_dict[region_name] = charge_by_region_dict.get(region_name, 0) + charges[idx]

# Replace each total with the mean charge for that region, rounded to 3 decimal places
for region_name, total_charge in charge_by_region_dict.items():
    charge_by_region_dict[region_name] = round(total_charge / region_repeats[region_name], 3)

# Show the average insurance cost per region
print(charge_by_region_dict)

{'southwest': 12346.937, 'southeast': 14735.411, 'northwest': 12417.575, 'northeast': 13406.385}


In [79]:
## Average insurance cost by age

In [81]:
# Running charge total and record count for every age, keyed by the integer age
charge_by_age_dict = {}
age_repeats = {}

for idx in range(size):
    person_age = age[idx]
    # dict.get supplies 0 the first time an age is encountered
    age_repeats[person_age] = age_repeats.get(person_age, 0) + 1
    charge_by_age_dict[person_age] = charge_by_age_dict.get(person_age, 0) + charges[idx]

# Replace each total with the mean charge for that age, rounded to 2 decimal places
for person_age, total_charge in charge_by_age_dict.items():
    charge_by_age_dict[person_age] = round(total_charge / age_repeats[person_age], 2)

# Print a copy ordered by age; charge_by_age_dict itself keeps its first-seen order
print(dict(sorted(charge_by_age_dict.items())))

{18: 7086.22, 19: 9747.91, 20: 10159.7, 21: 4730.46, 22: 10012.93, 23: 12419.82, 24: 10648.02, 25: 9838.37, 26: 6133.83, 27: 12184.7, 28: 9069.19, 29: 10430.16, 30: 12719.11, 31: 10196.98, 32: 9220.3, 33: 12351.53, 34: 11613.53, 35: 11307.18, 36: 12204.48, 37: 18019.91, 38: 8102.73, 39: 11778.24, 40: 11772.25, 41: 9653.75, 42: 13061.04, 43: 19267.28, 44: 15859.4, 45: 14830.2, 46: 14342.59, 47: 17654.0, 48: 14632.5, 49: 12696.01, 50: 15663.0, 51: 15682.26, 52: 18256.27, 53: 16020.93, 54: 18758.55, 55: 16164.55, 56: 15025.52, 57: 16447.19, 58: 13878.93, 59: 18895.87, 60: 21979.42, 61: 22024.46, 62: 19163.86, 63: 19885.0, 64: 23275.53}


In [83]:
## Difference between men's and women's costs

In [85]:
# Running charge totals and record counts, kept separately for women and men
woman_costs = 0
men_costs = 0
men_count = 0
woman_count = 0

# 'sex' and 'charges' are filled row by row from the same CSV, so pair them up directly
for person_sex, person_charge in zip(sex, charges):
    if person_sex == 'female':
        woman_count += 1
        woman_costs += person_charge
    else:
        # Every value other than 'female' is counted as male
        men_count += 1
        men_costs += person_charge

# Store the rounded averages under the names the results summary reads
# (men_avg_cost / woman_avg_cost); previously they were only computed inside the
# print calls, so a fresh kernel had no such variables.
men_avg_cost = round(men_costs / men_count, 2)
woman_avg_cost = round(woman_costs / woman_count, 2)

# Print the average cost for men and women, rounded to 2 decimal places
print(f'Average cost for men: {men_avg_cost}')
print(f'Average cost for women: {woman_avg_cost}')


Average cost for men: 13956.75
Average cost for women: 12569.58


In [87]:
## Relationship between smoking status and number of children

In [89]:
# Totals of children and head counts, tracked separately for smokers and non-smokers
smokers_children = non_smokers_children = 0
smokers_count = non_smokers_count = 0

for idx in range(size):
    is_smoker = smoker[idx] == 'yes'
    kids = int(children[idx])  # int() also accepts the text values read from the CSV
    if is_smoker:
        smokers_children += kids
        smokers_count += 1
    else:
        # Any smoker value other than 'yes' is treated as a non-smoker
        non_smokers_children += kids
        non_smokers_count += 1

# Mean number of children in each group, rounded to 2 decimal places
avg_smokers_children = round(smokers_children / smokers_count, 2)
avg_non_smokers_children = round(non_smokers_children / non_smokers_count, 2)

print(f"Average number of children for smokers: {avg_smokers_children}")
print(f"Average number of children for non-smokers: {avg_non_smokers_children}")



Average number of children for smokers: 1.11
Average number of children for non-smokers: 1.09


In [91]:
## Results

In [93]:
# Collect the headline averages into one nested dictionary.
# NOTE(review): men_avg_cost and woman_avg_cost must be defined by an earlier cell
# before this one runs.
results = dict(
    cost_by_region=charge_by_region_dict,
    cost_by_age=charge_by_age_dict,
    cost_per_gender=dict(men=men_avg_cost, woman=woman_avg_cost),
)

print(results)

{'cost_by_region': {'southwest': 12346.937, 'southeast': 14735.411, 'northwest': 12417.575, 'northeast': 13406.385}, 'cost_by_age': {19: 9747.91, 18: 7086.22, 28: 9069.19, 33: 12351.53, 32: 9220.3, 31: 10196.98, 46: 14342.59, 37: 18019.91, 60: 21979.42, 25: 9838.37, 62: 19163.86, 23: 12419.82, 56: 15025.52, 27: 12184.7, 52: 18256.27, 30: 12719.11, 34: 11613.53, 59: 18895.87, 63: 19885.0, 55: 16164.55, 22: 10012.93, 26: 6133.83, 35: 11307.18, 24: 10648.02, 41: 9653.75, 38: 8102.73, 36: 12204.48, 21: 4730.46, 48: 14632.5, 40: 11772.25, 58: 13878.93, 53: 16020.93, 43: 19267.28, 64: 23275.53, 20: 10159.7, 61: 22024.46, 44: 15859.4, 57: 16447.19, 29: 10430.16, 45: 14830.2, 54: 18758.55, 49: 12696.01, 47: 17654.0, 51: 15682.26, 42: 13061.04, 50: 15663.0, 39: 11778.24}, 'cost_per_gender': {'men': 13956.75, 'woman': 12569.58}}
