# U.S. Medical Insurance Costs

In [2]:
import csv

In [3]:
def load_insurance_data(file_path):
    """
    Reads the insurance CSV file and groups its values by column.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        dict: Maps each of the seven expected column names to a list holding
            that column's values in file order. Values are kept exactly as
            the csv module yields them (strings); no type conversion is done.
    """
    column_names = ('age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges')
    insurance_dict = {name: [] for name in column_names}

    with open(file_path, newline='') as insurance_csv:
        for record in csv.DictReader(insurance_csv):
            # Only the expected columns are collected; a missing one raises KeyError.
            for name, values in insurance_dict.items():
                values.append(record[name])

    return insurance_dict

In [4]:
def calculate_average_charge(insurance_dict):
    """
    Calculates the average insurance charge.

    Args:
        insurance_dict (dict): Insurance data dictionary. Its 'charges' entry
            is a list of values convertible with float().

    Returns:
        float: The average charge rounded to 3 decimal places, or 0.0 when
            there are no charges (instead of raising ZeroDivisionError).
    """
    charges = insurance_dict['charges']

    # An empty dataset has no meaningful average; report 0.0 like the other
    # "safe" averages in this notebook rather than dividing by zero.
    if len(charges) == 0:
        return 0.0

    total_charges = sum(float(charge) for charge in charges)
    return round(total_charges / len(charges), 3)


In [5]:
def calculate_average_bmi_by_region(insurance_dict):
    """
    Computes, per region, how many rows belong to it and their mean BMI.

    Args:
        insurance_dict (dict): Insurance data dictionary with parallel
            'region' and 'bmi' lists.

    Returns:
        dict: Maps each region to {'count': <rows>, 'average_bmi': <mean BMI
            rounded to 2 decimal places>}.
    """
    running = {}  # region -> [bmi_sum, row_count]

    for idx, region_name in enumerate(insurance_dict['region']):
        bmi_value = float(insurance_dict['bmi'][idx])
        bucket = running.get(region_name)
        if bucket is None:
            running[region_name] = [bmi_value, 1]
        else:
            bucket[0] += bmi_value
            bucket[1] += 1

    # Every bucket was created by at least one row, so the count is never zero.
    return {
        region_name: {'count': row_count, 'average_bmi': round(bmi_sum / row_count, 2)}
        for region_name, (bmi_sum, row_count) in running.items()
    }


In [6]:
def calculate_avg_insurance_of_smokers_and_non_smokers_by_region(insurance_dict):
    """
    Computes per-region head counts and mean charges, split by smoking status.

    A row counts as a smoker only when its 'smoker' value is exactly 'yes';
    every other value is treated as a non-smoker.

    Args:
        insurance_dict (dict): Insurance data dictionary with parallel
            'region', 'smoker' and 'charges' lists.

    Returns:
        tuple: (smokers, non_smokers). Each is a dict keyed by region; the
            smoker entries hold 'number_of_smokers' and
            'smokers_average_charge', the non-smoker entries hold
            'number_of_non_smokers' and 'non_smokers_average_charge'.
            Averages are rounded to 3 decimal places.
    """
    smoker_sums, non_smoker_sums = {}, {}
    smoker_stats, non_smoker_stats = {}, {}

    for idx in range(len(insurance_dict['region'])):
        status = insurance_dict['smoker'][idx]
        region_name = insurance_dict['region'][idx]
        amount = float(insurance_dict['charges'][idx])

        # Pick the accumulator pair and key names for this row's group.
        if status == 'yes':
            sums, stats = smoker_sums, smoker_stats
            count_key, average_key = 'number_of_smokers', 'smokers_average_charge'
        else:
            sums, stats = non_smoker_sums, non_smoker_stats
            count_key, average_key = 'number_of_non_smokers', 'non_smokers_average_charge'

        sums[region_name] = sums.get(region_name, 0) + amount
        if region_name not in stats:
            stats[region_name] = {count_key: 0, average_key: None}
        stats[region_name][count_key] += 1

    for region_name, entry in smoker_stats.items():
        entry['smokers_average_charge'] = round(
            smoker_sums[region_name] / entry['number_of_smokers'], 3
        )

    for region_name, entry in non_smoker_stats.items():
        entry['non_smokers_average_charge'] = round(
            non_smoker_sums[region_name] / entry['number_of_non_smokers'], 3
        )

    return smoker_stats, non_smoker_stats


In [7]:
def average_insurance_gender(insurance_dict):
    """
    Computes mean insurance charges per gender, overall and split by whether
    the person has children.

    Rows whose 'sex' value is neither 'male' nor 'female' are ignored.

    Parameters:
        insurance_dict (dict): Insurance data with parallel 'sex', 'children'
            and 'charges' lists.

    Returns:
        dict: Six averages (rounded to 3 decimal places) keyed by
            '<gender>_average_charge', '<gender>_with_children_average_charge'
            and '<gender>_childfree_average_charge'. A category with no rows
            reports 0.
    """
    genders = ('male', 'female')

    # (gender, bucket) -> [charge_sum, row_count]
    tallies = {
        (gender, bucket): [0, 0]
        for gender in genders
        for bucket in ('all', 'with_children', 'childfree')
    }

    for idx in range(len(insurance_dict['sex'])):
        gender = insurance_dict['sex'][idx]
        amount = float(insurance_dict['charges'][idx])
        kids = int(insurance_dict['children'][idx])

        if gender not in genders:
            continue

        family_bucket = 'with_children' if kids > 0 else 'childfree'
        for bucket in ('all', family_bucket):
            tally = tallies[(gender, bucket)]
            tally[0] += amount
            tally[1] += 1

    def mean_charge(gender, bucket):
        # Empty categories report 0 instead of dividing by zero.
        charge_sum, row_count = tallies[(gender, bucket)]
        if row_count > 0:
            return round(charge_sum / row_count, 3)
        return 0

    return {
        'male_average_charge': mean_charge('male', 'all'),
        'female_average_charge': mean_charge('female', 'all'),
        'male_with_children_average_charge': mean_charge('male', 'with_children'),
        'female_with_children_average_charge': mean_charge('female', 'with_children'),
        'male_childfree_average_charge': mean_charge('male', 'childfree'),
        'female_childfree_average_charge': mean_charge('female', 'childfree'),
    }

In [8]:
def avg_insurance_by_num_of_children(insurance_dict):
    """
    Computes the mean insurance charge for each distinct number of children.

    Args:
        insurance_dict (dict): Insurance data dictionary with parallel
            'children' and 'charges' lists.

    Returns:
        dict: Maps 'number_of_children <n>' to the mean charge (rounded to 3
            decimal places) of rows with n children, ordered by ascending n.
    """
    charge_sums = {}
    row_counts = {}

    for idx, raw_children in enumerate(insurance_dict['children']):
        child_count = int(raw_children)
        amount = float(insurance_dict['charges'][idx])

        if child_count not in charge_sums:
            # First row seen for this family size.
            charge_sums[child_count] = amount
            row_counts[child_count] = 1
        else:
            charge_sums[child_count] += amount
            row_counts[child_count] += 1

    # Emit the labelled averages in ascending order of family size.
    return {
        'number_of_children ' + str(child_count): round(
            charge_sums[child_count] / row_counts[child_count], 3
        )
        for child_count in sorted(charge_sums)
    }

In [9]:
def avg_insurance_and_bmi_by_age(insurance_dict):
    """
    Computes the mean charge and mean BMI for five fixed age brackets.

    Rows whose age falls outside 18-65 are skipped.

    Args:
        insurance_dict (dict): Insurance data dictionary with parallel 'age',
            'charges' and 'bmi' lists.

    Returns:
        dict: Maps each bracket label ('18-25' ... '56-65') to
            {'average_charge': <rounded to 3 places>,
             'average_bmi': <rounded to 2 places>}. A bracket with no rows
            reports 0.0 for both values.
    """
    # (label, lowest age, highest age), both bounds inclusive.
    age_brackets = (
        ('18-25', 18, 25),
        ('26-35', 26, 35),
        ('36-45', 36, 45),
        ('46-55', 46, 55),
        ('56-65', 56, 65),
    )

    charge_sums = {label: 0.0 for label, _, _ in age_brackets}
    bmi_sums = {label: 0.0 for label, _, _ in age_brackets}
    member_counts = {label: 0 for label, _, _ in age_brackets}

    for idx, raw_age in enumerate(insurance_dict['age']):
        age = int(raw_age)
        charge = float(insurance_dict['charges'][idx])
        bmi = float(insurance_dict['bmi'][idx])

        label = next(
            (name for name, low, high in age_brackets if low <= age <= high),
            None,
        )
        if label is None:
            continue  # Age is outside every bracket.

        charge_sums[label] += charge
        bmi_sums[label] += bmi
        member_counts[label] += 1

    summary = {}
    for label, _, _ in age_brackets:
        members = member_counts[label]
        if members > 0:
            summary[label] = {
                'average_charge': round(charge_sums[label] / members, 3),
                'average_bmi': round(bmi_sums[label] / members, 2),
            }
        else:
            # No rows in this bracket: report zeros rather than dividing by zero.
            summary[label] = {'average_charge': 0.0, 'average_bmi': 0.0}

    return summary


In [10]:
# Human-readable BMI category definitions: category name -> description of its
# BMI range. The three obesity classes are nested under the 'obesity' key.
# These strings are descriptive only; the numeric cut-offs applied when rows
# are classified in bmi_smoking_relationship are written out in that function.
bmi_categories = {
    'underweight': 'Less than 18.5',
    'healthy weight': '18.5 to less than 25',
    'overweight': '25 to less than 30',
    'obesity': {
        'Class 1 Obesity': '30 to less than 35',
        'Class 2 Obesity': '35 to less than 40',
        'Class 3 Obesity': '40 or greater'
    }
}

def init_bmi_category_count():
    """
    Builds a fresh, zeroed tally structure for the BMI categories.

    Returns:
        dict: Counters set to 0 for 'underweight', 'healthy weight' and
            'overweight', plus an 'obesity' sub-dictionary with a zeroed
            counter for each of the three obesity classes. Every call
            returns new, independent dictionaries.
    """
    counts = dict.fromkeys(('underweight', 'healthy weight', 'overweight'), 0)
    counts['obesity'] = dict.fromkeys(
        ('Class 1 Obesity', 'Class 2 Obesity', 'Class 3 Obesity'), 0
    )
    return counts

def bmi_smoking_relationship(insurance_dict):
    """
    Summarises BMI for smokers and non-smokers.

    Rows are grouped by their 'smoker' value ('yes' or 'no'); rows with any
    other value are ignored. For each group the function counts the rows,
    averages their BMI and tallies how many fall in each BMI category.

    Args:
        insurance_dict (dict): A dictionary containing parallel 'smoker' and
            'bmi' lists.

    Returns:
        dict: 'num_smokers', 'num_non_smokers', 'average_smokers_bmi',
            'average_non_smokers_bmi' (rounded to 3 decimal places, 0.0 for
            an empty group), 'smoker_bmi_category_count' and
            'non_smoker_bmi_category_count'.
    """
    def tally_category(bmi_value, category_counts):
        # Thresholds are checked in ascending order, so each branch only
        # needs its upper bound. The last branch is an explicit comparison
        # so a value that fails every comparison (NaN) is left uncounted.
        if bmi_value < 18.5:
            category_counts['underweight'] += 1
        elif bmi_value < 25:
            category_counts['healthy weight'] += 1
        elif bmi_value < 30:
            category_counts['overweight'] += 1
        elif bmi_value < 35:
            category_counts['obesity']['Class 1 Obesity'] += 1
        elif bmi_value < 40:
            category_counts['obesity']['Class 2 Obesity'] += 1
        elif bmi_value >= 40:
            category_counts['obesity']['Class 3 Obesity'] += 1

    smokers = {'rows': 0, 'bmi_sum': 0, 'categories': init_bmi_category_count()}
    non_smokers = {'rows': 0, 'bmi_sum': 0, 'categories': init_bmi_category_count()}

    for idx, status in enumerate(insurance_dict['smoker']):
        bmi_value = float(insurance_dict['bmi'][idx])

        if status == 'yes':
            group = smokers
        elif status == 'no':
            group = non_smokers
        else:
            continue

        group['rows'] += 1
        group['bmi_sum'] += bmi_value
        tally_category(bmi_value, group['categories'])

    def mean_bmi(group):
        # An empty group reports 0.0 instead of dividing by zero.
        if group['rows'] > 0:
            return round(group['bmi_sum'] / group['rows'], 3)
        return 0.0

    return {
        "num_smokers": smokers['rows'],
        "num_non_smokers": non_smokers['rows'],
        "average_smokers_bmi": mean_bmi(smokers),
        "average_non_smokers_bmi": mean_bmi(non_smokers),
        "smoker_bmi_category_count": smokers['categories'],
        "non_smoker_bmi_category_count": non_smokers['categories'],
    }

In [11]:
def top_5_charges(insurance_dict):
    """
    Finds the five records with the highest insurance charges.

    The previous implementation overwrote a rank whenever a larger charge
    appeared, without shifting the displaced record down, so earlier maxima
    were lost and lower ranks could stay empty. Rows are now ranked with a
    descending sort so ranks 1-5 really are the five largest charges.

    Args:
        insurance_dict (dict): Insurance data dictionary of parallel column
            lists; its 'charges' entry holds values convertible with float().

    Returns:
        dict: Maps 'Charge Rank Number 1' ... 'Charge Rank Number 5' to the
            full record (column name -> that row's stored value) with that
            rank. Ranks beyond the number of available rows map to None.
    """
    rank_limit = 5
    charges = [float(charge) for charge in insurance_dict['charges']]

    # sorted() is stable, also with reverse=True, so rows with equal charges
    # keep their dataset order.
    ranked_rows = sorted(
        range(len(charges)), key=lambda row: charges[row], reverse=True
    )[:rank_limit]

    top = {}
    for rank in range(1, rank_limit + 1):
        label = 'Charge Rank Number ' + str(rank)
        if rank <= len(ranked_rows):
            row = ranked_rows[rank - 1]
            top[label] = {key: insurance_dict[key][row] for key in insurance_dict}
        else:
            # Fewer rows than ranks: keep the key so the result shape is fixed.
            top[label] = None
    return top

In [12]:
def calculate_average_age(insurance_dict):
    """
    Calculates the average age across all records.

    Args:
        insurance_dict (dict): Insurance data dictionary. Its 'age' entry is
            a list of values convertible with int().

    Returns:
        float: The average age rounded to 2 decimal places, or 0.0 when there
            are no records (instead of raising ZeroDivisionError).
    """
    ages = insurance_dict['age']

    # No records means no meaningful average; report 0.0 like the other
    # "safe" averages in this notebook rather than dividing by zero.
    if len(ages) == 0:
        return 0.0

    total_ages = sum(int(age) for age in ages)
    return round(total_ages / len(ages), 2)
        

In [13]:
def average_age_by_num_of_children(insurance_dict):
    """
    Computes age statistics for each distinct number of children.

    Args:
        insurance_dict (dict): Insurance data dictionary with parallel 'age'
            and 'children' lists.

    Returns:
        dict: Maps 'Number of Children: <n>' to
            {'total_ages': <sum of ages>, 'count': <rows>,
             'average_age': <mean age rounded to a whole number>},
            ordered by ascending n.
    """
    grouped = {}

    for idx in range(len(insurance_dict['age'])):
        child_count = int(insurance_dict['children'][idx])
        age_value = int(insurance_dict['age'][idx])

        entry = grouped.get(child_count)
        if entry is None:
            grouped[child_count] = {'total_ages': age_value, 'count': 1}
        else:
            entry['total_ages'] += age_value
            entry['count'] += 1

    labelled = {}
    for child_count in sorted(grouped):
        entry = grouped[child_count]
        # Each entry was created by at least one row, so count is never zero.
        entry['average_age'] = round(entry['total_ages'] / entry['count'])
        labelled['Number of Children: ' + str(child_count)] = entry

    return labelled
   

In [15]:
def main():
    """
    Runs every analysis on the insurance dataset and prints the results.

    The data is read from 'data/insurance.csv'; each report is printed in a
    fixed order, one after another.
    """
    def print_items(mapping):
        # Print a result dictionary one "key: value" line at a time.
        for label, entry in mapping.items():
            print(f"{label}: {entry}")

    # Load data
    insurance_dict = load_insurance_data('data/insurance.csv')

    # Overall average charge
    print(f"Average Insurance Charge: {calculate_average_charge(insurance_dict)}")

    # Average BMI by region
    print("Average BMI by Region:", calculate_average_bmi_by_region(insurance_dict))

    # Average charges for smokers and non-smokers by region
    smoker_data, non_smoker_data = calculate_avg_insurance_of_smokers_and_non_smokers_by_region(insurance_dict)
    print("Smoker Data by Region:", smoker_data)
    print("Non-Smoker Data by Region:", non_smoker_data)

    # BMI-smoking relationship
    print_items(bmi_smoking_relationship(insurance_dict))

    # Average insurance and BMI by age
    print_items(avg_insurance_and_bmi_by_age(insurance_dict))

    # Average insurance by number of children
    print_items(avg_insurance_by_num_of_children(insurance_dict))

    # Top 5 charges
    print_items(top_5_charges(insurance_dict))

    # Average insurance by gender and children
    print_items(average_insurance_gender(insurance_dict))

    # Average age
    print(f"The Average Age: {calculate_average_age(insurance_dict)}")

    # Average age by number of children
    print_items(average_age_by_num_of_children(insurance_dict))

if __name__ == "__main__":
    main()

Average Insurance Charge: 13270.422
Average BMI by Region: {'southwest': {'count': 325, 'average_bmi': 30.6}, 'southeast': {'count': 364, 'average_bmi': 33.36}, 'northwest': {'count': 325, 'average_bmi': 29.2}, 'northeast': {'count': 324, 'average_bmi': 29.17}}
Smoker Data by Region: {'southwest': {'number_of_smokers': 58, 'smokers_average_charge': 32269.063}, 'southeast': {'number_of_smokers': 91, 'smokers_average_charge': 34844.997}, 'northeast': {'number_of_smokers': 67, 'smokers_average_charge': 29673.536}, 'northwest': {'number_of_smokers': 58, 'smokers_average_charge': 30192.003}}
Non-Smoker Data by Region: {'southeast': {'number_of_non_smokers': 273, 'non_smokers_average_charge': 8032.216}, 'northwest': {'number_of_non_smokers': 267, 'non_smokers_average_charge': 8556.464}, 'northeast': {'number_of_non_smokers': 257, 'non_smokers_average_charge': 9165.532}, 'southwest': {'number_of_non_smokers': 267, 'non_smokers_average_charge': 8019.285}}
num_smokers: 274
num_non_smokers: 1064