# U.S. Medical Insurance Costs

In [4]:
import csv
from statistics import mean

# Load data using csv module
with open("insurance.csv", newline="") as csv_file:
    # DictReader takes its keys from the first line of the file
    data = list(csv.DictReader(csv_file))
    print(len(data))

# Convert numeric fields (the csv module yields every value as a string)
for record in data:
    for column, cast in (
        ("age", int),
        ("bmi", float),
        ("children", int),
        ("charges", float),
    ):
        record[column] = cast(record[column])

# 1. General statistics
# Collect the three numeric columns of interest in a single pass over the rows
ages, bmis, charges = [], [], []
for record in data:
    ages.append(record["age"])
    bmis.append(record["bmi"])
    charges.append(record["charges"])

# Print each summary figure as a "label value" pair
for label, value in (
    ("Average age:", mean(ages)),
    ("Average BMI:", mean(bmis)),
    ("Average charges:", mean(charges)),
    ("Min charges:", min(charges)),
    ("Max charges:", max(charges)),
):
    print(label, value)

# 2. Smoker vs Non-Smoker
# Split the charges by the value of the "smoker" column; rows whose value is
# neither "yes" nor "no" land in neither list
smoker_charges, nonsmoker_charges = [], []
for record in data:
    status = record["smoker"]
    if status == "yes":
        smoker_charges.append(record["charges"])
    elif status == "no":
        nonsmoker_charges.append(record["charges"])

smoker_avg = mean(smoker_charges)
print("\nAverage charges (smokers):", smoker_avg)
nonsmoker_avg = mean(nonsmoker_charges)
print("Average charges (non-smokers):", nonsmoker_avg)
# Share of all rows (not only yes/no rows) that are smokers, as a percentage
print("Smokers %:", len(smoker_charges) / len(data) * 100)

# 3. Gender differences
# Split the charges by the value of the "sex" column; any other value is skipped
male_charges, female_charges = [], []
for record in data:
    sex = record["sex"]
    if sex == "male":
        male_charges.append(record["charges"])
    elif sex == "female":
        female_charges.append(record["charges"])

male_avg = mean(male_charges)
print("\nAverage charges (male):", male_avg)
female_avg = mean(female_charges)
print("Average charges (female):", female_avg)

# 4. Impact of children
# Map each distinct child count to the list of charges seen for it.
# Keys appear in the order they are first encountered in the data.
children_groups = {}
for record in data:
    n_children = record["children"]
    if n_children not in children_groups:
        children_groups[n_children] = []
    children_groups[n_children].append(record["charges"])

print("\nAverage charges by number of children:")
for n_children in children_groups:
    group_avg = mean(children_groups[n_children])
    print(f"{n_children} children: {group_avg:.2f}")

# 5. Regional differences
# Map each region name to the list of charges seen for it.
# Keys appear in the order they are first encountered in the data.
region_groups = {}
for record in data:
    region = record["region"]
    if region not in region_groups:
        region_groups[region] = []
    region_groups[region].append(record["charges"])

print("\nAverage charges by region:")
for region in region_groups:
    region_avg = mean(region_groups[region])
    print(f"{region}: {region_avg:.2f}")

# 6. Simple correlation check (age vs charges)
# (manual covariance/variance calculation)
def correlation(x, y):
    """Return the Pearson correlation coefficient of two paired samples.

    Args:
        x: Numeric observations of the first variable.
        y: Numeric observations of the second variable, paired
            position-by-position with ``x``.

    Returns:
        The sum of products of deviations divided by the square root of the
        product of the two sums of squared deviations. Mathematically this
        lies in [-1, 1], up to floating-point rounding.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of
            observations.
        statistics.StatisticsError: If the inputs are empty (raised by
            ``mean``).
        ZeroDivisionError: If the denominator is zero, i.e. at least one
            input has no variance, so the coefficient is undefined.
    """
    # Materialise both inputs: they are traversed several times below, which
    # would silently exhaust a one-shot iterator after the first pass.
    x, y = list(x), list(y)

    # zip() would quietly drop the unpaired tail while the means and variances
    # still used every element, yielding a meaningless coefficient.
    if len(x) != len(y):
        raise ValueError(
            f"x and y must have the same length (got {len(x)} and {len(y)})"
        )

    x_mean, y_mean = mean(x), mean(y)
    num = sum((a - x_mean) * (b - y_mean) for a, b in zip(x, y))
    den_x = sum((a - x_mean) ** 2 for a in x)
    den_y = sum((b - y_mean) ** 2 for b in y)
    den = (den_x * den_y) ** 0.5

    # Same exception type the bare division would raise, but with a message
    # that explains the actual cause.
    if den == 0:
        raise ZeroDivisionError(
            "correlation is undefined when an input has zero variance"
        )
    return num / den

# Report how strongly age and BMI each move together with charges
age_vs_charges = correlation(ages, charges)
print("\nCorrelation(age, charges):", age_vs_charges)
bmi_vs_charges = correlation(bmis, charges)
print("Correlation(BMI, charges):", bmi_vs_charges)


1338
Average age: 39.20702541106129
Average BMI: 30.66339686098655
Average charges: 13270.422265141257
Min charges: 1121.8739
Max charges: 63770.42801

Average charges (smokers): 32050.23183153285
Average charges (non-smokers): 8434.268297856202
Smokers %: 20.47832585949178

Average charges (male): 13956.751177721893
Average charges (female): 12569.578843835347

Average charges by number of children:
0 children: 12365.98
1 children: 12731.17
3 children: 15355.32
2 children: 15073.56
5 children: 8786.04
4 children: 13850.66

Average charges by region:
southwest: 12346.94
southeast: 14735.41
northwest: 12417.58
northeast: 13406.38

Correlation(age, charges): 0.2990081933306477
Correlation(BMI, charges): 0.1983409688336289
