# U.S. Medical Insurance Costs

In [1]:
# import csv
import csv

In [13]:
with open('insurance.csv', 'r') as file:
    reader = csv.DictReader(file)
    data = list(reader)
#     print(data)
    
# printing first 5 rows
for row in data[:5]:
    print(row)

{'age': '19', 'sex': 'female', 'bmi': '27.9', 'children': '0', 'smoker': 'yes', 'region': 'southwest', 'charges': '16884.924'}
{'age': '18', 'sex': 'male', 'bmi': '33.77', 'children': '1', 'smoker': 'no', 'region': 'southeast', 'charges': '1725.5523'}
{'age': '28', 'sex': 'male', 'bmi': '33', 'children': '3', 'smoker': 'no', 'region': 'southeast', 'charges': '4449.462'}
{'age': '33', 'sex': 'male', 'bmi': '22.705', 'children': '0', 'smoker': 'no', 'region': 'northwest', 'charges': '21984.47061'}
{'age': '32', 'sex': 'male', 'bmi': '28.88', 'children': '0', 'smoker': 'no', 'region': 'northwest', 'charges': '3866.8552'}


In [15]:
# Total number of rows
data_length = len(data)
print(data_length)

1338


In [22]:
# column names
print(reader.fieldnames)

# or

columns = data[0].keys()
print("Columns:", columns)


['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges']
Columns: dict_keys(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'])


In [33]:
# Unique values of sex, region, smoker

unique_sex = set()
unique_region = set()
unique_smoker = set()

for row in data:
    unique_sex.add(row['sex'])
    unique_region.add(row['region'])
    unique_smoker.add(row['smoker'])
    
print(f"Unique Sex : {list(unique_sex)}")
print(f"Unique Region : {unique_region}")
print(f"Unique Smoker : {unique_smoker}")

# or

# unique_sex = []
# unique_region = []
# unique_smoker = []

# for row in data:
#     if row['sex'] not in unique_sex:
#         unique_sex.append(row['sex'])
#     if row['region'] not in unique_region:
#         unique_region.append(row['region'])
#     if row['smoker'] not in unique_smoker:
#         unique_smoker.append(row['smoker'])

# print(f"Unique Sex : {unique_sex}")
# print(f"Unique Region : {unique_region}")
# print(f"Unique Smoker : {unique_smoker}")

Unique Sex : ['male', 'female']
Unique Region : {'southeast', 'northeast', 'northwest', 'southwest'}
Unique Smoker : {'no', 'yes'}
Unique Sex : ['female', 'male']
Unique Region : ['southwest', 'southeast', 'northwest', 'northeast']
Unique Smoker : ['yes', 'no']


In [40]:
#  Calculate Average Insurance Cost
total_cost = 0

for row in data:
    total_cost += float(row['charges'])

average_cost = total_cost / len(data)
print("Average Insurance Cost:", round(average_cost, 2))

# or

charges = [float(row['charges']) for row in data]
average = sum(charges) / len(charges)
print("Average Insurance Cost:", round(average, 2))



Average Insurance Cost: 13270.42
Average Insurance Cost: 13270.42


In [44]:
# Find Who Pays More Smokers vs Non-smokers

smoker_cost = 0
smoker_count = 0
non_smoker_cost = 0
non_smoker_count = 0

for row in data:
    cost = float(row['charges'])
    if row["smoker"] == 'yes':
        smoker_cost += cost
        smoker_count += 1
    else:
        non_smoker_cost += cost
        non_smoker_count += 1

print(f"Average Smoker Cost: {smoker_cost / smoker_count}")
print(f"Average Non Smoker Cost: {non_smoker_cost / non_smoker_count}")


Average Smoker Cost: 32050.23183153285
Average Non Smoker Cost: 8434.268297856199


In [48]:
# Find Who Pays More Male vs Female
male_cost = 0
male_count = 0
female_cost = 0
female_count = 0

for row in data:
    cost = float(row['charges'])
    if row['sex'] == 'male':
        male_cost += cost
        male_count += 1
    else:
        female_cost += cost
        female_count += 1
        
print(f'Average Male Cost : {male_cost/male_count}')
print(f'Average Feale Cost : {female_cost/female_count}')

if male_cost / male_count > female_cost / female_count:
    print("Males pay more on average.")
else:
    print("Females pay more on average.")


Average Male Cost : 13956.751177721886
Average Feale Cost : 12569.57884383534
Males pay more on average.


In [52]:
# Group by Region Count how many people are from each region.

region_count = {}
for row in data:
    region = row['region']
    if region not in region_count:
        region_count[region] = 1
    else:
        region_count[region] += 1
        
print(f"People in each region: {region_count}")

People in each region: {'southwest': 325, 'southeast': 364, 'northwest': 325, 'northeast': 324}


In [66]:
# wtiting average age using function
age = [float(row['age']) for row in data]

average_age = sum(age) / len(age)
print(average_age)


total_age = 0
for row in data:
    age = float(row['age'])
    total_age += age

avg_age = total_age / len(data)
print(avg_age)

def get_average_age():
    total_age = 0
    for row in data:
        total_age += int(row['age'])
    return round(total_age / len(data),2)

print("Average Age:", get_average_age())


39.20702541106129
39.20702541106129
Average Age: 39.21


In [69]:
# highest payer
def highest_payer():
    high_payer = 0
    for row in data:
        high_charges = float(row['charges'])
        if high_charges > high_payer:
            high_payer = high_charges
    return high_payer

print(highest_payer())

def highest_payer_person():
    high_payer = 0
    person = None
    for row in data:
        high_charges = float(row['charges'])
        if high_charges > high_payer:
            high_payer = high_charges
            person = row
    return person


hp = highest_payer_person()
print(f"Highest Payer: {hp['sex']}, Age: {hp['age']}, Smoker: {hp['smoker']}, Charges: {hp['charges']}")

63770.42801
Highest Payer: female, Age: 54, Smoker: yes, Charges: 63770.42801


In [71]:
# calculate the oldest person
def oldest_person():
    older = 0
    person = None
    for row in data:
        age = int(row['age'])
        if age > older:
            older = age
            person = row
    return person

op = oldest_person()
print(f"Oldest Person: {op['sex']}, Age: {op['age']}, Smoker: {op['smoker']}, Charges: {op['charges']}")

Oldest Person: male, Age: 64, Smoker: no, Charges: 30166.61817


In [75]:
# People with more than 2 children

def people_with_more_than_two_child():
    child = 2
    people_list = []
    person = None
    
    for row in data:
        child_val = int(row['children'])
        if child_val > child:
            person = row
            people_list.append(person)
            
    return people_list

people = people_with_more_than_two_child()


for item in people:
    print(f"{item['sex']}, Age: {item['age']}, Children: {item['children']}, Charges: {item['charges']}")


male, Age: 28, Children: 3, Charges: 4449.462
female, Age: 37, Children: 3, Charges: 7281.5056
female, Age: 59, Children: 3, Charges: 14001.1338
female, Age: 19, Children: 5, Charges: 4687.797
female, Age: 62, Children: 3, Charges: 15612.19335
female, Age: 40, Children: 3, Charges: 8059.6791
male, Age: 43, Children: 3, Charges: 8606.2174
male, Age: 25, Children: 4, Charges: 4504.6624
male, Age: 28, Children: 3, Charges: 17663.1442
male, Age: 31, Children: 5, Charges: 6799.458
female, Age: 53, Children: 3, Charges: 11741.726
female, Age: 48, Children: 4, Charges: 11033.6617
male, Age: 59, Children: 3, Charges: 30184.9367
female, Age: 54, Children: 3, Charges: 12105.32
female, Age: 61, Children: 3, Charges: 30942.1918
male, Age: 52, Children: 3, Charges: 11488.31695
female, Age: 47, Children: 3, Charges: 10115.00885
female, Age: 54, Children: 3, Charges: 27322.73386
male, Age: 30, Children: 3, Charges: 20745.9891
female, Age: 29, Children: 3, Charges: 5138.2567
male, Age: 46, Children: 3

In [91]:
# Smokers under age 30
def smoker_age():
    smoker = 'yes'
    age = 30
    smoker_list = []
    person = None
    for row in data:
        if smoker == row['smoker'] and age > int(row['age']):
#             person = row
            smoker_list.append(row)
    return smoker_list
persons = smoker_age()

for person in persons:
    print(f"Smoker: {person['smoker']}, Sex : {person['sex']}, Age: {person['age']}, Region: {person['region']}, Charges: {person['charges']}")

Smoker: yes, Sex : female, Age: 19, Region: southwest, Charges: 16884.924
Smoker: yes, Sex : male, Age: 27, Region: southeast, Charges: 39611.7577
Smoker: yes, Sex : male, Age: 22, Region: southwest, Charges: 35585.576
Smoker: yes, Sex : male, Age: 28, Region: southwest, Charges: 51194.55914
Smoker: yes, Sex : male, Age: 18, Region: southeast, Charges: 34303.1672
Smoker: yes, Sex : female, Age: 20, Region: northwest, Charges: 14711.7438
Smoker: yes, Sex : male, Age: 28, Region: southeast, Charges: 17663.1442
Smoker: yes, Sex : female, Age: 27, Region: southeast, Charges: 16577.7795
Smoker: yes, Sex : male, Age: 22, Region: southeast, Charges: 37165.1638
Smoker: yes, Sex : male, Age: 20, Region: northwest, Charges: 17560.37975
Smoker: yes, Sex : female, Age: 29, Region: southeast, Charges: 19107.7796
Smoker: yes, Sex : female, Age: 19, Region: southwest, Charges: 17081.08
Smoker: yes, Sex : male, Age: 18, Region: northeast, Charges: 15518.18025
Smoker: yes, Sex : female, Age: 18, Region