# U.S. Medical Insurance Costs

The first step in our project is to import the csv module and the file that contains our data

In [1]:
import csv

insurance_csv = open('insurance.csv', newline='')
insurance_reader = csv.DictReader(insurance_csv) # there is no need to use the delimiter parameter since ',' is the default value

We need to save our data (each column) in an appropriate Python data structures 

We can start by building the Person class for creating the instance of a person in our data set

In [2]:
class Person:
    def __init__(self, age, sex, bmi, children, smoker, region, charges, id = None):
        self.age = age
        self.sex = sex
        self.bmi = bmi
        self.children = children
        self.smoker = smoker
        self.region = region
        self.charges = charges
        self.id = id
    def __repr__(self):
        return "The person with the id of {id} is a {age}'s old {sex} from {region}. With a BMI of {bmi}, a number of children of {children}, a smoking state of {smoker} and charges value of {charges}".format(id=self.id, age=self.age, sex=self.sex, region=self.region, bmi=self.bmi, children=self.children, smoker=self.smoker, charges=self.charges)

Next, we need to :
* Create different lists for the different columns  
* Get the data for each person and store it in a Person's instance
* Regroup all instances in a `people_list`
* Transform the different instances into a Python dictionary

In [3]:
# creating a list for each column
age_list, sex_list, bmi_list, children_list, smoker_list, region_list, charges_list = [], [], [], [], [], [], []
length = 0
for row in insurance_reader:
    age_list.append(int(row['age']))
    sex_list.append(row['sex'])
    bmi_list.append(float(row['bmi']))
    children_list.append(int(row['children']))
    smoker_list.append(row['smoker'])
    region_list.append(row['region'])
    charges_list.append(float(row['charges']))
    length += 1

# adding an id sequence
ids = range(1, length+1)

# creating people_list
people_list = []
for i in ids:
    people_list.append(Person(age_list[i-1], sex_list[i-1], bmi_list[i-1], children_list[i-1], smoker_list[i-1], region_list[i-1], charges_list[i-1], i))

# creating people_dict
people_dict = {}
for person in people_list:
    people_dict[person.id] = {
        'age': person.age, 
        'sex': person.sex, 
        'bmi': person.bmi, 
        'children': person.children, 
        'smoker': person.smoker, 
        'charges': person.charges
    }

insurance_csv.close()


Now, we have all the data we need in appropriate data structures, it's time to begin the analysis.

In [4]:
# defining functions

def average(input_list):
    sum = 0
    i = 0
    for data in input_list:
        sum += data
        i += 1
    return sum / i

# average age of entries

def average_age_all(list):
    print(f"The average age in our entries is {average(list)}")
average_age_all(age_list)

# highest efective region

def highest_effective_region(input_list):
    region_dict = {}
    for region in input_list:
        try:
            region_dict[region] += 1
        except KeyError:
            region_dict[region] = 0
    max_effective = 0
    related_region = None
    for region in region_dict:
        if region_dict[region] > max_effective:
            max_effective = region_dict[region]
            related_region = region
    print(f"The region with the most effective is {related_region} with an effective of {max_effective}")
    return region_dict
region_effective = highest_effective_region(region_list)
    
# average age of someone with n-children

def average_age_with_nth_children(input_list, n):
    average_age = 0
    people_counter = 0
    for person in input_list:
        if person.children == n:
            average_age += person.age
            people_counter += 1
    try:
        average_age /= people_counter
    except ZeroDivisionError:
        average_age = 0
    print(f"The average age of somebody having {n} children is {average_age}")
average_age_with_nth_children(people_list, 2)

# region with the highest smoking rate

def highest_smoking_rate_region(people_list):
    # we create a dictionary to hold the effective of smoker in each region
    region_smoker = {}
    for person in people_list:
        if person.smoker == 'no':
            continue
        try:
            region_smoker[person.region] += 1
        except KeyError:
            region_smoker[person.region] = 0
    # we divide each effective by the total effective to get the smoking rate
    highest_smoking_rate = 0
    for region in region_smoker:
        region_smoker[region] /= region_effective[region]
        if region_smoker[region] > highest_smoking_rate:
            highest_smoking_rate = region_smoker[region]
            related_region = region
    print(f"The region with the highest smoking rate is {related_region} with a rate of {highest_smoking_rate}")
    return region_smoker
region_with_highest_smoking_rate = highest_smoking_rate_region(people_list)

# region with the highest birth rate

def highest_birth_rate_region(people_list):
    # we create a dictionary to hold the effective of children in each region
    region_children = {}
    for person in people_list:
        try:
            region_children[person.region] += person.children
        except KeyError:
            region_children[person.region] = person.children
    # we divide each effective by the effective of people to get the birth rate
    highest_rate = 0
    for region in region_children:
        region_children[region] /= region_effective[region]
        if region_children[region] > highest_rate:
            highest_rate = region_children[region]
            related_region = region
    print(f"The region with the highest birth rate is {related_region} with a rate of {highest_rate}")
    return region_children
region_with_highest_birth_rate = highest_birth_rate_region(people_list)




The average age in our entries is 39.20702541106129
The region with the most effective is southeast with an effective of 363
The average age of somebody having 2 children is 39.44583333333333
The region with the highest smoking rate is southeast with a rate of 0.24793388429752067
The region with the highest birth rate is northwest with a rate of 1.1512345679012346
