# Medical Insurance Project

### Importing the pandas and NumPy libraries

In [25]:
import pandas as pd
import numpy as np

# Load the insurance records from 'insurance.csv'; the path is relative, so it
# is resolved against the current working directory.
insurance_df = pd.read_csv('insurance.csv')

### Creating a class for the DataFrame
The class wraps the DataFrame and provides methods that return summary information about the data set.

In [26]:
class PatientsInfo:
    """Summary statistics over a patient insurance DataFrame.

    The methods read the columns ``age``, ``sex``, ``bmi``, ``children``,
    ``smoker``, ``region`` and ``charges`` from the wrapped frame.
    """

    # Rows of ``Series.describe()`` kept by the numeric summaries.
    _SUMMARY_ROWS = ('mean', 'min', '50%', 'max')

    def __init__(self, df):
        """Store *df* and cache its per-column non-null counts.

        Args:
            df: pandas DataFrame holding one row per patient.
        """
        self.df = df
        # Series indexed by column name; each value is that column's
        # non-null count (the result of ``DataFrame.count()``).
        self.count = df.count()

    def _summarize(self, column_name):
        """Return the mean, min, median (``50%``) and max of one column.

        Rows are selected by label rather than by position, so the result
        does not depend on the order in which ``describe()`` lays out its
        statistics.

        Raises:
            KeyError: if the column is missing, or if ``describe()`` does not
                produce the numeric statistics for it.
        """
        stats = self.df[column_name].describe()
        return stats.loc[list(self._SUMMARY_ROWS)]

    def get_age_data(self):
        """Return the mean, min, median and max of the ``age`` column."""
        return self._summarize('age')

    def get_sex_count(self):
        """Return the number of rows per distinct ``sex`` value."""
        return self.df['sex'].value_counts()

    def get_bmi_data(self):
        """Return the mean, min, median and max of the ``bmi`` column."""
        return self._summarize('bmi')

    def get_children_count(self):
        """Return the number of rows per distinct ``children`` value."""
        return self.df['children'].value_counts()

    def get_smoker_count(self):
        """Return the number of rows per distinct ``smoker`` value."""
        return self.df['smoker'].value_counts()

    def get_region_count(self):
        """Return the number of rows per distinct ``region`` value."""
        return self.df['region'].value_counts()

    def get_charges_data(self):
        """Return the mean, min, median and max of the ``charges`` column."""
        return self._summarize('charges')

    def get_charges_by_column(self, column_name):
        """Return the mean of ``charges`` for each group of *column_name*.

        Args:
            column_name: name of the column to group by.

        Returns:
            DataFrame indexed by the distinct values of *column_name* with a
            single column, ``average_charge``.
        """
        # The string alias 'mean' is used instead of the np.mean callable:
        # newer pandas emits a FutureWarning when a NumPy reduction is passed
        # to a groupby aggregation.
        return self.df.groupby(column_name).agg(
            average_charge=pd.NamedAgg(column='charges', aggfunc='mean')
        )
    
# Wrap the loaded DataFrame; the analysis cells below query this instance.
insurance_info = PatientsInfo(insurance_df)

### Getting the count of males and females in the data set

In [27]:
# Look the counts up by label rather than by position: value_counts() orders
# its result by frequency, so positional access would silently swap the two
# numbers whenever females outnumber males.
# NOTE(review): assumes the 'sex' column uses the labels 'male' and 'female'
# -- confirm against the CSV.
sex_count = insurance_info.get_sex_count()
males_count = sex_count.loc['male']
females_count = sex_count.loc['female']
sex_count_dict = {"males": males_count, "females": females_count}

print(f"There are {sex_count_dict['males']} males and {sex_count_dict['females']} females in record.")

There are 676 males and 662 females in record.


### Getting aggregate data about age in the data set

In [28]:
# Collect the age summary into a plain dict, rounding every statistic to two
# decimal places.
age_data = insurance_info.get_age_data()
age_dict = {}
for stat_name in age_data.index:
    age_dict[stat_name] = round(age_data.loc[stat_name], 2)

print(f"The average age and median age are {age_dict['mean']} and {age_dict['50%']} years old, respectively.")
print(f"The youngest client is {age_dict['min']} years old, while the oldest client is {age_dict['max']} years old.")

The average age and median age are 39.21 and 39.0 years old, respectively.
The youngest client is 18.0 years old, while the oldest client is 64.0 years old.


### Getting aggregate data about BMI in the data set

In [89]:
# Collect the BMI summary into a plain dict, rounding every statistic to two
# decimal places.
bmi_data = insurance_info.get_bmi_data()
bmi_dict = {}
for stat_name in bmi_data.index:
    bmi_dict[stat_name] = round(bmi_data.loc[stat_name], 2)

print(f"The average BMI and median BMI are {bmi_dict['mean']} and {bmi_dict['50%']}, respectively.")
print(f"The lowest BMI is {bmi_dict['min']} and the highest BMI is {bmi_dict['max']}")

The average BMI and median BMI are 30.66 and 30.4, respectively.
The lowest BMI is 15.96 and the highest BMI is 53.13


### Getting the count of people by number of children

In [44]:
# Map each number of children (0-5) to how many people have that many.
children_count = insurance_info.get_children_count()
children_dict = {}
for n_children in children_count.index:
    children_dict[n_children] = children_count.loc[n_children]

print(f"""
There are {children_dict[0]} people without children, 
{children_dict[1]} people with one child, 
{children_dict[2]} people with two children, 
{children_dict[3]} people with three children, 
{children_dict[4]} people with four children, 
and {children_dict[5]} people with five children.""")


There are 574 people without children, 
324 people with one child, 
240 people with two children, 
157 people with three children, 
25 people with four children, 
and 18 people with five children.


### Getting the count of smokers in the data set

In [90]:
# Total number of records, taken as the non-null count of the first column.
# ``.iloc[0]`` makes the positional lookup explicit: ``count`` is indexed by
# column name, and a bare integer key on a label-indexed Series relies on a
# deprecated integer-as-position fallback.
count = insurance_info.count.iloc[0]
smoker_count = insurance_info.get_smoker_count()
smoker_dict = {row_name: smoker_count.loc[row_name] for row_name in smoker_count.index}

print(f"Out of {count} people, {smoker_dict['yes']} smoke and {smoker_dict['no']} don't.")

Out of 1338 people, 274 smoke and 1064 don't.


### Getting the count of people in each region

In [49]:
# Map each region label to the number of people recorded for it.
region_count = insurance_info.get_region_count()
region_dict = {}
for region_name in region_count.index:
    region_dict[region_name] = region_count.loc[region_name]

print(f"""There are {region_dict['southeast']} people from the South-East, 
{region_dict['southwest']} people from the South-West, 
{region_dict['northwest']} people from the North-West, 
and {region_dict['northeast']} people from the North-East.""")

There are 364 people from the South-East, 
325 people from the South-West, 
325 people from the North-West, 
and 324 people from the North-East.


### Getting information on the charges in the data set

In [88]:
# Collect the charges summary into a plain dict (two-decimal rounding), then
# add the gap between the rounded mean and the rounded median.
charges_data = insurance_info.get_charges_data()
charges_dict = {}
for stat_name in charges_data.index:
    charges_dict[stat_name] = round(charges_data.loc[stat_name], 2)
mean_minus_median = charges_dict['mean'] - charges_dict['50%']
charges_dict['difference'] = round(mean_minus_median, 2)

print(f"On average, one person will pay ${charges_dict['mean']}. On the other hand, the median charge for a single person is ${charges_dict['50%']}. There is a difference of ${charges_dict['difference']}.")
print(f"The lowest charged to someone is ${charges_dict['min']}, while the highest charged to someone is ${charges_dict['max']}.")

On average, one person will pay $13270.42. On the other hand, the median charge for a single person is $9382.03. There is a difference of $3888.39.
The lowest charged to someone is $1121.87, while the highest charged to someone is $63770.43.


### Computing the average charge grouped by each column

In [86]:
def _rounded_average_charges(charge_table):
    """Map each group label of *charge_table* to its 'average_charge' value, rounded to two decimals."""
    rounded = {}
    for group_label in charge_table.index:
        rounded[group_label] = round(charge_table.loc[group_label, 'average_charge'], 2)
    return rounded


# For every grouping column, keep both the grouped table and a plain dict of
# its rounded average charges.
charge_by_age = insurance_info.get_charges_by_column('age')
charge_by_age_dict = _rounded_average_charges(charge_by_age)

charge_by_sex = insurance_info.get_charges_by_column('sex')
charge_by_sex_dict = _rounded_average_charges(charge_by_sex)

charge_by_children = insurance_info.get_charges_by_column('children')
charge_by_children_dict = _rounded_average_charges(charge_by_children)

charge_by_smoker = insurance_info.get_charges_by_column('smoker')
charge_by_smoker_dict = _rounded_average_charges(charge_by_smoker)

charge_by_region = insurance_info.get_charges_by_column('region')
charge_by_region_dict = _rounded_average_charges(charge_by_region)