# Project: US Medical Insurance Cost Analysis

## Step 1: Look over the dataset
### Step 1.1: Import the csv module and utils.py

In [1]:
import csv
import utils


### Step 1.2: Create empty list for each variable column from the dataset
Seven empty lists were created to store the data extracted from the insurance.csv file for each column. The variable names were chosen to reflect the type of data stored in each list.

In [2]:
# One independent, initially empty list per column of insurance.csv.
# The generator builds a fresh list for every name, so none are shared.
(
    patients_age,
    patients_sex,
    patients_bmi,
    patients_children,
    patients_smoker,
    patients_region,
    patients_charges,
) = ([] for _ in range(7))

### Step 1.3: Define a function that will extract the data in the insurance.csv file 
The function extracts a single column from the CSV file and appends its values to the corresponding list created in Step 1.2.

In [3]:
def extract_data(column_list, csv_file, column_name):
    """Append every value of one CSV column to ``column_list``.

    Args:
        column_list: List that is extended in place, one entry per data row.
        csv_file: Path of a CSV file whose first row holds the column names.
        column_name: Header of the column to read.

    Returns:
        None. The result is delivered by mutating ``column_list``.

    Raises:
        KeyError: If the file has data rows but ``column_name`` is not one
            of its headers.
    """
    # newline='' is what the csv module asks for: the reader then handles
    # line endings itself, so quoted fields containing line breaks are
    # returned unchanged instead of being rewritten by newline translation.
    with open(csv_file, newline='') as csv_data:
        # DictReader yields every field as a string; no conversion is done here.
        column_list.extend(row[column_name] for row in csv.DictReader(csv_data))

In [4]:
# Single place to change the input path.
INSURANCE_CSV = 'insurance.csv'

# Pair each destination list with the header of the column it stores.
for target_list, header in (
    (patients_age, 'age'),
    (patients_sex, 'sex'),
    (patients_bmi, 'bmi'),
    (patients_children, 'children'),
    (patients_smoker, 'smoker'),
    (patients_region, 'region'),
    (patients_charges, 'charges'),
):
    # extract_data appends, so empty the list first: re-running this cell
    # must not add a second copy of every row to an already-filled list.
    target_list.clear()
    extract_data(target_list, INSURANCE_CSV, header)

## Step 2: Analysis of the Dataset
### Step 2.1: Create an instance of the SummaryStatistics class defined in utils.py

In [5]:
# One shared helper instance; the mean/range cells for age, bmi and charges
# all call methods on this object.
summary_statistics_obj = utils.SummaryStatistics()

### Step 2.2: Find out the average age, BMI and charges of the patients in the dataset
Summary statistics of the age of the patients

In [6]:
# patients_age holds the raw strings read by csv.DictReader, so the helper
# presumably converts them to numbers itself -- confirm in utils.py.
# The recorded output is a sentence reporting the average age.
summary_statistics_obj.mean(patients_age, 'age')

'The average age of the patients is 39 years!'

In [7]:
# The recorded output reports 46.0; presumably max(age) - min(age) -- confirm
# against the implementation in utils.py.
summary_statistics_obj.range(patients_age, 'age')

'The patient age is within the range of 46.0'

Summary statistics of the bmi of the patients

In [8]:
# Same helper as for age, called with the bmi column and the 'bmi' label;
# the recorded output is a sentence reporting the average bmi.
summary_statistics_obj.mean(patients_bmi, 'bmi')

'The average bmi of the patients is 30.66!'

In [9]:
# The recorded output reports 37.17; presumably the spread between the
# largest and smallest bmi -- confirm in utils.py.
summary_statistics_obj.range(patients_bmi, 'bmi')

'The patient bmi value is within the range of 37.17'

Analysis of the amount of money the patients spend on insurance

In [10]:
# Average of the charges column; the recorded output reports it as a sentence.
# Units are not stated anywhere in this notebook.
summary_statistics_obj.mean(patients_charges, 'charges')

'The average charge of the patients is 13270.42!'

### Step 2.3: Analyse the data based on location

In [11]:
# Helper from utils.py for the per-region analysis. The smoker cell later in
# this notebook also calls region_obj.region(), so that cell depends on this one.
region_obj = utils.PatientsByRegion()

In [12]:
# Per the recorded output, this reports one line per region with the number
# of patients from that region.
region_obj.analyse_region(patients_region)

The total number of patients from southwest is 325
The total number of patients from southeast is 364
The total number of patients from northwest is 325
The total number of patients from northeast is 324


### Step 2.4: Look at the different costs between smokers and non-smokers

In [23]:
# Gather the inputs: charge totals per smoking status and the head counts.
smoke_patient_obj = utils.PatientThatSmoke()
total_charges = smoke_patient_obj.charge(patients_smoker, patients_charges)
# NOTE(review): the region helper is used here as a generic value counter for
# the 'yes'/'no' smoker column -- confirm region() is meant to be reused so.
num_smokers = region_obj.region(patients_smoker)

total_smokers, total_non_smokers = num_smokers['yes'], num_smokers['no']

print('-------------Total number of patients who either smoke or not----------------')
print(
    'The number of people who smoke are {0}'.format(total_smokers),
    'The number of people who do not smoke are {0}'.format(total_non_smokers),
    sep='\n',
)

print()
print('-------------Average expenses on health insurance base on smoking status-----------------------')

# total_charges is indexed first/last exactly as returned by charge();
# presumably (smokers, non-smokers) -- verify against utils.py.
av_charge_s = round(total_charges[0] / total_smokers, 2)
av_charge_ns = round(total_charges[-1] / total_non_smokers, 2)

print(
    "The average money spent on insurance by patient who smoke is {0}".format(av_charge_s),
    "The average money spent on insurance by patient who do not smoke is {0}".format(av_charge_ns),
    sep='\n',
)



-------------Total number of patients who either smoke or not----------------
The number of people who smoke are 274
The number of people who do not smoke are 1064

-------------Average expenses on health insurance base on smoking status----------------
The average money spent on insurance by patient who smoke is 32050.23
The average money spent on insurance by patient who do not smoke is 8434.27
