# U.S. Medical Insurance Costs

In [1]:
# Module imports
import csv
from collections import Counter

In [2]:
# Lists variables
# One independent, initially empty list per column of the insurance CSV.
# Each name is bound to its own list object so they can be filled separately.
ages, sexes, bmis = [], [], []
num_children, smoker_status = [], []
regions, insurance_costs = [], []

In [3]:
# Function for filling the lists
def add_values(lst, lst_name, file_path="insurance.csv"):
    """Append every value of one CSV column to ``lst``.

    Parameters
    ----------
    lst : list
        List that is extended in place. Values are appended exactly as
        ``csv.DictReader`` yields them (strings, no type conversion).
    lst_name : str
        Header name of the column to read, e.g. ``"age"``.
    file_path : str, optional
        Path of the CSV file to read. Defaults to ``"insurance.csv"`` so
        existing two-argument calls keep working unchanged.

    Raises
    ------
    KeyError
        If a data row has no column named ``lst_name``.
    """
    # newline="" lets the csv module do its own line-ending handling, which
    # the csv documentation recommends for files passed to a reader.
    with open(file_path, newline="") as insurance_file:
        insurance_reader = csv.DictReader(insurance_file)
        lst.extend(row[lst_name] for row in insurance_reader)


# Fill every data list from its CSV column: one add_values call per
# (list, column header) pair, in the same order as the list definitions.
for target_list, column_name in (
    (ages, "age"),
    (sexes, "sex"),
    (bmis, "bmi"),
    (num_children, "children"),
    (smoker_status, "smoker"),
    (regions, "region"),
    (insurance_costs, "charges"),
):
    add_values(target_list, column_name)

In [4]:
### Data Analysis 1: Average Age of patients

def find_average_age(ages):
    """Print and return the mean age of the patients.

    Parameters
    ----------
    ages : list
        Ages as values accepted by ``int()`` (the CSV loader stores strings).

    Returns
    -------
    float or None
        The unrounded mean age, or ``None`` when ``ages`` is empty.
    """
    if not ages:
        # An empty list would otherwise fail with ZeroDivisionError below.
        print("No age data available.")
        return None

    # Calculating average age and printing formatted string
    total_age = sum(int(age) for age in ages)
    average_age = total_age / len(ages)

    print(f"The average age of patients is {round(average_age, 2)} years.")

    # Return the raw value as well, so callers can reuse it
    return average_age

# Function test: report the average of the values loaded into `ages`
find_average_age(ages)

The average age of patients is 39.21 years.


In [5]:
### Data Analysis 2: Which regions the patients come from

def find_region_frequency(regions):
    """Print how many patients come from each region.

    Parameters
    ----------
    regions : list
        One region name per patient.

    One line is printed per distinct region, ordered by ``sorted()`` on the
    region names. Nothing is printed for an empty list.
    """
    # Counter tallies every region in a single pass over the data, instead
    # of rescanning the whole list once per distinct region.
    region_frequency = Counter(regions)

    # Sorting and printing out the data
    for region in sorted(region_frequency):
        print(f"There are {region_frequency[region]} patients from {region} region.")

# Function test: print the per-region patient counts for the loaded `regions`
find_region_frequency(regions)

There are 324 patients from northeast region.
There are 325 patients from northwest region.
There are 364 patients from southeast region.
There are 325 patients from southwest region.


In [6]:
### Data Analysis 3: Finding Num of Smokers and Non-smokers

def find_smokers(smoker_status):
    """Print the smoker / non-smoker counts and which group is larger.

    ``smoker_status`` holds one entry per patient; entries equal to "yes"
    count as smokers and entries equal to "no" as non-smokers. Any other
    value is ignored by both counts.
    """
    smokers = smoker_status.count("yes")
    non_smokers = smoker_status.count("no")

    # Headline counts first
    print(f"There are {smokers} smokers and {non_smokers} non-smoker patients")

    # Then a single comparison line; the tie is handled up front
    if smokers == non_smokers:
        print("The number of smokers and non-smokers is equal.")
        return

    if smokers > non_smokers:
        print(f"The number of smokers is higher than number of non-smokers by a difference of {smokers - non_smokers} patients.")
        return

    print(f"The number of non-smokers is higher than number of smokers by a difference of {non_smokers - smokers} patients.")
    
# Function test: print smoker vs. non-smoker counts for the loaded `smoker_status`
find_smokers(smoker_status)

There are 274 smokers and 1064 non-smoker patients
The number of non-smokers is higher than number of smokers by a difference of 790 patients.


In [7]:
### Data Analysis 4: Finding num of males and females

def male_vs_female(sexes, bias_threshold=100):
    """Print the male / female counts and simple balance checks.

    Parameters
    ----------
    sexes : list
        One entry per patient; entries equal to "male" / "female" are counted.
    bias_threshold : int, optional
        Absolute male/female difference at or above which the
        "Data Might Be Biased." warning is printed. Defaults to 100.
    """
    # Counting males, females and the difference between their numbers
    num_males = sexes.count("male")
    num_females = sexes.count("female")
    diff = abs(num_males - num_females)

    print(f"There are {num_males} males and {num_females} females.")

    if num_males > num_females:
        print(f"{diff} more males than females.")
    elif num_males < num_females:
        print(f"{diff} more females than males.")
    else:
        print("Males and females equal in number.")

    if diff >= bias_threshold:
        print("Data Might Be Biased.")

    # The note applies only when every entry is "male" or "female". Counting
    # distinct values (<= 2) would also fire for data such as
    # ["male", "other"], which does contain another category.
    if set(sexes) <= {"male", "female"}:
        print("Data does't accounts for other sex categories.")

# Function test: print the male/female breakdown for the loaded `sexes`
male_vs_female(sexes)

There are 676 males and 662 females.
14 more males than females.
Data does't accounts for other sex categories.


In [8]:
### Data Analysis 5: Calculating average insurance cost

def average_cost(insurance_costs):
    """Print the mean insurance cost (rounded to 2 places) and return it unrounded.

    ``insurance_costs`` is a list of values accepted by ``float()``.
    """
    # The local gets its own name so it does not shadow this function
    mean_cost = sum(map(float, insurance_costs)) / len(insurance_costs)
    print(f"The Average insurance cost is {round(mean_cost, 2)} dollars.")
    return mean_cost

# Function test: prints the average charge; as the last expression of the
# cell, the returned (unrounded) value is also displayed by the notebook
average_cost(insurance_costs)

The Average insurance cost is 13270.42 dollars.


13270.422265141257

In [9]:
### Data Analysis 6: Calculating the minimum and maximum insurance cost and the mean absolute deviation about the mean

def min_and_max_cost(insurance_costs):
    """Print the smallest and largest insurance cost.

    ``insurance_costs`` holds values accepted by ``float()``; they are
    converted before comparison so the ordering is numeric.
    """
    numeric_costs = list(map(float, insurance_costs))
    lowest, highest = min(numeric_costs), max(numeric_costs)
    print(f"Minimum insurance cost is: {lowest} \nMaximum insurance cost is: {highest}")

def deviation_by_mean(insurance_costs):
    """Print the mean absolute deviation of the costs about their mean.

    The mean comes from ``average_cost``, which prints its own summary line
    as a side effect before this function prints the deviation.
    """
    centre = average_cost(insurance_costs)

    # Absolute distance of every cost from the mean
    absolute_gaps = [abs(float(cost) - centre) for cost in insurance_costs]
    mean_gap = sum(absolute_gaps) / len(absolute_gaps)

    print(f"The Mean Deviation by Mean for the given insurance costs is: {round(mean_gap, 2)} dollars.")
    
# Function test: min/max report first, then the mean deviation
# (deviation_by_mean calls average_cost, so the average line is printed too)
min_and_max_cost(insurance_costs)
deviation_by_mean(insurance_costs)

Minimum insurance cost is: 1121.8739 
Maximum insurance cost is: 63770.42801
The Average insurance cost is 13270.42 dollars.
The Mean Deviation by Mean for the given insurance costs is: 9091.13 dollars.


In [10]:
### Data Analysis 7: Average, minimum and maximum BMI

def average_bmi(bmis):
    """Print the mean BMI (rounded to 2 places) and return it unrounded.

    ``bmis`` is a list of values accepted by ``float()``.
    """
    bmi_values = [float(raw_bmi) for raw_bmi in bmis]
    mean_bmi = sum(bmi_values) / len(bmis)
    print(f"The Average BMI is {round(mean_bmi, 2)}")
    return mean_bmi

def min_max_bmi(bmis):
    """Print the smallest and largest BMI after converting the values with ``float()``."""
    bmi_values = [float(raw_bmi) for raw_bmi in bmis]
    smallest = min(bmi_values)
    largest = max(bmi_values)
    print(f"Minimum BMI is: {smallest} \nMaximum BMI is: {largest}")

# Function test: print the average BMI, then the minimum and maximum BMI
average_bmi(bmis)
min_max_bmi(bmis)

The Average BMI is 30.66
Minimum BMI is: 15.96 
Maximum BMI is: 53.13


In [11]:
### Data Analysis 8: Finding whether there are more overweight or more underweight patients

def find_weight_distribution(
    bmis,
    over_weight_threshold=25,
    under_weight_threshold=18,
    large_gap=100,
):
    """Print how many patients are over weight vs. under weight by BMI.

    Parameters
    ----------
    bmis : list
        BMI values accepted by ``float()``.
    over_weight_threshold : float, optional
        A BMI strictly above this counts as over weight. Defaults to 25.
    under_weight_threshold : float, optional
        A BMI strictly below this counts as under weight. Defaults to 18.
    large_gap : int, optional
        When the two counts differ by more than this, an extra "To many ..."
        line is printed for the larger group. Defaults to 100.

    The defaults reproduce the previously hard-coded values.
    NOTE(review): commonly cited adult cut-offs are 18.5 (under weight) and
    25 inclusive (over weight) - confirm whether these defaults are intended.
    """
    # Calculating data
    bmi_values = [float(raw_bmi) for raw_bmi in bmis]
    num_over_weight = sum(1 for value in bmi_values if value > over_weight_threshold)
    num_under_weight = sum(1 for value in bmi_values if value < under_weight_threshold)
    diff = abs(num_over_weight - num_under_weight)

    # Printing formatted text
    print(f"There are {num_over_weight} over weight patients and {num_under_weight} under weight patients.")

    if num_over_weight > num_under_weight:
        if diff > large_gap:
            print("To many over weight patients.")
        print("More number of over weight patients.")
    elif num_over_weight == num_under_weight:
        print("Equal number of over weight and under weight patients.")
    else:
        if diff > large_gap:
            print("To many under weight patients.")
        print("More number of under weight patients.")

# Function test: print the over weight / under weight counts for the loaded `bmis`
find_weight_distribution(bmis)

There are 1091 over weight patients and 15 under weight patients.
To many over weight patients.
More number of over weight patients.


---
## Project Conclusion:

These are some of the data analysis ideas that came to mind while I was completing this project. There are endless things to analyse in the given data file.

Right now I have implemented 10 functions for the given data. 

Code reviews and ideas for further extensions are welcome!

Mail on: armaanbarak@outlook.com

Alternatively, reply on the respective GitHub id or Codecademy id: Armaan_Barak

* * *