# U.S. Medical Insurance Costs

In [5]:
import csv

In [6]:
insdata = []
with open("insurance.csv") as insfile:
    for x in insfile.readlines():
        string = x
        stringlst = string.split(",")
        stringlst[-1] = stringlst[-1][:-2] #take away the '/n' characters
        #print(stringlst)
        if stringlst[0] == "age":
            continue
        dict = {"Age": stringlst[0], "Sex": stringlst[1], "BMI": stringlst[2], "Number of Children": stringlst[3], "Smoker": stringlst[4], "Region": stringlst[5], "Charge": stringlst[6]}
        #print(dict)
        insdata.append(dict)
        #print(insdata)

In [7]:
def avg(subjects, key): #subjects is a list of the insurance holders (that are dictionaries), and specifies what key to compare
    if len(subjects) == 0:
        print("No subjects in sample.")
        return 0
    total = 0
    for x in range(len(subjects)):
        total += float(subjects[x][key])
    avg = total / len(subjects)
    print("The average {key} with this sample is {avg}".format(key = key, avg = avg))
    return avg

In [8]:
avg(insdata, "Charge")
avg(insdata, "Age")
avg(insdata, "BMI")
avg(insdata, "Number of Children")

The average Charge with this sample is 13244.379334013456
The average Age with this sample is 39.20702541106129
The average BMI with this sample is 30.663396860986538
The average Number of Children with this sample is 1.0949177877429


1.0949177877429

In [9]:
def buildSample(insdata, key, keyvalue): #key & keyvalue are used to find a new sample given the specific parameters, i.e. find a sample where it only contains people with 1 child only
    #recall that insdata is a list of dictionaries
    #also recall that the values in the insdata dictionaries are ALL strings
    newsample = []
    for x in range(len(insdata)):
        if insdata[x][key] != keyvalue:
            continue #invalid data, move on to next person
        #valid data, add the dictionary to the new sample
        #print(insdata[x])
        newsample.append(insdata[x])
    return newsample

In [10]:
ParentOf2 = buildSample(insdata, "Number of Children", "2")
avg(ParentOf2, "Charge")
avg(ParentOf2, "Age")
avg(ParentOf2, "BMI")
avg(ParentOf2, "Number of Children")

The average Charge with this sample is 14928.391841249999
The average Age with this sample is 39.44583333333333
The average BMI with this sample is 30.97810416666667
The average Number of Children with this sample is 2.0


2.0

In [11]:
def buildMultiVariSample(insdata, keys, keyvalues): #keys & keyvalues are used to find a new sample given the specific parameters, i.e. find a sample where it only contains people with 1 child only
    #modified version where this takes in lists of parameters rather than a single one
    #recall that insdata is a list of dictionaries
    #also recall that the values in the insdata dictionaries are ALL strings
    newsample = []
    if len(keys) != len(keyvalues):
        print(len(keys), len(keyvalues))
        print("Invalid Parameters")
        return newsample
    for x in range(len(insdata)):
        count = 0
        for y in range(len(keys)): 
            if insdata[x][keys[y]] != keyvalues[y]:
                break
            count +=1 #if count gets to be as long as the key length, then that measn that data set is valid
        if count != len(keys):
            continue #invalid data, move on to next person
        #valid data, add the dictionary to the new sample
        #print(insdata[x])
        newsample.append(insdata[x])
    return newsample

In [12]:
f31 = buildMultiVariSample(insdata, ["Age", "Sex"], ["31", "female"])
avg(f31, "Charge")
avg(f31, "BMI")
avg(f31, "Number of Children")

The average Charge with this sample is 8744.111253846153
The average BMI with this sample is 29.50884615384615
The average Number of Children with this sample is 1.0769230769230769


1.0769230769230769

In [13]:
def rangedSample(insdata, key, minval, maxval): #takes in the insurance data, a key for the dictionary, and 2 number values for the key
    sample = []
    for x in range(minval, maxval+1):
        sample += buildSample(insdata, key, str(x))
    return sample

In [14]:
Age25to43 = rangedSample(insdata, "Age", 25, 43)
avg(Age25to43, "Charge")
avg(Age25to43, "Age")
avg(Age25to43, "BMI")
avg(Age25to43, "Number of Children")

The average Charge with this sample is 11423.286593849203
The average Age with this sample is 33.88690476190476
The average BMI with this sample is 30.19545634920634
The average Number of Children with this sample is 1.3908730158730158


1.3908730158730158

In [15]:
#Now that we have our prototype functions, lets create a class so we can do some more interesting things with this
#Mainly, instead of using BuildMultiVariSample, what if we had an easier way of combining samples by overloading the 
#operators. To do this we will need a class to create objects
class Insurance:
    def __init__(self):
        self.insdata = []
        self.defaultdata = []
        #this entire section should only happen once
        with open("insurance.csv") as insfile:
            count = 0
            for x in insfile.readlines():
                string = x
                stringlst = string.split(",")
                stringlst[-1] = stringlst[-1][:-2] #take away the '/n' characters
                #print(stringlst)
                if stringlst[0] == "age":
                    continue
                count += 1 #this will be the ID of the person which will make it easier for some functions
                dict = {"ID": str(count), "Age": stringlst[0], "Sex": stringlst[1], "BMI": stringlst[2], "Number of Children": stringlst[3], "Smoker": stringlst[4], "Region": stringlst[5], "Charge": stringlst[6]}
                #print(dict)
                self.defaultdata.append(dict)
                #print(insdata)
                
    #def defaultSample(self): #the only time default data is ever called, uses more memory but a lot faster
     #   if len(self.defaultdata) > 0 
     #       self.insdata = self.defaultdata
        
    
    def buildSample(self, key, keyvalue): #key & keyvalue are used to find a new sample given the specific parameters, i.e. find a sample where it only contains people with 1 child only
        #recall that insdata is a list of dictionaries
        #also recall that the values in the insdata dictionaries are ALL strings
        
        self.insdata = []
        
        for x in range(len(self.defaultdata)):
            if self.defaultdata[x][key] != keyvalue:
                continue #invalid data, move on to next person
            #valid data, add the dictionary to the new sample
            self.insdata.append(self.defaultdata[x])
        
    
    def rangedSample(self, key, minval, maxval): #takes in the insurance data, a key for the dictionary, and 2 number values for the key
        sample = []
        for x in range(minval, maxval+1):
            self.buildSample(key, str(x)) #recall taht buildSample first refreshes all its lost memory
            sample += self.insdata
            
        self.insdata = sample
        
    def avg(self, key): #subjects is a list of the insurance holders (that are dictionaries), and specifies what key to compare
        if len(self.insdata) == 0:
            print("No subjects in sample.")
            return 0
        total = 0
        for x in range(len(self.insdata)):
            total += float(self.insdata[x][key])
        avg = total / len(self.insdata)
        print("The average {key} with this sample is {avg}".format(key = key, avg = avg))
        return avg
    
    def __IADD__(self, other): #unionize with another sample, check for duplicates
        for x in range(other.insdata):
            if other.insdata[x] in self.insdata:
                continue
            self.insdata.append(other.insdata[x])
        return self
        
    def __IMUL__(self, other): #check only for ones that are the shared, this will overwrite the first object
        #pass
        combined = []
        for x in range(len(self.insdata)):
            if self.insdata[x] in other.insdata:
                combined.append(self.insdata[x])
        self.insdata = combined
        return self
    
    def __mul__(self, other):
        newsample = Insurance()
        newsample.insdata = []
        
        for x in range(len(self.insdata)):
            if self.insdata[x] in other.insdata:
                newsample.insdata.append(self.insdata[x])
        return newsample
    
    def __add__(self, other):
        newsample = Insurance()
        newsample.insdata = self.insdata
        
        for x in range(len(other.insdata)):
            if other.insdata[x] in self.insdata:
                continue
            newsample.insdata.append(other.insdata[x])
        return newsample

In [16]:
test = Insurance()
test.buildSample("Sex", "female")
#print(test.insdata)
print("Female data:")
test.avg("Charge")
test.avg("Age")
test.avg("BMI")
test.avg("Number of Children")

test2 = Insurance()
test2.rangedSample("BMI", 20, 23)
#print(test2.insdata)
print("\nBMI 20-23 data:")
test2.avg("Charge")
test2.avg("Age")
test2.avg("BMI")
test2.avg("Number of Children")

test3 = Insurance()
test3.buildSample("Number of Children", "5")
#print(test3.insdata)
print("\nParent of 5 children data:")
test3.avg("Charge")
test3.avg("Age")
test3.avg("BMI")
test3.avg("Number of Children")

test4 = test2 + test3
#print(test4.insdata)
print("\nBMI 20-23 and/or Parent of 5 data:")
test4.avg("Charge")
test4.avg("Age")
test4.avg("BMI")
test4.avg("Number of Children")

test5 = test * test3
#print(test5.insdata)
print("\nFemale and Mother of 5 data:")
test.avg("Charge")
test.avg("Age")
test.avg("BMI")
test.avg("Number of Children")

test *= test2
#print(test.insdata)
print("\nFemale & BMI 20-23 data:")
test.avg("Charge")
test.avg("Age")
test.avg("BMI")
test.avg("Number of Children")


Female data:
The average Charge with this sample is 12569.575015271894
The average Age with this sample is 39.503021148036254
The average BMI with this sample is 30.377749244713023
The average Number of Children with this sample is 1.0740181268882176

BMI 20-23 data:
The average Charge with this sample is 7029.585
The average Age with this sample is 37.0
The average BMI with this sample is 22.5
The average Number of Children with this sample is 2.0

Parent of 5 children data:
The average Charge with this sample is 8786.024088888887
The average Age with this sample is 35.611111111111114
The average BMI with this sample is 29.605000000000004
The average Number of Children with this sample is 5.0

BMI 20-23 and/or Parent of 5 data:
The average Charge with this sample is 8610.38018
The average Age with this sample is 35.75
The average BMI with this sample is 28.894499999999994
The average Number of Children with this sample is 4.7

Female and Mother of 5 data:
The average Charge with this 

4.777777777777778