# U.S. Medical Insurance Costs

## by Mikheltodd

### Importing Dataset

In [1]:
import csv
# Load every row of insurance.csv into patients_data, keyed by a running
# patient id that starts at 1 (the first data row is patient 1).
with open('insurance.csv', newline='') as csvfile:
    # No fieldnames are passed, so DictReader takes the column names from the
    # first line of the file and yields one dict per remaining row.
    reader = csv.DictReader(csvfile)
    patients_data = {}
    # enumerate numbers the rows for us instead of a hand-maintained counter.
    for id_patient, patient in enumerate(reader, start=1):
        patients_data[id_patient] = patient

### Type Casting Data

In [2]:
# Rename every raw CSV column to its display name, converting numeric fields.
# Each entry is (raw CSV key, display key, converter); a converter of None
# means the value is stored exactly as it was read.
column_casts = (
    ('age', 'Age', int),
    ('sex', 'Sex', None),
    ('bmi', 'BMI', float),
    ('children', 'No. of Children', int),
    ('smoker', 'Smoker?', None),
    ('region', 'Region', None),
    ('charges', 'Insurance Cost', float),
)
for patient in patients_data.values():
    for raw_key, display_key, convert in column_casts:
        raw_value = patient[raw_key]
        patient[display_key] = raw_value if convert is None else convert(raw_value)
        # The raw key is removed only after the new value has been stored.
        del patient[raw_key]

### Testing Imported Data

In [3]:
# print('Patients Data')
# print('-'*30)
# for id_p, patient in patients_data.items():
#     print(f'ID: {id_p}')
#     print(f"Age: {patient['Age']}")
#     print(f"Sex: {patient['Sex']}")
#     print(f"BMI: {patient['BMI']}")
#     print(f"Children: {patient['No. of Children']}")
#     print(f"Smoker?: {patient['Smoker?']}")
#     print(f"Region: {patient['Region']}")
#     print(f"Charges: $ {patient['Insurance Cost']}")
#     print('-'*30)

### Functions

In [4]:
# Variable to List
# This is a helper function to create a list of elements for each variable
def variable_to_list(patients_data, variable):
    """Return the value stored under `variable` for every patient.

    Args:
        patients_data: Mapping of patient id to that patient's record (a
            dict of field name to value).
        variable: Field name to read from each record.

    Returns:
        A list with one entry per patient, in the iteration order of
        `patients_data`.

    Raises:
        KeyError: If any record has no `variable` key.
    """
    return [record[variable] for record in patients_data.values()]
    
# Analysis of quantitative variable
# This function is used to calculate average, min and max of a quantitative variable
def cuantitative_analysis(patients_data, variable):
    """Print and return the average, minimum and maximum of a numeric field.

    Args:
        patients_data: Mapping of patient id to that patient's record (a
            dict of field name to value).
        variable: Name of the numeric field to summarise.

    Returns:
        A dict with the keys 'Average', 'Minimum' and 'Maximum'. The values
        are unrounded; only the printed report rounds to two decimals.

    Raises:
        ZeroDivisionError: If `patients_data` contains no patients.
    """
    variable_list = variable_to_list(patients_data, variable)
    # 'Average' is evaluated first, so an empty dataset fails on the division
    # before min()/max() are reached. Using max() instead of a running
    # maximum seeded with 0 also gives the right answer when every value is
    # negative.
    variable_analysis_results = {
        'Average': sum(variable_list) / len(variable_list),
        'Minimum': min(variable_list),
        'Maximum': max(variable_list),
    }
    print("-"*100)
    print(f"*** {variable} Analysis ***")
    print("-"*100)
    print(f"- The average {variable} in the dataset of patients is {round(variable_analysis_results['Average'], 2)}.")
    print(f"- The minimum {variable} in the dataset of patients is {round(variable_analysis_results['Minimum'],2)}.")
    print(f"- The maximum {variable} in the dataset of patients is {round(variable_analysis_results['Maximum'],2)}.")
    print("-"*100,'\n')
    return variable_analysis_results

# Analysis of qualitative variable
# This function is used to calculate proportions by categories
def cualitative_analysis(patients_data, variable):
    """Print and return the count and percentage share of each category.

    Args:
        patients_data: Mapping of patient id to that patient's record (a
            dict of field name to value).
        variable: Name of the categorical field to summarise.

    Returns:
        A dict mapping each distinct category to a tuple
        `(count, percentage)`, where the percentage is rounded to two
        decimals. Categories appear in the order they are first seen. The
        dict is empty when there are no patients.
    """
    variable_list = variable_to_list(patients_data, variable)
    total = len(variable_list)
    # One dict is enough to count: it already tracks which categories have
    # been seen, so no parallel list or second membership check is needed.
    cat_dict = {}
    for element in variable_list:
        cat_dict[element] = cat_dict.get(element, 0) + 1
    variable_analysis_results = {}
    print("-"*100)
    print(f"*** {variable} Analysis ***")
    print("-"*100)
    for key, value in cat_dict.items():
        p_value = round(100*value/total, 2)
        variable_analysis_results[key] = (value, p_value)
        print(f"- {key}: {p_value}% of patients.")
    print("-"*100,'\n')
    return variable_analysis_results

### Testing Functions

In [5]:
age_analysis_results = cuantitative_analysis(patients_data, 'Age')
sex_analysis_results = cualitative_analysis(patients_data, 'Sex')
bmi_analysis_results = cuantitative_analysis(patients_data, 'BMI')
children_analysis_results = cuantitative_analysis(patients_data, 'No. of Children')
smoker_analysis_results = cualitative_analysis(patients_data, 'Smoker?')
region_analysis_results = cualitative_analysis(patients_data, 'Region')
charges_analysis_results = cuantitative_analysis(patients_data, 'Insurance Cost')

----------------------------------------------------------------------------------------------------
*** Age Analysis ***
----------------------------------------------------------------------------------------------------
- The average Age in the dataset of patients is 39.21.
- The minimum Age in the dataset of patients is 18.
- The maximum Age in the dataset of patients is 64.
---------------------------------------------------------------------------------------------------- 

----------------------------------------------------------------------------------------------------
*** Sex Analysis ***
----------------------------------------------------------------------------------------------------
- female: 49.48% of patients.
- male: 50.52% of patients.
---------------------------------------------------------------------------------------------------- 

----------------------------------------------------------------------------------------------------
*** BMI Analysis ***
---------