In [7]:
import numpy as np
import sys

In [19]:
# Message printed whenever an answer falls outside the range used for min-max scaling.
_RANGE_WARNING = 'WARNING: value lies outside of training data range and prediction may lose accuracy. Consider rebalancing model.'

# (minimum, maximum) pairs used both for the out-of-range warning and for
# min-max scaling, so the two can never disagree.
_SCALER_RANGES = {
    'year': (2000, 2015),
    'u5': (2.3, 224.9),
    'am': (49.384, 703.677),
    'ac': (0, 17.87),
    'bmi': (19.8, 32.1),
    'p': (8, 99),
    'hb': (12, 99),
    'thin': (0.1, 27.7),
    'hiv': (0.01, 21.68),
    'sch': (1.1, 14),
    'gdp': (148, 112418),            # raw GDP: only used for the warning
    'gdp_l': (4.997212, 11.629979),  # log(GDP): used for scaling
}


def _ask_number(prompt, cast, key, must_be_positive=False):
    """Prompt until the answer parses with ``cast``; warn (but accept) out-of-range values."""
    low, high = _SCALER_RANGES[key]
    while True:
        try:
            value = cast(input(prompt))
            # float() accepts 'nan' / 'inf'; those would silently poison the prediction.
            if isinstance(value, float) and not np.isfinite(value):
                raise ValueError('non-finite value')
        except ValueError:
            # Only parsing problems are retried. KeyboardInterrupt / EOFError
            # propagate so the kernel can still interrupt the prompt loop.
            print('Please enter a number.')
            continue
        if must_be_positive and value <= 0:
            # The caller takes a logarithm of this value.
            print('Please enter a number greater than 0.')
            continue
        if value < low or value > high:
            print(_RANGE_WARNING)
        return value


def _ask_flag(prompt):
    """Prompt until the user answers exactly 0 or 1 and return that integer."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            value = None
        if value in (0, 1):
            return value
        print('Please enter either exactly 0 (for No) or 1(for Yes).')


def _scale(value, key):
    """Min-max scale ``value`` with the range registered under ``key``."""
    low, high = _SCALER_RANGES[key]
    return (value - low) / (high - low)


def predict_life_expectancy():
    """Interactively collect indicators and return a life-expectancy prediction.

    The user first chooses whether to include the inputs drawn from medical
    records ("y"), leave them out ("n"), or quit ("q"). Every numeric answer
    is re-prompted until it parses; answers outside the scaler range are
    accepted but trigger a warning. Each feature is min-max scaled and fed
    into a linear formula (intercept plus coefficient * scaled feature).

    Returns:
        str: ``"Predicted life expectancy: <value>"`` with the value rounded
        to two decimals, or ``None`` when the user quits with "q".

    Raises:
        KeyboardInterrupt, EOFError: propagated from ``input()`` so that the
        prompt loops can be interrupted.
    """
    choice = ''
    # Prompts the user to select a valid option (y/n/q) until one is selected.
    while choice not in ('y', 'n', 'q'):
        print('Please input "y" for "Yes" or "n" for "No" or "q" for "Quit".')
        # Gives users the option to exclude metadata based on individuals' medical records.
        choice = input('Do you want to use data drawn from medical records? (Or press "q" for "Quit".)').strip().lower()

    if choice == 'q':
        print('Exiting function.')
        return None

    use_medical = choice == 'y'

    # Questions are asked in the same order for both models; the medical block
    # is simply skipped for the reduced model.
    year = _ask_number('What is the year?', int, 'year')
    u5 = _ask_number('What is the number of deaths under 5 years old per 1000 people?', float, 'u5')
    am = _ask_number('What is the adult mortality rate?', float, 'am')
    ac = _ask_number('What is the yearly alcohol consumption in litres per capita?', float, 'ac')
    if use_medical:
        bmi = _ask_number("What is the population's average BMI?", float, 'bmi')
        p = _ask_number('What is the percentage of polio vaccination coverage?', int, 'p')
        hb = _ask_number('What is the percentage of hepatitis B vaccination coverage?', int, 'hb')
        thin = _ask_number('What is the percentage of medically recorded thinness among ages 10-19?', float, 'thin')
        hiv = _ask_number('How many HIV cases are there per 1000 live births?', float, 'hiv')
    ecd = _ask_flag('Is the economy status developed?')
    sch = _ask_number('What is the average years of schooling for the population?', float, 'sch')
    gdp = _ask_number('What is the GDP per capita?', float, 'gdp', must_be_positive=True)

    # Transform GDP (guaranteed > 0 above, so the log is finite).
    gdp_l = np.log(gdp)

    # Scaling the units of the variables to be between 0 and 1.
    sc_year = _scale(year, 'year')
    sc_u5 = _scale(u5, 'u5')
    sc_am = _scale(am, 'am')
    sc_ac = _scale(ac, 'ac')
    sc_ecd = ecd  # already 0/1, no scaling needed
    sc_sch = _scale(sch, 'sch')
    sc_gdp_l = _scale(gdp_l, 'gdp_l')

    if use_medical:
        sc_bmi = _scale(bmi, 'bmi')
        sc_p = _scale(p, 'p')
        sc_hb = _scale(hb, 'hb')
        sc_thin = _scale(thin, 'thin')
        sc_hiv = _scale(hiv, 'hiv')
        # Full model: intercept plus coefficient * scaled feature.
        prediction = (76.3813 + sc_year*0.4487 + sc_u5*-16.8442 + sc_am*-30.8633 +
                      sc_ac*0.8901 + sc_bmi*-2.4692 + sc_p*1.1625 + sc_hb*-0.8044 +
                      sc_thin*-1.3312 + sc_hiv*1.2507 + sc_ecd*0.8520 +
                      sc_sch*0.9995 + sc_gdp_l*3.9510)
    else:
        # Reduced model (no medical-record features) with its own coefficients.
        prediction = (75.6883 + sc_year*0.3419 + sc_u5*-16.9706 + sc_am*-29.9728 +
                      sc_ac*1.0791 + sc_ecd*1.2061 + sc_sch*0.6785 + sc_gdp_l*3.4130)

    return "Predicted life expectancy: " + str(round(prediction, 2))

In [21]:
# Run the interactive predictor; as the last expression of the cell, its
# return value (the prediction string) is displayed as the cell output.
predict_life_expectancy()

Please input "y" for "Yes" or "n" for "No" or "q" for "Quit".


Do you want to use data drawn from medical records? (Or press "q" for "Quit".) n
What is the year? 200




What is the number of deaths under 5 years old per 1000 people? 1




What is the adult mortality rate? 1




What is the yearly alcohol consumption in litres per capita? 1
Is the economy status developed? 1
What is the average years of schooling for the population? 1




What is the GDP per capita? 1




'Predicted life expectancy: 35.67'