In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import math
from math import trunc
from datetime import datetime, date, timedelta
import os
%matplotlib inline

In [None]:
# Folder scanned for input files; every regular file found in it is read as CSV.
directory = '../data/aggregates/'

# CTIS data collected by CMU and Facebook: read each file and stack the frames row-wise.
aggregate_paths = [os.path.join(directory, filename) for filename in os.listdir(directory)]
US_ML = pd.concat([pd.read_csv(path) for path in aggregate_paths if os.path.isfile(path)])

# Distinct values of the `state` column, in order of first appearance.
regions1 = US_ML['state'].unique()

In [None]:
# Ground truth of serology data from CDC:
directory1 = '../data/'
US_official = pd.read_csv(os.path.join(directory1, "Nationwide_Commercial_Laboratory_Seroprevalence_Survey.csv"))

# Missing values in the rate column are encoded as 777; turn them into NaN.
# (This only re-codes the values; no row is removed by this line.)
US_official['Rate (%) [All Ages Cumulative Prevalence, Rounds 1-30 only]'] = US_official['Rate (%) [All Ages Cumulative Prevalence, Rounds 1-30 only]'].replace(777, np.nan)
# Keep rounds 1-30 only (per its name, the rate column covers just those rounds).
US_official.drop(US_official[US_official['Round']>30].index,inplace=True)

# Drop the rows that have no estimated infection count, then parse the remaining
# counts, which are strings with thousands separators, into int.
US_official.drop(US_official[US_official['Estimated cumulative infections count'].isna()].index,inplace=True)
US_official['Estimated cumulative infections count'] = US_official['Estimated cumulative infections count'].apply(lambda x: int(x.replace(",","")))

# Distinct survey sites, in order of first appearance.
regions2 = US_official.Site.unique()

In [None]:
def format_date(date_string, start_end):
    """Turn a specimen-collection date range into one ISO ``YYYY-MM-DD`` date.

    Two layouts are accepted:

    * same-year ranges, e.g. ``'Jul 27 - Aug 13, 2020'``;
    * ranges where each side carries its own year,
      e.g. ``'Dec 28, 2020 - Jan 10, 2021'``.

    The layout is recognised from the structure of the text (does the start
    side contain its own ``', <year>'``?) instead of from the string length,
    so ranges with single-digit days such as ``'May 1, 2020 - Jun 1, 2021'``
    are parsed correctly.

    Parameters
    ----------
    date_string : str
        The range, with the two sides separated by ``' - '``.
    start_end : str
        ``'start'`` returns the first date; any other value returns the last.

    Returns
    -------
    str
        The selected date formatted as ``'%Y-%m-%d'``.

    Raises
    ------
    ValueError
        If the text does not split into exactly two sides or a side cannot
        be parsed with the ``'%b %d %Y'`` format.
    """
    start_part, end_part = [part.strip() for part in date_string.split(' - ')]

    # The end side always carries the year.
    end_month_day, end_year = end_part.split(', ')

    if ',' in start_part:
        # Start side has its own year (the range crosses a year boundary).
        start_month_day, start_year = start_part.split(', ')
    else:
        # Same-year range: the start side borrows the year of the end side.
        start_month_day, start_year = start_part, end_year

    start_date = datetime.strptime(start_month_day + ' ' + start_year, '%b %d %Y')
    end_date = datetime.strptime(end_month_day + ' ' + end_year, '%b %d %Y')

    selected = start_date if start_end == 'start' else end_date
    return selected.strftime('%Y-%m-%d')

In [None]:
# Save start and end dates of rounds:
# Store the first and last day of every round as ISO strings; rows without a
# date range are skipped by na_action='ignore' and stay missing.
collection_ranges = US_official['Date Range of Specimen Collection']
US_official['start_date'] = collection_ranges.map(lambda date_range: format_date(date_range, 'start'), na_action='ignore')
US_official['end_date'] = collection_ranges.map(lambda date_range: format_date(date_range, 'end'), na_action='ignore')

In [None]:
# Length of each round: end_date minus start_date of the same row.
# Values that are not strings (presumably the NaN left by na_action='ignore' — verify)
# are replaced by timedelta(14) and timedelta(0), so those rows come out as 14 days
# before the +1 below.
US_official['days between rounds'] = US_official['end_date'].map(lambda x: datetime.strptime(x, '%Y-%m-%d') if (type(x)== str) else timedelta(days=14)) - US_official['start_date'].map(lambda x: datetime.strptime(x, '%Y-%m-%d') if (type(x)== str) else timedelta(days=0))
# Convert the timedeltas to a day count that includes both the first and the last day.
US_official['days between rounds'] = US_official['days between rounds'].map(lambda x: x.days + 1)

In [None]:
# Official data from Our World in Data:
# NOTE(review): the header comment of this cell credits Our World in Data, but the
# file name matches the CDC dataset of the same title — confirm the actual source.
US_cases = pd.read_csv(os.path.join(directory1, "United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv"))

# Parse the submission dates (written as mm/dd/YYYY) into datetime objects:
US_cases['submission_date'] = US_cases['submission_date'].apply(lambda x: datetime.strptime(x, '%m/%d/%Y'))

# The counts are strings with thousands separators; convert them to int.
US_cases['new_case'] = US_cases['new_case'].apply(lambda x: int(x.replace(",","")))
US_cases['tot_death'] = US_cases['tot_death'].apply(lambda x: int(x.replace(",","")))

# Distinct state codes, in order of first appearance.
regions3 = US_cases.state.unique()

# Example to see data: 'AK' is the postal code of Alaska (Arkansas would be 'AR').
data = US_cases[US_cases['state'] == 'AK'].sort_values(by='submission_date') # rows in chronological order
plt.plot( data['submission_date'], data['new_case']) # plots the 'new_case' column (not a running total) over time

plt.show()

In [None]:
# state_alphaCode sets the codes used to identify the states:
# Read the table of state names and codes; the numeric code is not needed.
states = pd.read_csv(os.path.join(directory1, "State_codes.csv"))
states = states.drop(columns=['Numeric Code  '])
states = states.rename(columns={' Name ' : 'State', ' Alpha Code ': 'Alpha Code'})
# Normalise the names to "First letter upper-case, rest lower-case".
states['State'] = states['State'].map(lambda name: name.strip().capitalize())
states_unformatted = states.to_dict('records')

# state_alphaCode maps each normalised state name to its (stripped) alpha code.
state_alphaCode = {record['State']: record['Alpha Code'].strip() for record in states_unformatted}

# Add state codes in US_ML: names present in the mapping are replaced by their
# code, any other value is kept as the capitalised name.
US_ML['state code'] = US_ML['state'].map(lambda name: name.capitalize()).replace(state_alphaCode)

In [None]:
# Definite list of states to be used:
# Sorted list of the distinct state codes found in US_ML.
regions = sorted(US_ML['state code'].unique())

In [None]:
# COVID-19 case estimates derived from the SARS-CoV-2 concentration in wastewater.
# WW_cases holds the estimated infected population over time.
WW_cases = pd.read_csv(os.path.join(directory1, "ww_estimate_infections.csv"))
WW_cases = WW_cases.drop(columns='id') # the row id is not needed

# Attach the state code: names found in state_alphaCode are replaced by their code.
WW_cases['state code'] = WW_cases['Country'].map(lambda name: name.capitalize()).replace(state_alphaCode)

# The file also contains entries that are not in the state-code table
# (e.g. territories); remove every row whose code is unknown.
known_codes = set(state_alphaCode.values())
unknown_rows = WW_cases.index[~WW_cases['state code'].isin(known_codes)]
WW_cases.drop(unknown_rows, inplace=True)

In [None]:
# Reshape the wide wastewater table (one row per state, one column per date)
# into the long layout used by the other dataframes: one row per (site, date).
# The first and the last column are skipped: the last one is 'state code';
# the first one is presumably the name column — verify against the file.
dates_WW = WW_cases.columns[1:-1]

# Collect every record first and build the frame once. Adding rows one at a
# time with .loc copies the growing frame on each insert.
ww_records = []
for state in WW_cases['state code'].unique():
    # Only the first row of each state is used.
    state_row = WW_cases[WW_cases['state code'] == state].iloc[0]
    ww_records.extend([state, date_label, state_row[date_label]] for date_label in dates_WW)

# The value column is called 'ww_cases' so that it is not confused with other data.
WW_new = pd.DataFrame(ww_records, columns=['site', 'date', 'ww_cases'])
WW_cases = WW_new
# Now we have the same format as in the other dataframes.

# Classes

In [None]:
class AcumulatedIncidences:
    """Common state and helpers shared by the incidence models.

    The class keeps references to the notebook's dataframes as class
    attributes, but note that the methods below read the module-level names
    ``US_official``, ``US_ML``, ``US_cases`` and ``WW_cases`` directly, so
    they follow whatever those names are bound to when a method runs.
    """

    # Dataframes to be used (bound when the class statement is executed).
    US_ML = US_ML
    US_official = US_official
    US_cases = US_cases
    WW_cases = WW_cases
    phi = (1+math.sqrt(5))/2 # golden ratio, for the purpose of making graphs look nice
    # Given all datasets have rows associated to these states, we will use these.
    regions = list(US_ML['state code'].unique())

    # A start_date and an end_date define the interval of study.

    def __init__(self,signals=['p_cli','p_rf','p_XGB','p_glm','new_case','ww_cases']):
        """Store the explanatory variables and create empty result containers.

        ``signals`` is a list of column names; each must exist in ``US_ML``,
        ``US_cases`` or ``WW_cases``. The default list is shared between
        instances and is stored as-is, so it must not be modified in place.
        """
        self.signals = signals
        self.incidenceVectors = dict()
        self.referenceVector = np.array([])
        self.incidenceDataFrame = None
        self.correlationFactors = dict()

    def getStartDate(self, d, region):
        """Return the first available ``end_date`` of ``region`` in ``[d, inf)``.

        Raises ``IndexError`` when the region has no round ending on or after ``d``.
        """
        candidates = US_official[(US_official['end_date'] >= d) & (US_official['Site'] == region)]
        return candidates['end_date'].iloc[0]

    def getEndDate(self, d, region):
        """Return the last available ``end_date`` of ``region`` in ``(-inf, d]``.

        Raises ``IndexError`` when the region has no round ending on or before ``d``.
        """
        candidates = US_official[(US_official['end_date'] <= d) & (US_official['Site'] == region)]
        return candidates['end_date'].iloc[-1]

    def calculateDaysBetween(self, date1, date2):
        """Return the days from ``date1`` to ``date2`` (``'YYYY-MM-DD'`` strings).

        Returns ``None`` when either argument is not a string (``TypeError``
        from ``strptime``), e.g. for a missing date.
        """
        try:
            start_date = datetime.strptime(date1, '%Y-%m-%d')
            end_date = datetime.strptime(date2, '%Y-%m-%d')
            days = (end_date-start_date).days
        except TypeError:
            days = None
        return days

    def calculateVectorEntryForRegion(self, region, signal, start_date, end_date):
        """Sum ``signal`` for ``region`` between two dates (both inclusive).

        The requested dates are first snapped to round end dates with
        ``getStartDate`` / ``getEndDate``. The signal is looked up in
        ``US_ML``, then ``US_cases``, then ``WW_cases``; the first frame that
        has a column with that name is used. The returned sums are the
        elements of the incidence vectors.

        Raises
        ------
        ValueError
            If ``signal`` is not a column of any of the three dataframes
            (previously this surfaced as an ``UnboundLocalError``).
        """
        start_date = self.getStartDate(start_date,region)
        end_date = self.getEndDate(end_date,region)

        # Each source names its date and region columns differently.
        if signal in US_ML.columns:
            frame, date_col, region_col = US_ML, 'date', 'state code'
        elif signal in US_cases.columns:
            frame, date_col, region_col = US_cases, 'submission_date', 'state'
        elif signal in WW_cases.columns:
            frame, date_col, region_col = WW_cases, 'date', 'site'
        else:
            raise ValueError('signal is not a column of US_ML, US_cases or WW_cases', signal)

        in_window = (frame[date_col] >= start_date) & (frame[date_col] <= end_date)
        data = frame[in_window & (frame[region_col] == region)]
        return data[signal].sum()

    def MLEcoefs(self, reference_vector, incidence_matrix):
        """Estimate linear-regression coefficients via the normal equations.

        Solves ``(X^T X) b = X^T y`` with ``X = incidence_matrix`` and
        ``y = reference_vector``. ``numpy.linalg.solve`` raises
        ``LinAlgError`` when ``X^T X`` is singular.
        """
        XtX = np.dot(incidence_matrix.T,incidence_matrix)
        Xty = np.dot(incidence_matrix.T,reference_vector)
        return np.linalg.solve(XtX,Xty)




class TemporalIncidences(AcumulatedIncidences):
    """Incidence model for one region across its serology rounds.

    ``interval_type`` selects how round end dates are paired:

    * ``'single1'`` - non-cumulative: previous round -> current round;
    * ``'from1'``   - cumulative: first round -> current round.

    Any other value, including the default ``'from0'``, makes ``zipRounds``
    raise ``ValueError``.
    """

    _RATE_COLUMN = 'Rate (%) [All Ages Cumulative Prevalence, Rounds 1-30 only]'

    def __init__(self, region, interval_type='from0', signals=['p_cli','p_rf','p_XGB','p_glm','new_case','ww_cases'], rounds=None):
        """Store the configuration; ``rounds`` defaults to every round of ``region``."""
        self.region = region
        self.interval_type = interval_type
        self.incidenceMatrix = np.empty((0, len(signals) + 1))
        self.rounds = list(US_official[US_official['Site'] == self.region]['Round']) if rounds is None else rounds
        AcumulatedIncidences.__init__(self, signals)

    def _selectedRounds(self):
        """Rows of ``US_official`` for this region whose round is in ``self.rounds``."""
        in_rounds = US_official['Round'].map(lambda x: x in self.rounds)
        return US_official[(US_official['Site'] == self.region) & in_rounds]

    def calculateVectors(self):
        """Fill ``self.incidenceVectors`` and return it.

        The dict is keyed by ``'<start_date> <end_date>'``; each value is an
        array with one entry per signal, as computed by
        ``calculateVectorEntryForRegion`` for this region and date pair.
        """
        for start_date, end_date in self.zipRounds():
            key = '{0} {1}'.format(start_date, end_date)
            entries = [self.calculateVectorEntryForRegion(self.region, signal, start_date, end_date)
                       for signal in self.signals]
            self.incidenceVectors[key] = np.append(np.array([]), entries)
        return self.incidenceVectors

    def zipRounds(self):
        """Return the ``(start_date, end_date)`` pairs that define the incidence vectors."""
        dates = list(self._selectedRounds()['end_date'])

        if self.interval_type == 'single1':
            return zip(dates[:-1], dates[1:])
        if self.interval_type == 'from1':
            # Every interval starts at the first selected round.
            return zip([dates[0] for _ in dates[1:]], dates[1:])
        raise ValueError('unknown type of interval has been used', self.interval_type)

    def addReferenceVector(self):
        """Build, store and return the target vector.

        One entry per date pair: the rate of the round ending on the pair's
        end date, divided by 100 because it is a percentage.
        """
        rates = []
        for _start_date, end_date in self.zipRounds():
            round_rows = US_official[(US_official['end_date'] == end_date) & (US_official['Site'] == self.region)]
            rates.append(round_rows[self._RATE_COLUMN].iloc[0] / 100)
        self.referenceVector = np.append(np.array([]), rates)
        return self.referenceVector

    def calculateIncidenceMatrix(self, normalize_everything=False):
        """Build, store and return the design matrix, one row per date pair.

        Columns: a constant 1 (removed for ``'from1'``), one column per
        signal, the days covered by the interval divided by their maximum,
        and a rate taken from ``US_official`` (the first selected round for
        ``'from1'``, the previous round for ``'single1'``), divided by 100.

        ``new_case`` and ``ww_cases`` are divided by their column maximum;
        with ``normalize_everything=True`` every signal column is.
        """
        self.calculateVectors()
        self.incidenceMatrix = np.empty((0, len(self.signals) + 1))
        for start_date, end_date in self.zipRounds():
            # Leading 1 is the column for the constant coefficient.
            row = np.append(np.array([1]), self.incidenceVectors['{0} {1}'.format(start_date, end_date)])
            self.incidenceMatrix = np.vstack([self.incidenceMatrix, row])

        # Normalise new_case and ww_cases (or everything) by the column maximum.
        to_normalize = self.signals if normalize_everything else ['new_case','ww_cases']
        for signal in to_normalize:
            if signal not in self.signals:
                continue
            col_i = self.signals.index(signal) + 1 # +1 skips the constant column
            max_value = max(self.incidenceMatrix[:, col_i])
            self.incidenceMatrix[:, col_i] = self.incidenceMatrix[:, col_i] / max_value

        # Add the days between data points and the reference rates.
        if self.interval_type == 'from1':
            selected = self._selectedRounds()
            # The constant column is not used for cumulative intervals.
            self.incidenceMatrix = np.delete(self.incidenceMatrix, 0, 1)
            later_dates = np.array(selected['end_date'][1:].map(lambda x: datetime.strptime(x, '%Y-%m-%d')))
            first = datetime.strptime(selected['end_date'].iloc[0], '%Y-%m-%d')
            elapsed_days = pd.array(later_dates).map(lambda x: (x-first).days)
            # Days since the first round, scaled by their maximum.
            self.incidenceMatrix = np.column_stack([ self.incidenceMatrix, elapsed_days/max(elapsed_days) ])
            first_rates = np.array([selected[self._RATE_COLUMN].iloc[0]/100 for _ in range(len(later_dates))])
            self.incidenceMatrix = np.column_stack([ self.incidenceMatrix, first_rates ])

        elif self.interval_type == 'single1':
            selected = self._selectedRounds()
            later_dates = np.array(selected['end_date'][1:].map(lambda x: datetime.strptime(x, '%Y-%m-%d')))
            earlier_dates = np.array(selected['end_date'][:-1].map(lambda x: datetime.strptime(x, '%Y-%m-%d')))
            gap_days = pd.array(later_dates-earlier_dates).map(lambda x: x.days)
            # Days between consecutive rounds, scaled by their maximum.
            self.incidenceMatrix = np.column_stack([ self.incidenceMatrix, gap_days/max(gap_days) ])
            # Divided by 100 because the rate is a percentage.
            previous_rates = np.array(selected[self._RATE_COLUMN][:-1])/100
            self.incidenceMatrix = np.column_stack([ self.incidenceMatrix, previous_rates ])

        return self.incidenceMatrix

    def estimate(self, only_coefs=False, dates=False):
        """Fit the linear model and return coefficients or fitted values.

        Returns the coefficients when ``only_coefs`` is ``True``, otherwise
        the product of the design matrix and the coefficients. With
        ``dates=True`` the result is a tuple ``(values, end_dates)`` where
        ``end_dates`` is the region's first end date on or after 2020-01-01
        followed by the end date of every interval.
        """
        first_end_date = self.getStartDate('2020-01-01', self.region)
        end_dates = [first_end_date] + [end for (start, end) in self.zipRounds()]

        ref_vec = self.addReferenceVector()
        inc_mat = self.calculateIncidenceMatrix()
        coeff = self.MLEcoefs(ref_vec, inc_mat)

        result = coeff if only_coefs==True else np.dot(inc_mat, coeff)
        return (result, end_dates) if dates==True else result




class AllRegionsAllRounds(AcumulatedIncidences):
    """
    Works with multiple states at once. Used for nationwide models. The states used can be specified.

    ``interval_type`` is ``'single1'`` (previous round -> current round) or
    ``'from1'`` (first round -> current round); any other value makes
    ``zipRounds`` raise ``ValueError``.
    """

    _RATE_COLUMN = 'Rate (%) [All Ages Cumulative Prevalence, Rounds 1-30 only]'

    def __init__(self, interval_type='single1', states=None, signals=['p_cli','p_rf','p_XGB','p_glm','new_case','ww_cases']):
        """Store the configuration; ``states`` defaults to the notebook-level ``regions``."""
        self.interval_type = interval_type
        self.incidenceMatrix = np.empty((0,len(signals)+1))
        AcumulatedIncidences.__init__(self,signals)
        # Identity test on purpose: `states == None` is evaluated element by
        # element for NumPy arrays / pandas objects and cannot be used as a condition.
        self.regions = regions if states is None else states

    def zipRounds(self,region):
        """Return the ``(start_date, end_date)`` pairs of ``region`` that define the incidence vectors."""
        dates = list(US_official[US_official['Site'] == region]['end_date'])

        if self.interval_type == 'single1':
            return zip(dates[:-1], dates[1:])
        elif self.interval_type == 'from1':
            # Every interval starts at the first round of the region.
            return zip([dates[0] for _ in dates[1:]], dates[1:])
        else:
            raise ValueError('unknown type of interval has been used', self.interval_type)

    def calculateIncidenceMatrix(self, normalize_everything=False):
        """Build, store and return the design matrix for all regions stacked.

        For every region one row per date pair is produced with: a constant 1,
        one column per signal, the days covered by the interval divided by the
        region's maximum, and a rate from ``US_official`` divided by 100 (the
        region's first round for ``'from1'``, the previous round for
        ``'single1'``). ``new_case`` and ``ww_cases`` (or every signal when
        ``normalize_everything`` is true) are divided by their maximum within
        the region. For ``'from1'`` the constant column is removed at the end.
        """
        self.incidenceMatrix = np.empty((0,len(self.signals)+3))

        # Normalise new_case and ww_cases (or everything):
        to_normalize = self.signals if normalize_everything else ['new_case','ww_cases']
        for region in self.regions:
            temp_mat = np.empty((0,len(self.signals)+1))
            for start_date, end_date in self.zipRounds(region):
                row = np.array([1]) # constant column for the intercept
                for signal in self.signals:
                    row = np.append(row, self.calculateVectorEntryForRegion(region, signal, start_date, end_date))
                temp_mat = np.vstack([temp_mat, row])
            for signal in to_normalize:
                if signal in self.signals:
                    col_i = self.signals.index(signal)+1 # +1 skips the constant column
                    max_value = max(temp_mat[:,col_i])
                    temp_mat[:,col_i] = temp_mat[:,col_i]/max_value

            # Add days between data points and reference rates.
            # The region's rows are selected once and reused below.
            region_rows = US_official[US_official['Site']==region]
            a = np.array(region_rows['end_date'][1:])
            b = np.array(region_rows['end_date'][:-1])
            first = region_rows['end_date'].iloc[0]
            if self.interval_type == 'from1':
                egunak = np.array([self.calculateDaysBetween(first,xa) for xa in a])
                refs = np.array([region_rows[self._RATE_COLUMN].iloc[0]/100 for _ in range(len(a))])
            elif self.interval_type == 'single1':
                egunak = np.array([self.calculateDaysBetween(xb,xa) for (xa,xb) in zip(a,b)])
                refs = np.array([ xx/100 for xx in region_rows[self._RATE_COLUMN][:-1] ])
            temp_mat = np.column_stack([ temp_mat, egunak/egunak.max() ])
            temp_mat = np.column_stack([ temp_mat, refs ])
            self.incidenceMatrix = np.vstack([self.incidenceMatrix, temp_mat])

        if self.interval_type=='from1':
            # The constant column is not used for cumulative intervals.
            self.incidenceMatrix = np.delete(self.incidenceMatrix,0,1)

        return self.incidenceMatrix

    def addReferenceVector(self):
        """Build, store and return the target vector for all regions stacked.

        One entry per (region, date pair): the rate of the round ending on the
        pair's end date, divided by 100 because it is a percentage.
        """
        self.referenceVector = np.array([])
        for region in self.regions:
            for start_date, end_date in self.zipRounds(region):
                round_rows = US_official[(US_official['end_date'] == end_date) & (US_official['Site'] == region)]
                cumulativeIntegral = round_rows[self._RATE_COLUMN].iloc[0]
                self.referenceVector = np.append(self.referenceVector, cumulativeIntegral/100)
        return self.referenceVector

    def estimate(self, only_coefs=False, inc_mat=None):
        """Fit the linear model and return coefficients or fitted values.

        ``inc_mat`` may be a precomputed design matrix; when it is ``None``
        the matrix is built with ``calculateIncidenceMatrix()``. Returns the
        coefficients when ``only_coefs`` is ``True``, otherwise the product
        of the design matrix and the coefficients.
        """
        ref_vec = self.addReferenceVector()
        if inc_mat is None:
            inc_mat = self.calculateIncidenceMatrix()
        coeff = self.MLEcoefs(ref_vec,inc_mat)
        if only_coefs==True:
            return coeff
        else:
            estim = np.dot(inc_mat,coeff)
            return estim

# Genetic Programming

In [None]:
from deap import gp
from deap import creator
from deap import base
from deap import tools
import operator
import math
import random
from deap import algorithms
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [None]:
# Operators

def protectedAdd(left,right):
    """Return ``left + right``; a NaN sum (e.g. ``inf + -inf``) becomes ``+inf``."""
    total = left + right
    return float('inf') if math.isnan(total) else total
def protectedSub(left,right):
    """Return ``left - right``; a NaN difference (e.g. ``inf - inf``) becomes ``+inf``."""
    difference = left - right
    return float('inf') if math.isnan(difference) else difference
def protectedDiv(left, right):
    """Return ``left / right``, or ``+inf`` when the divisor is zero or the dividend is infinite."""
    if right == 0.0:
        return float('inf')
    if np.absolute(left) == float('inf'):
        return float('inf')
    return left / right
def protectedMul(left,right):
    """Return ``left * right``; the undefined product ``inf * 0`` (either order, either sign) becomes ``+inf``."""
    infinity = float('inf')
    left_infinite_times_zero = np.absolute(left) == infinity and right == 0.0
    if left_infinite_times_zero or (np.absolute(right) == infinity and left == 0.0):
        return infinity
    return left * right
def protectedLn(value):
    """Return the natural logarithm of ``value``, or ``+inf`` when ``value`` is not strictly positive."""
    if not value > 0.0:
        # Logarithm of a non-positive number (or NaN): return +inf instead of raising.
        return float('inf')
    return math.log(value)
def protectedExp(value):
    """Return ``e ** value``, or ``+inf`` for arguments that are not below 709."""
    if not value < 709:
        # math.exp overflows a double shortly above 709; return +inf instead of raising.
        return float('inf')
    return math.exp(value)

In [None]:
class GPclass:
    """Genetic-programming (DEAP) symbolic regression for a single state.

    Typical call order, as required by the attributes each method sets:
    ``getDataFrame`` (sets ``self.datuak``), ``setVarsOps`` (sets
    ``self.pset``), ``setCreator``, ``setToolbox`` (sets ``self.toolbox``,
    ``self.eval_type`` and ``self.max_depth``) and finally ``myEA``.
    """

    # Columns of self.datuak passed to an evolved expression, in argument order
    # (ARG0..ARG7 of the primitive set built by setVarsOps).
    _INPUT_COLUMNS = ('p_cli', 'p_rf', 'p_XGB', 'p_glm', 'new_case', 'ww_cases', 'ref', 'wbr')

    def __init__(self, state, interval):
        """Store the state code and the interval type (``'single1'`` or ``'from1'``)."""
        self.state = state
        self.interval = interval
        self.signals = ['p_cli','p_rf','p_XGB','p_glm','new_case','ww_cases']

    def getDataFrame(self, get_dates=False):
        """Build, store (``self.datuak``) and return the training dataframe.

        Columns are the six signals, then ``'wbr'`` (days covered by the
        interval, scaled) and ``'ref'`` (reference rate), in the order in
        which ``TemporalIncidences.calculateIncidenceMatrix`` appends them,
        plus the target ``'Y'``. Rows are labelled from 2 upwards, i.e. with
        the number of the round each interval ends on when rounds are
        consecutive and start at 1.

        With ``get_dates=True`` a tuple ``(dataframe, end_dates)`` is returned.
        """
        instance = TemporalIncidences(self.state, interval_type=self.interval)
        helburua = instance.addReferenceVector()
        datuak = instance.calculateIncidenceMatrix()
        if self.interval=='single1':
            # Drop the constant column; 'from1' matrices come without it.
            datuak = datuak[:,1:]
        # The index follows the number of rows instead of assuming exactly 30 rounds.
        row_labels = list(range(2, 2 + datuak.shape[0]))
        datuak_df = pd.DataFrame(datuak, columns=self.signals+['wbr','ref'], index=row_labels)
        datuak_df['Y'] = helburua
        self.datuak = datuak_df

        if get_dates:
            # Same list that TemporalIncidences.estimate(dates=True) returns,
            # obtained without fitting a regression that is not needed here.
            first_end_date = instance.getStartDate('2020-01-01', instance.region)
            end_dates = [first_end_date] + [end for (start, end) in instance.zipRounds()]
            return datuak_df,end_dates
        else:
            return datuak_df

    def setVarsOps(self):
        """Create, store (``self.pset``) and return the DEAP primitive set.

        The protected operators must already be defined. The eight arguments
        are named cli, rf, XGB, glm, pos, ww, ref and wbr, in that order.
        """
        pset = gp.PrimitiveSet("MAIN", 8)
        pset.addPrimitive(protectedAdd, 2, name='add')
        pset.addPrimitive(protectedSub, 2, name='sub')
        pset.addPrimitive(protectedMul, 2, name='mul')
        pset.addPrimitive(protectedDiv, 2, name='div')
        pset.addPrimitive(operator.neg, 1)
        pset.addPrimitive(protectedLn, 1, name='ln')
        pset.addPrimitive(protectedExp, 1, name='exp')
        for constant in (0.01, 0.1, 1, 10, 100):
            pset.addTerminal(constant)

        for position, label in enumerate(('cli', 'rf', 'XGB', 'glm', 'pos', 'ww', 'ref', 'wbr')):
            pset.renameArguments(**{'ARG{0}'.format(position): label})

        self.pset = pset
        return pset

    def setCreator(self):
        """Register the minimising fitness and the tree individual in ``deap.creator``."""
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

    def _predict(self, func, i_p):
        """Evaluate the compiled expression ``func`` on row ``i_p`` of ``self.datuak``."""
        columns = self._INPUT_COLUMNS
        if self.interval=='from0':
            # NOTE(review): legacy 7-argument call kept from the original code; it
            # cannot match the 8-argument primitive set built by setVarsOps.
            columns = ('p_cli', 'p_rf', 'p_XGB', 'p_glm', 'new_case', 'ww_cases', 'wbr')
        return func(*[self.datuak[name].iloc[i_p] for name in columns])

    def evalFunc(self, individual, points, max_depth, eval_type=None, extra_param=0):
        """Fitness of ``individual`` on the rows ``points`` of ``self.datuak``.

        Parameters
        ----------
        individual : the expression tree; its ``height`` is used as complexity.
        points : iterable of positional row indices.
        max_depth : depth used to scale the complexity term.
        eval_type : how error and complexity are combined: ``'harm_mean'``,
            ``'geom_mean'``, ``'arit_mean'``, ``'addition'``,
            ``'multiplication'``, ``'m_abs_error'``, ``'s_sq_error'`` or
            ``'weighted'``. Defaults to ``self.eval_type`` (set by ``setToolbox``).
        extra_param : weight of the complexity term for ``'weighted'``.

        Returns
        -------
        tuple
            One-element tuple, as DEAP expects for a single-objective fitness.

        Raises
        ------
        ValueError
            If ``eval_type`` is not one of the names above.
        """
        if eval_type is None:
            eval_type = self.eval_type

        depth = individual.height +1 # +1 avoids the problems a depth of 0 would cause later
        n_depth = depth/max_depth

        func = self.toolbox.compile(expr=individual)
        # (prediction, target) per requested row; the expression is evaluated once per row.
        pairs = [(self._predict(func, i_p), self.datuak['Y'].iloc[i_p]) for i_p in points]

        # Mean of the absolute errors relative to the target.
        errors = np.array([np.absolute(pred - target)/target for pred, target in pairs])
        if math.isnan(errors.mean()):
            print(gp.PrimitiveTree(individual))
        mae = errors.mean()

        if eval_type=='harm_mean':
            return 2/(1/mae + 1/n_depth),
        elif eval_type=='geom_mean':
            return math.sqrt(mae * n_depth),
        elif eval_type=='arit_mean':
            return (mae + n_depth)/2,
        elif eval_type=='addition':
            return mae + n_depth,
        elif eval_type=='multiplication':
            return mae*n_depth,

        elif eval_type=='m_abs_error': # mean absolute (relative) error
            return mae,
        elif eval_type=='s_sq_error': # sum of squared errors, each divided by the target
            errors = np.array([(pred - target)**2/target for pred, target in pairs])
            return errors.sum(),

        elif eval_type=='weighted': # weighted complexity
            return mae + extra_param*n_depth,

        else:
            # Report the value actually used; self.eval_type may not even be set.
            raise ValueError('unknown type of evaluation has been used', eval_type)

    def setToolbox(self, eval_type, points=None, max_depth=10):
        """Create, store (``self.toolbox``) and return the DEAP toolbox.

        ``points`` are the positional rows of ``self.datuak`` used for the
        fitness (all rows when ``None``); ``max_depth`` bounds the height of
        generated, mated and mutated trees.
        """
        # Identity test: `points == None` would be elementwise for arrays.
        if points is None:
            points = list(range(self.datuak.shape[0]))
        self.eval_type=eval_type
        self.max_depth = max_depth

        toolbox = base.Toolbox()
        toolbox.register("expr", gp.genHalfAndHalf, pset=self.pset, min_=1, max_=self.max_depth)
        toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        toolbox.register("compile", gp.compile, pset=self.pset)

        toolbox.register("evaluate", self.evalFunc, points=points, max_depth=self.max_depth)

        toolbox.register("select", tools.selTournament, tournsize=3)
        toolbox.register("mate", gp.cxOnePoint)
        toolbox.register("expr_mut", gp.genFull, min_=0, max_=self.max_depth) # CAREFUL! a lower max may be preferable here
        toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=self.pset)

        toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=self.max_depth))
        toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=self.max_depth))

        self.toolbox = toolbox
        return toolbox

    def myEA(self, population, toolbox, cxpb, mutpb, margin, nc_gens, max_gens, halloffame, print_err=False):
        """Run the evolutionary loop; return ``(population, last_generation)``.

        Each generation selects, varies (``algorithms.varAnd``) and evaluates
        offspring, then keeps the best ``len(population)`` entries of parents
        plus offspring. The loop stops after ``max_gens`` generations or once
        the best fitness has changed by at most ``margin`` for ``nc_gens``
        generations in a row. The mutation probability is doubled (capped at
        1) until generation 100. ``last_generation`` is 0 when
        ``max_gens`` is 0.

        NOTE(review): both the ``toolbox`` argument and ``self.toolbox`` are
        used below, exactly as before; they are presumably the same object.
        """
        ## Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = self.toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        halloffame.update(population)

        ## Begin the generational process
        closeness = self.toolbox.evaluate(halloffame[0])[0]
        nc_gen = 0
        temp_mutpb= 2*mutpb if mutpb<=0.5 else 1
        gen = 0 # keeps the return value defined when the loop body never runs
        for gen in range(1,max_gens+1):
            ## Select the next generation individuals
            offspring = self.toolbox.select(population, len(population))

            if gen == 100:
                # Back to the requested mutation probability from here on.
                temp_mutpb = mutpb

            ## Vary the pool of individuals
            offspring = algorithms.varAnd(offspring, self.toolbox, cxpb, temp_mutpb)

            ## Evaluate the individuals with an invalid fitness
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = toolbox.map(self.toolbox.evaluate, invalid_ind)
            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit

            ## Replace the current population by the best of parents + offspring
            new_pop = tools.HallOfFame(len(population))
            new_pop.update(population+offspring)
            population[:] = list(new_pop)

            ## Update the hall of fame with the new population
            halloffame.update(population)

            ## Update closeness and generation number
            closeness_new = self.toolbox.evaluate(halloffame[0])[0]

            if print_err:
                print('{0} error:'.format(gen),round(closeness_new,5),'({0})'.format(round(closeness_new-closeness,3)),sep='\t')
            if np.absolute(closeness-closeness_new) <= margin:
                nc_gen += 1
                if nc_gen == nc_gens:
                    break
            else:
                closeness = closeness_new
                nc_gen = 0
        return population, gen

In [None]:
# No from0, single0

class GPclass_USA:
    """DEAP genetic-programming setup that fits one expression on the data of several states.

    Typical call order (see the example cells below the class):
    ``getDataFrame`` -> ``setVarsOps`` -> ``setCreator`` -> ``setToolbox`` -> ``myEA``.
    """

    # Columns of ``self.datuak`` in the positional order in which they are handed to a
    # compiled individual; this matches the ARG0..ARG7 renaming done in ``setVarsOps``
    # (cli, rf, XGB, glm, pos, ww, wbr, ref).
    _INPUT_COLUMNS = ('p_cli', 'p_rf', 'p_XGB', 'p_glm', 'new_case', 'ww_cases', 'wbr', 'ref')

    def __init__(self, interval, states=None):
        """Store the interval type and the list of states to use.

        Parameters
        ----------
        interval : interval type forwarded to ``AllRegionsAllRounds`` (the notebook
            uses ``'from1'`` and ``'single1'``).
        states : iterable of state codes; when omitted, the notebook-level ``regions``
            variable is used.
        """
        # `is None` rather than `== None`: an array-like `states` would otherwise be
        # compared element-wise and make the `if` ambiguous.
        self.states = regions if states is None else states
        self.interval = interval
        self.signals = ['p_cli','p_rf','p_XGB','p_glm','new_case','ww_cases']


    def getDataFrame(self, get_dates=False):
        """Build, store (as ``self.datuak``) and return the training DataFrame.

        The frame has one column per entry of ``self.signals`` plus ``'wbr'``, ``'ref'``
        and the target column ``'Y'``. ``get_dates`` is accepted but currently unused.
        """
        instance = AllRegionsAllRounds(interval_type=self.interval, states=self.states)
        # Call order kept as in the original (reference vector first, then the matrix).
        target = instance.addReferenceVector()
        matrix = instance.calculateIncidenceMatrix()
        if self.interval=='single1':
            # For 'single1' the first column of the matrix is dropped.
            matrix = matrix[:,1:]
        frame = pd.DataFrame(matrix, columns=self.signals+['wbr','ref'])
        frame['Y'] = target
        self.datuak = frame
        return frame

    def setVarsOps(self):
        """Create the primitive set (operators, constants, argument names).

        The protected operators (``protectedAdd`` etc.) must already be defined.
        Stores the result in ``self.pset`` and returns it.
        """
        pset = gp.PrimitiveSet("MAIN", 8)
        pset.addPrimitive(protectedAdd, 2, name='add')
        pset.addPrimitive(protectedSub, 2, name='sub')
        pset.addPrimitive(protectedMul, 2, name='mul')
        pset.addPrimitive(protectedDiv, 2, name='div')
        pset.addPrimitive(operator.neg, 1)
        pset.addPrimitive(protectedLn, 1, name='ln')
        pset.addPrimitive(protectedExp, 1, name='exp')

        # Constant terminals: powers of ten from 0.01 to 100.
        for constant in (0.01, 0.1, 1, 10, 100):
            pset.addTerminal(constant)

        # Human-readable names for the eight inputs (same order as _INPUT_COLUMNS).
        pset.renameArguments(ARG0='cli', ARG1='rf', ARG2='XGB', ARG3='glm',
                             ARG4='pos', ARG5='ww', ARG6='wbr', ARG7='ref')

        self.pset = pset
        return pset

    def setCreator(self):
        """Register the single-objective minimising fitness and the tree individual in ``creator``."""
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)


    def evalFunc(self, individual, points, max_depth, eval_type=None, extra_param=0):
        """Score ``individual`` on the rows ``points`` of ``self.datuak``.

        Parameters
        ----------
        individual : expression tree; it is compiled with ``self.toolbox.compile`` and
            its ``height`` is used as the complexity measure.
        points : iterable of positional row indices of ``self.datuak`` to evaluate on.
        max_depth : value the tree depth (``height + 1``) is divided by.
        eval_type : how error and complexity are combined; one of ``'harm_mean'``,
            ``'geom_mean'``, ``'arit_mean'``, ``'addition'``, ``'multiplication'``,
            ``'m_abs_error'``, ``'s_sq_error'`` or ``'weighted'``. Defaults to
            ``self.eval_type``.
        extra_param : weight of the complexity term, only used by ``'weighted'``.

        Returns
        -------
        A 1-tuple with the fitness value (DEAP expects fitness values as tuples).

        Raises
        ------
        ValueError : if ``eval_type`` is not one of the values listed above.
        """
        if eval_type is None:
            eval_type = self.eval_type

        # +1 so that a single-node tree (height 0) does not yield a zero complexity term.
        depth = individual.height + 1
        n_depth = depth / max_depth

        func = self.toolbox.compile(expr=individual)

        # Pull the columns out once and evaluate the tree once per row; the original
        # re-read every cell through .iloc and evaluated twice for 's_sq_error'.
        points = list(points)
        inputs = [self.datuak[name].to_numpy() for name in self._INPUT_COLUMNS]
        target = self.datuak['Y'].to_numpy()
        residuals = [func(*(column[i_p] for column in inputs)) - target[i_p] for i_p in points]

        # Mean absolute error relative to the target value.
        errors = np.array([np.abs(res) / target[i_p] for res, i_p in zip(residuals, points)])
        mae = errors.mean()
        if math.isnan(mae):
            # Show the offending expression to help debugging.
            print(gp.PrimitiveTree(individual))

        if eval_type=='harm_mean':
            return 2/(1/mae + 1/n_depth),
        elif eval_type=='geom_mean':
            return math.sqrt(mae * n_depth),
        elif eval_type=='arit_mean':
            return (mae + n_depth)/2,
        elif eval_type=='addition':
            return mae + n_depth,
        elif eval_type=='multiplication':
            return mae*n_depth,

        elif eval_type=='m_abs_error': # mean absolute (relative) error
            return mae,
        elif eval_type=='s_sq_error': # sum of squared errors, each divided by the target value
            errors = np.array([res**2 / target[i_p] for res, i_p in zip(residuals, points)])
            return errors.sum(),

        elif eval_type=='weighted': # weighted complexity
            return mae + extra_param*n_depth,

        else:
            # Report the value that was actually used, not self.eval_type (which may
            # differ from it, or not be set at all).
            raise ValueError('unknown type of evaluation has been used', eval_type)


    def setToolbox(self, eval_type, points=None, max_depth=10):
        """Create the DEAP toolbox, store it in ``self.toolbox`` and return it.

        Parameters
        ----------
        eval_type : default evaluation type for ``evalFunc`` (stored as ``self.eval_type``).
        points : row positions of ``self.datuak`` used for fitness evaluation; defaults
            to all rows.
        max_depth : maximum tree height used for generation, mutation and as the static
            height limit on crossover/mutation results.
        """
        # `is None` rather than `== None` so that array-like `points` are accepted.
        if points is None:
            points = list(range(self.datuak.shape[0]))
        self.eval_type=eval_type
        self.max_depth = max_depth

        toolbox = base.Toolbox()
        toolbox.register("expr", gp.genHalfAndHalf, pset=self.pset, min_=1, max_=self.max_depth)
        toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        toolbox.register("compile", gp.compile, pset=self.pset)

        toolbox.register("evaluate", self.evalFunc, points=points, max_depth=self.max_depth)

        toolbox.register("select", tools.selTournament, tournsize=3)
        toolbox.register("mate", gp.cxOnePoint)
        toolbox.register("expr_mut", gp.genFull, min_=0, max_=self.max_depth) # CAREFUL! a lower max may be preferable here
        toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=self.pset)

        # Reject crossover / mutation results that are taller than max_depth.
        toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=self.max_depth))
        toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=self.max_depth))

        self.toolbox = toolbox
        return toolbox

    def myEA(self, population, toolbox, cxpb, mutpb, margin, nc_gens, max_gens, halloffame, print_err=False):
        """Run the evolutionary loop with early stopping.

        Parameters
        ----------
        population : list of individuals; updated in place.
        toolbox : DEAP toolbox. Both this argument and ``self.toolbox`` are used, so
            callers are expected to pass ``self.toolbox``.
        cxpb, mutpb : crossover / mutation probabilities. Before generation 100 the
            mutation probability is doubled (capped at 1).
        margin, nc_gens : stop once the best fitness has stayed within ``margin`` of the
            last recorded value for ``nc_gens`` consecutive generations.
        max_gens : maximum number of generations.
        halloffame : hall of fame updated every generation; ``halloffame[0]`` is taken
            as the best individual.
        print_err : print the best fitness (and its change) every generation.

        Returns
        -------
        ``(population, gen)`` where ``gen`` is the last generation that was run
        (0 when ``max_gens`` is smaller than 1).
        """
        ## Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = self.toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        halloffame.update(population)

        ## Begin the generational process
        closeness = self.toolbox.evaluate(halloffame[0])[0]
        nc_gen = 0
        gen = 0  # keeps the return value defined when the loop body never runs
        temp_mutpb = 2*mutpb if mutpb<=0.5 else 1
        for gen in range(1,max_gens+1):
            ## Select the next generation individuals
            offspring = self.toolbox.select(population, len(population))

            # From generation 100 on, fall back to the nominal mutation probability.
            if gen == 100:
                temp_mutpb = mutpb

            ## Vary the pool of individuals
            offspring = algorithms.varAnd(offspring, self.toolbox, cxpb, temp_mutpb)

            ## Evaluate the individuals with an invalid fitness
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = toolbox.map(self.toolbox.evaluate, invalid_ind)
            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit

            ## Replace the current population by the best of parents + offspring.
            ## A HallOfFame is used as the container that keeps the best individuals.
            new_pop = tools.HallOfFame(len(population))
            new_pop.update(population+offspring)
            population[:] = list(new_pop)

            ## Update the hall of fame with the new population
            halloffame.update(population)

            ## Update closeness and generation number
            closeness_new = self.toolbox.evaluate(halloffame[0])[0]

            if print_err:
                print('{0} error:'.format(gen),round(closeness_new,5),'({0})'.format(round(closeness_new-closeness,3)),sep='\t')
            if np.absolute(closeness-closeness_new) <= margin:
                nc_gen += 1
                if nc_gen == nc_gens:
                    break
            # Extra stop: mean absolute relative error of the best individual on rows
            # 0..28 is within 0.002 of 0.088. Uses the `halloffame` argument; the
            # original read the notebook-level global `hof` here.
            # NOTE(review): 29 rows, depth 4 and the 0.088 target are hard-coded - confirm
            # they are still intended for the multi-state data.
            elif np.abs(self.evalFunc(halloffame[0], range(29), 4, eval_type='m_abs_error')[0]-0.088)<=0.002:
                break
            else:
                closeness = closeness_new
                nc_gen = 0
        return population, gen

In [None]:
# Example: California ('CA'), non-cumulative 'single1' intervals, trees up to depth 8.

# Problem definition: data, primitive set, fitness/individual classes and toolbox.
mygp = GPclass('CA', 'single1')
datuak_df = mygp.getDataFrame()
mygp.setVarsOps()
mygp.setCreator()
mygp.setToolbox(max_depth=8, eval_type='s_sq_error')

# Evolution: 300 individuals, a hall of fame of size 1 keeps the best expression.
hof = tools.HallOfFame(1)
pop = mygp.toolbox.population(n=300)
pop, gens = mygp.myEA(
    pop,
    mygp.toolbox,
    cxpb=0.8,
    mutpb=0.3,
    margin=0.005,
    nc_gens=80,
    max_gens=700,
    halloffame=hof,
    print_err=False,
)

# Report the mean absolute relative error of the best expression, then the expression.
print('MARE:', mygp.evalFunc(hof[0], range(29), 4, eval_type='m_abs_error')[0])
print(hof[0])

In [None]:
# Example: the ten states listed below, cumulative 'from1' intervals, trees up to depth 8.

top10 = ['CA', 'TX', 'FL', 'NY', 'PA', 'IL', 'OH', 'NC', 'MI', 'GA']

# Problem definition: data, primitive set, fitness/individual classes and toolbox.
mygp = GPclass_USA('from1', states=top10)
datuak_df = mygp.getDataFrame()
mygp.setVarsOps()
mygp.setCreator()
mygp.setToolbox(max_depth=8, eval_type='s_sq_error')

# Evolution: 300 individuals, a hall of fame of size 1 keeps the best expression.
hof = tools.HallOfFame(1)
pop = mygp.toolbox.population(n=300)
pop, gens = mygp.myEA(
    pop,
    mygp.toolbox,
    cxpb=0.8,
    mutpb=0.3,
    margin=0.005,
    nc_gens=80,
    max_gens=700,
    halloffame=hof,
    print_err=False,
)

# Report the mean absolute relative error on rows 0..28, then the best expression.
print('MARE:', mygp.evalFunc(hof[0], range(29), 4, eval_type='m_abs_error')[0])
print(hof[0])

In [None]:
# Draw the best expression tree (hof[0]) as a graph.

print(gp.PrimitiveTree(hof[0]))

# gp.graph returns the node ids, the edges between them and a label for every node.
nodes, edges, labels = gp.graph(hof[0])

g = nx.Graph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
pos = nx.planar_layout(g)

# Explicit figure/axes instead of relying on the implicit "current axes".
fig, ax = plt.subplots(figsize=(20, 15))
nx.draw_networkx_nodes(g, pos, node_color='red', ax=ax)
nx.draw_networkx_edges(g, pos, ax=ax)
nx.draw_networkx_labels(g, pos, labels, ax=ax)
plt.show()

In [None]:
# ALL MAREs + BOXPLOTS (States)
#
# Hard-coded results of earlier runs. `errors` maps a '<state> <interval>' key
# (e.g. 'CA from1') to a dict that has one entry per value in `options`; each entry
# is a list of MARE values.
# NOTE(review): `options` presumably holds the maximum tree depths that were tried,
# and each list one value per repeated run - confirm.

options = [4,6,8,10]

errors = dict()
# -- CA --
errors['CA from1'] = {4: [0.08568452240253191, 0.09012703209846237, 0.08977869484052546, 0.089003410165918, 0.12452540406876761, 0.08056594069962103, 0.09502633729087932, 0.10939312921468383, 0.09865113120609499, 0.10675719597187121, 0.0964761742226061, 0.1065974806194654, 0.09858094367109348, 0.10472585711852461, 0.09436533372635597, 0.10449358850196099, 0.08250368990250545, 0.10250084487978833, 0.09260394183118159, 0.09184294805380008], 6: [0.08302804655278476, 0.09776462662967218, 0.08691125357462996, 0.07951490014218965, 0.11216165539790196, 0.07852752946526369, 0.08819121073402225, 0.10415007202068424, 0.09260159324345438, 0.07603447991700814, 0.09608177132411816, 0.08640077683841102, 0.07406562989487468, 0.062010196921941055, 0.08104720680048538, 0.0825541871089402, 0.09093097302863841, 0.08217179188280517, 0.08635974057336979, 0.08243130804274128], 8: [0.11688287926996856, 0.08173630109695687, 0.0840014741652367, 0.07719161589404022, 0.07453572239048496, 0.09320664659895915, 0.06419369849788759, 0.0727260213319109, 0.07926938370534321, 0.050353301261842225, 0.06330516746544808, 0.07305409335479064, 0.0859340172764639, 0.10311406663807147, 0.06625822167448585, 0.08180534038995488, 0.06556524820963158, 0.07894715786373148, 0.06875676882385975, 0.061164415081671315], 10: [0.05501651747698147, 0.06174512132489023, 0.06237032245332342, 0.05925688441303178, 0.07732244063962648, 0.05958770873627688, 0.11492598962522309, 0.055937945396427415, 0.08902103749845765, 0.0682706264308887, 0.061583474540195006, 0.043611646353747764, 0.079754734013816, 0.053131362203237484, 0.09226233833744704, 0.073238782129085, 0.06373661924454323, 0.0603399199335905, 0.042793121247302376, 0.0711922936963176]}
errors['CA single1'] = {4: [0.11060567374757851, 0.11253125525422275, 0.1061412796514899, 0.10763094981953644, 0.11060944750930829, 0.1032701659222218, 0.1115577694489029, 0.10807756251850971, 0.11650244672271394, 0.11202149786611126, 0.11236007551336633, 0.10741055240392836, 0.11051246610205222, 0.09780242946345875, 0.11197040118226469, 0.11038108033149886, 0.11111173810065372, 0.11087680157600337, 0.10614265388372118, 0.10611105442767017], 6: [0.10024098140399204, 0.08800514830648368, 0.09592439445441908, 0.11396764889649033, 0.10982052700210296, 0.08936699125410505, 0.08964395452925344, 0.10287155314233642, 0.10134487231506056, 0.11015010853858566, 0.09113014895314858, 0.10995941542830502, 0.10861222348061178, 0.09116539561403315, 0.11352121358114228, 0.1031257740083985, 0.09007078505458158, 0.08813486743702942, 0.10611295210445176, 0.10054553212823143], 8: [0.07344236744259189, 0.05929018513068534, 0.07955183235616169, 0.09019694314733384, 0.07434646380570556, 0.06746990223093807, 0.08060217253631906, 0.07673829136852264, 0.0652438084134811, 0.08133471125209726, 0.08995681295503061, 0.06456234532668627, 0.08328262605699938, 0.10891103831588378, 0.08549728414147119, 0.07219574501827332, 0.07311502947205496, 0.08452821661520107, 0.071264075221795, 0.07722490572094651], 10: [0.0895073640113536, 0.08700053881144677, 0.0865145834649186, 0.07455613257033128, 0.10461660161748401, 0.09214361979246201, 0.07353170977751339, 0.08523332351537237, 0.10409550239500626, 0.0695820813278236, 0.065988100243173, 0.09333521376348758, 0.0943120602267849, 0.058842290476834876, 0.04397661006416118, 0.08457724985176546, 0.10107503676554902, 0.0947210252623387, 0.10448357414530264, 0.060920523626181014]}
# -- TX --
errors['TX from1'] = {4: [0.08899242507807069, 0.09060605148200779, 0.09489883841405998, 0.10328274424138029, 0.08597244259196012, 0.08427477117763499, 0.09209445785268346, 0.08561149969163126, 0.09114166513996232, 0.09248366753302782, 0.08890054012034611, 0.08738100246627101, 0.09240169233688339, 0.08155832888789788, 0.09581040280365723, 0.08917802510355585, 0.09314416630215415, 0.08583713234967359, 0.09263033151216592, 0.08780610176630073], 6: [0.08081722803535445, 0.07387957503601615, 0.07575501135688696, 0.08557594815808045, 0.079964979913677, 0.08131979344002052, 0.09111898141442756, 0.07041336256504409, 0.09294298361356783, 0.08134193955783765, 0.0866399280736262, 0.07451517701051072, 0.08717269562550127, 0.08880847995860511, 0.08862260401539807, 0.08926921151814851, 0.07663420374508953, 0.08900011116069236, 0.08828317604403291, 0.08396094948055506], 8: [0.07761130333772512, 0.07076830720010946, 0.08570639989540692, 0.0689392437612187, 0.07921499911968904, 0.07637435338551063, 0.08227841527203197, 0.07593970175088122, 0.0689100634631768, 0.06850765511703182, 0.0708735232632779, 0.07004942202771416, 0.07161933438448932, 0.06689282154110056, 0.07962482511768777, 0.08221964358276392, 0.07829556779371884, 0.07854029378004011, 0.08241751299199264, 0.08162112804708256], 10: [0.0606856343042696, 0.06444916133275574, 0.08805163831136825, 0.057761130963155854, 0.06031406014872231, 0.08094961451183297, 0.07665616312461207, 0.05856211955298847, 0.04033518590511621, 0.08050149415787547, 0.06005470679279588, 0.05286830680391042, 0.07820315985497263, 0.05788598486658697, 0.0722848204489989, 0.0642887725857934, 0.08435499599529774, 0.07055366258780464, 0.0532723078014417, 0.07575900859311602]}
errors['TX single1'] = {4: [0.12000492033714658, 0.12389060398081526, 0.12147594445644257, 0.1340840910855038, 0.118782059915203, 0.11708885769231693, 0.1370715556020892, 0.11493346731872815, 0.1334347936381132, 0.11776876349423188, 0.10994319720529801, 0.1193832792916533, 0.12548468930330958, 0.11644023637138909, 0.12130684684345337, 0.11312182645835796, 0.12028029382411459, 0.1258674482746031, 0.11892752342896841, 0.12323917439936], 6: [0.10612592322071458, 0.11818853824159273, 0.10806993243930373, 0.113500124597228, 0.10147405529104106, 0.10739098649049157, 0.09064698668190035, 0.12177802108279993, 0.11250021001747652, 0.10650603652908805, 0.10722348262757642, 0.1163793613075468, 0.11060829464861667, 0.09634673424847827, 0.11273891131285399, 0.10115613821868764, 0.11553799803430288, 0.11043510229272605, 0.09318308804792708, 0.11965934166765192], 8: [0.11916167859685818, 0.0986863248539672, 0.10435140721283395, 0.09400763946503371, 0.08380734031340112, 0.10165093999749002, 0.10313381035183257, 0.11007283194180542, 0.06927154865669881, 0.09123226146530522, 0.10001882480217902, 0.09676633736063954, 0.09862078544183926, 0.10727401360756858, 0.09008123763322462, 0.09377255745611328, 0.09552054553572438, 0.10928277100167473, 0.08627815772948053, 0.07950783459595569], 10: [0.08271401343408193, 0.06534168951242791, 0.08777888303633247, 0.0836962727263746, 0.11233548438544148, 0.09917621297798275, 0.09492718103581346, 0.07102670274807289, 0.07596652519328696, 0.08096034450604334, 0.09308931082868757, 0.09459551390730067, 0.08104707894656961, 0.10669176198226202, 0.07949700388457051, 0.08879564320809571, 0.09547234581930465, 0.09391915587264142, 0.07217762983271973, 0.08179846114539818]}
# -- FL --
errors['FL from1'] = {4: [0.07527835856837184, 0.08336344129752035, 0.06959557929731212, 0.0764718882465085, 0.0821323712670419, 0.07511640183072647, 0.07108060744662344, 0.08264453968942104, 0.07202009737167742, 0.061622580794013414, 0.0822097969884242, 0.08187626893730081, 0.07417055324729929, 0.0766963955082574, 0.08803584404885587, 0.09292207391981055, 0.07974638870569997, 0.08253921822119825, 0.07392794533417407, 0.0950719053992711], 6: [0.06605962988852063, 0.0770583275135099, 0.07642741396934992, 0.06977340232124235, 0.06999395507591076, 0.0670881075213451, 0.06622973613935637, 0.0685404178752577, 0.08947684287230662, 0.08270222287587173, 0.06160366436584828, 0.06915279034014574, 0.06867666781976904, 0.07300617149847666, 0.06607232838845181, 0.07092287068979071, 0.07261382009130767, 0.06655183544548242, 0.0612428720608917, 0.08111742405886209], 8: [0.047941613868640834, 0.061321976695641205, 0.06422897317557727, 0.055935658646916686, 0.04294113323816811, 0.05278184958908252, 0.06232724564246289, 0.06308183424882506, 0.06089371337177738, 0.06155232919629974, 0.07278446272186714, 0.0521123198924887, 0.04703537943211202, 0.06043866791495202, 0.06467219339564324, 0.06538510881745195, 0.04441422191467823, 0.06198493369357857, 0.05018731609340834, 0.06346924661115014], 10: [0.05073107950306509, 0.051913502941714464, 0.041441960668051746, 0.06815855149891048, 0.04754146012372837, 0.059332350094364966, 0.0595165395305853, 0.04365981940647187, 0.053191149661968654, 0.042047100880309725, 0.05151423887129038, 0.053091805186544036, 0.04848696001067137, 0.06305841653069819, 0.062344282260269036, 0.059221650286522536, 0.03727838625052373, 0.0544881027888251, 0.05413796395075844, 0.058490442851422325]}
errors['FL single1'] = {4: [0.0905896287313716, 0.09084803369258862, 0.08930646216280978, 0.09189213427796825, 0.08349240199113504, 0.09040157887231347, 0.0905896287313716, 0.0837347607269576, 0.0891482928944811, 0.08111424039920358, 0.0905896287313716, 0.09099395406958956, 0.09173527155360985, 0.07870834027367743, 0.0915498303912875, 0.09292419280927176, 0.08339195000810559, 0.08371066151489512, 0.09114571611779328, 0.09149278016109319], 6: [0.0905401537285123, 0.0864630875326113, 0.08239694345908859, 0.08144003884873625, 0.0737901941765385, 0.06302420981288635, 0.08370868562650684, 0.08076201834800256, 0.0653359762881693, 0.09023167086344498, 0.08151083529809927, 0.07424010458058201, 0.07220193065954862, 0.08107600985299926, 0.07023833773754835, 0.09058821256783917, 0.07477956365337567, 0.0907152449699694, 0.08980607110086342, 0.0725585249979671], 8: [0.0793109589405879, 0.060208272524935075, 0.07519417293687707, 0.06587349786741001, 0.06667760951112786, 0.06605831187796539, 0.05879655670408012, 0.06877452273524653, 0.07211773983550811, 0.05758774714867815, 0.07464622507228053, 0.07083553652168346, 0.06775162752331111, 0.07753172436464807, 0.06705589437313755, 0.07881222968951777, 0.07246491253755609, 0.07015147613134805, 0.06905269182257079, 0.051931092762534804], 10: [0.05965040838783519, 0.04629950925594964, 0.062899892818416, 0.06434512247134323, 0.06386191098679493, 0.0687465208923184, 0.05314874105369342, 0.06856049800544531, 0.038279515475183955, 0.06492878553749301, 0.06500692956232898, 0.039762796650222886, 0.058420197351866474, 0.0452855086826585, 0.05659554506498858, 0.0797329779834877, 0.05993439830816684, 0.04070026745300974, 0.07476490702349882, 0.07245282651532928]}
# -- NY --
errors['NY from1'] = {4: [0.13691036820410776, 0.11463743045681017, 0.14129249244766368, 0.1271555718619209, 0.14149178452440486, 0.13994208381584122, 0.1424351151237456, 0.17122374919905475, 0.10863666344423013, 0.17843106775940176, 0.13240129954580832, 0.1498398349705672, 0.1131599425054404, 0.13124267853882293, 0.12448733694042487, 0.14222949456209605, 0.11797720606694789, 0.11390595279290769, 0.13180729492308838, 0.11474362401824789], 6: [0.09019840603567486, 0.12368865060559055, 0.10533177176322785, 0.10953942975888643, 0.1040495700797711, 0.122385173096992, 0.10106938631122334, 0.0886249424617385, 0.10444957836891058, 0.07671455105859894, 0.11840218077320806, 0.1179096869719457, 0.12211316425368068, 0.11340249056740259, 0.10961578400014908, 0.12357545704019819, 0.10755470612892028, 0.11474689973984287, 0.1194886088089192, 0.10476410164687974], 8: [0.05911141522654816, 0.1024026177262343, 0.08530109314416366, 0.10052443041658123, 0.10681695564097854, 0.08820832932029664, 0.07236244721603434, 0.10233211271351356, 0.11018602610000808, 0.0956913871740151, 0.1047892361503323, 0.10939930796725712, 0.10799998589384491, 0.11825364400013652, 0.10801268338338098, 0.06573042675408054, 0.08731098460269673, 0.09423749186846117, 0.1184409946404764, 0.10866989305935426], 10: [0.0935591851385932, 0.06451982570686425, 0.11389563357660172, 0.07415900261224179, 0.06948688379744233, 0.06677783428770936, 0.06832516362306054, 0.053698693219156116, 0.08233306369720438, 0.0560966333271936, 0.08034743219108828, 0.06149586077413669, 0.09311696361927302, 0.07823886711847096, 0.09520233744661324, 0.06388279485179907, 0.05443642802448739, 0.07964799480314858, 0.06665424034772716, 0.09965885183258591]}
errors['NY single1'] = {4: [0.1354001130773588, 0.11606305315254495, 0.12502853786801948, 0.11930271682550808, 0.10473138191844136, 0.11537381199345424, 0.11120302929424826, 0.1265787022605072, 0.12155727693356963, 0.11689088036882424, 0.10459073448004078, 0.11606208907280992, 0.09028753485941896, 0.12095945491378117, 0.12709166986718937, 0.12309042971802138, 0.1276609040477908, 0.12578118355157392, 0.12553003526680015, 0.11905331233341357], 6: [0.10201131114106082, 0.0806298184390036, 0.07129980518187305, 0.09829920493535677, 0.11137753965043604, 0.09998803617254107, 0.1022062815598638, 0.12054804977895986, 0.12441566288756928, 0.11795367879343298, 0.0964696767112205, 0.11807189900804317, 0.1032082128236444, 0.10712275930404862, 0.11222086081649968, 0.12520097995199608, 0.09590035863511337, 0.1215597119485673, 0.0963372525691398, 0.08836895451743271], 8: [0.08128032235304632, 0.06619238591161385, 0.08245145224239833, 0.08793572056847869, 0.10049338269336076, 0.07746183221614662, 0.09519439731311631, 0.08869808986622513, 0.06782675508980746, 0.11423435208250328, 0.0960725688267615, 0.0849304265889039, 0.07464481774680495, 0.08667786152325412, 0.11724434768711119, 0.0862223999481294, 0.09938453290305217, 0.07449971903984277, 0.09734335264967266, 0.08391270752875445], 10: [0.0866403357994776, 0.08683370232219713, 0.05311199358777757, 0.07673083570159235, 0.060380783532674336, 0.05839184234328331, 0.08841305907108438, 0.10398273616730042, 0.049253010157747995, 0.1285300294668738, 0.08559574714952854, 0.11351525091768062, 0.05813418953760353, 0.08585217059457248, 0.12852415972324574, 0.05575310542562779, 0.07369513784734484, 0.05990523390263246, 0.0513298487621089, 0.10656769870497676]}
# -- PA --
errors['PA from1'] = {4: [0.10606982914059683, 0.10176853477778856, 0.10152174643003965, 0.08677603222794186, 0.11676635448662034, 0.09600049367627787, 0.10124882005983023, 0.10752406235186017, 0.11204606590657033, 0.1061923128954071, 0.10220953984647525, 0.10799919884581123, 0.11281851790641924, 0.11527551431420037, 0.09568540895309473, 0.10776553449314792, 0.0933338701847474, 0.09858740996117547, 0.09764583997551117, 0.09867892593982248], 6: [0.06451909660678776, 0.07261148219774917, 0.057133278687557555, 0.09617954021898148, 0.09327563833670884, 0.07361445057972042, 0.08616949185546369, 0.09526890621323071, 0.09373640847512298, 0.07346567373119219, 0.07100143026825237, 0.13633395280848024, 0.0673221976490736, 0.08409670752010306, 0.08099561945205354, 0.09155668456274563, 0.08946004662322485, 0.09879058958640353, 0.06406918336307854, 0.07756155717737381], 8: [0.07855700982426078, 0.05432776146614194, 0.05045076529014877, 0.05715429289569441, 0.06585085071803123, 0.06628524179889657, 0.060798322179307494, 0.07682767027460094, 0.06286587352429274, 0.08072905853384393, 0.06856321473487904, 0.0748953858314751, 0.038398808252033664, 0.05779973210479798, 0.06100352041401312, 0.07227917132179587, 0.062372201778202976, 0.07408985677899056, 0.04511107131860671, 0.059290924362381425], 10: [0.06655063397626478, 0.06592113064878534, 0.05741484272319485, 0.07953715997909062, 0.05156233306893509, 0.038749310701080564, 0.06475224734117047, 0.056868025189829835, 0.06862278694426721, 0.06513054029108306, 0.029256149808399266, 0.05135185709145628, 0.045856723440689744, 0.05292080966419352, 0.03932783653403446, 0.042618006253855055, 0.0675215205631952, 0.05074001152656384, 0.06380512248001251, 0.060288987290371104]}
errors['PA single1'] = {4: [0.10807074097512047, 0.105025324343872, 0.10441049967636953, 0.10157054846447998, 0.07832397038823548, 0.10850350449898266, 0.10484501968567296, 0.09744079491178197, 0.09972312542703755, 0.10761701895591531, 0.10554955445655224, 0.10652128965764743, 0.10814769365288295, 0.10390073977676995, 0.0888979453173576, 0.10369708028871949, 0.1017250064781089, 0.1079412717908893, 0.10864773726676727, 0.1084973594012606], 6: [0.07040010103701397, 0.10143107586690961, 0.07615460405197991, 0.10048722582565196, 0.10276413890281472, 0.08674997835145465, 0.09712769112897826, 0.07773032928884836, 0.08342925188831168, 0.09998386518627785, 0.08622257818599632, 0.07594558425216674, 0.08940116833756169, 0.09450302309697348, 0.08433867503249984, 0.09690084057365278, 0.10022993832675328, 0.07975137722242129, 0.08980307408699775, 0.10024522794757834], 8: [0.06915307536763395, 0.07619071702267764, 0.0782427986829936, 0.06968994084462136, 0.06770496722836794, 0.06988490101729466, 0.07939744568262302, 0.09622467161845932, 0.08061463988634375, 0.0630966072573019, 0.06683295517175218, 0.0785234151971659, 0.07768212870284806, 0.08292766800289639, 0.08042669242529105, 0.079101939769411, 0.07667460851702863, 0.10033706394642815, 0.0987447381447491, 0.05479918262837497], 10: [0.06312326231864201, 0.07191305042545228, 0.062356077439116246, 0.0635194752626243, 0.08764626829048101, 0.05627931736211647, 0.07213349832772901, 0.06873682094378335, 0.06362941617815246, 0.05561677815185666, 0.07378786683961862, 0.05446324902803455, 0.06681981453751294, 0.08026612847685097, 0.07660703344224841, 0.057865547882640964, 0.065371951449985, 0.06660896476013702, 0.04487407194562607, 0.046761610156220976]}
# -- IL --
errors['IL from1'] = {4: [0.07353403537835544, 0.0782181365505771, 0.0744383418166315, 0.05889983625213517, 0.0580477000825198, 0.058624614452722074, 0.07688327733966124, 0.06600799671254547, 0.05806804378034407, 0.06657308758674875, 0.07658029077503346, 0.06267614894741015, 0.08211342811357858, 0.0739020484380724, 0.05706168838708976, 0.07591538948951512, 0.06030839989600202, 0.06333475868321044, 0.06955729266402798, 0.06024682430133814], 6: [0.0597648449349401, 0.052271657037149324, 0.05742609411674089, 0.05984166839015861, 0.05053589838775046, 0.08007255589490511, 0.05470598688867385, 0.057887423917730066, 0.05730550466353496, 0.0632636223266569, 0.05978235604223983, 0.052208590028229425, 0.05616515344668195, 0.05199412792193371, 0.06050735346121091, 0.05851966000725341, 0.05862297514225652, 0.05554828208760412, 0.07677320995236929, 0.05667667165931891], 8: [0.044500963804260475, 0.04821498587113296, 0.04515879814090629, 0.04650402260055825, 0.05752727307313089, 0.052377169059887166, 0.04329906890185667, 0.0557335778679469, 0.044657023149087496, 0.0594797026726537, 0.058501597416683884, 0.06231511426295547, 0.05143467612292637, 0.07596513236887538, 0.06528514456067737, 0.03973040495995371, 0.07643636535857821, 0.04968089725062137, 0.04596573679132892, 0.044472535758613325], 10: [0.0587543500027148, 0.0515693315373101, 0.05356648541478526, 0.05173232770039463, 0.052055489547949016, 0.05577990727909822, 0.054148945347339265, 0.04431851077755427, 0.04239472416546443, 0.04898453150007883, 0.0596974916719036, 0.04801102160681745, 0.10635397999531741, 0.05054848378803182, 0.04223649155916373, 0.04512293252598796, 0.04841701107352275, 0.039283334553128414, 0.04851483939891549, 0.05043834181682461]}
errors['IL single1'] = {4: [0.08783605581135727, 0.09036970529957374, 0.08744630967058288, 0.08964719057994905, 0.0887253505657891, 0.08452835518203701, 0.09019543808850308, 0.08506996740858691, 0.09146142972147094, 0.09237640427001699, 0.0901954380885031, 0.09618172523479349, 0.08852478409012689, 0.08696157843952806, 0.08603443603528414, 0.08573068308006729, 0.08517185617523101, 0.08870895876559269, 0.08756668914511503, 0.08677479527931628], 6: [0.0835240372994946, 0.07522054832024333, 0.08906747809927056, 0.08468644060542363, 0.08550048526213327, 0.07879388583931828, 0.07824259393575808, 0.08293297157897785, 0.08184424484857838, 0.07477589715162869, 0.06678765628809794, 0.08629415394258473, 0.09144066593696543, 0.0852115046320944, 0.08387587500724752, 0.08268519174539571, 0.08304070672317898, 0.08707512792778206, 0.08199650213163825, 0.08561896629773544], 8: [0.09066471762641153, 0.09382030412682872, 0.0780653999992178, 0.06876536916585789, 0.06270285732054026, 0.08369139902232257, 0.06121404598281711, 0.07149455379779267, 0.0807933171173432, 0.07977487836870485, 0.07568074504391295, 0.07065307776837047, 0.09336564968847962, 0.07685811481973435, 0.0619520872832909, 0.06493555619189394, 0.07742285262189633, 0.0536052014796184, 0.0732416309309427, 0.04260642978864563], 10: [0.06626905843851551, 0.07206901912060856, 0.08103076917327487, 0.054556362369821414, 0.0737649875161348, 0.06602864810053151, 0.048656055732948914, 0.07437661776714082, 0.0729437466591906, 0.08304688437495597, 0.08276090658852504, 0.08249843229224761, 0.08479453342715612, 0.08135361873139904, 0.07429977644912379, 0.0734082987193487, 0.07062864328448017, 0.07948555454823575, 0.049994316491908566, 0.059815367453152]}
# -- OH --
errors['OH from1'] = {4: [0.11983066597469504, 0.12668987973927387, 0.1168081385405676, 0.12087336619078069, 0.12305033624643696, 0.13739732090234633, 0.11599962809805958, 0.1428970732183619, 0.14120860164627128, 0.12496660163574047, 0.13654338410396374, 0.14197331266595634, 0.1313858676115528, 0.11555618499917585, 0.128566395098662, 0.13692125224919394, 0.13559671331554687, 0.14827422370008045, 0.1321787150171412, 0.12176092736498731], 6: [0.07150209864171507, 0.0924281297766642, 0.07694092518613015, 0.10261595368444858, 0.12429103583687665, 0.11860117458598789, 0.1176704555910761, 0.0995723544514214, 0.10562890551459668, 0.07649956192723419, 0.09221803026533829, 0.10517191058926822, 0.08824772904239236, 0.10975180897005249, 0.09598957596447831, 0.12093438304676166, 0.07157076307578608, 0.12416658989496648, 0.10502465452095558, 0.10391334878529337], 8: [0.08800914704987463, 0.06972635104296021, 0.13188706096557448, 0.06747446882693756, 0.07996507707091918, 0.11616238068076229, 0.0813535083556418, 0.09121049340919404, 0.0548489060565113, 0.127771808584952, 0.05788634834250297, 0.12043714799390551, 0.12176452249316277, 0.10596255639935452, 0.06842372761893784, 0.07269716924479469, 0.07880288722858904, 0.11347100370323716, 0.07465060806491423, 0.07106806168147282], 10: [0.07106052514669893, 0.08163425535292275, 0.07848804915540199, 0.03587755918885785, 0.09868980478769303, 0.08270913666814143, 0.1032215454666851, 0.09765596466831085, 0.07007721896304424, 0.04117266958217143, 0.12125964459544011, 0.0687633554498295, 0.07385900392181108, 0.07772660188874522, 0.10475268274285988, 0.07955126660594972, 0.06971109401015053, 0.12010449022514018, 0.0732551751261825, 0.11606024219597366]}
errors['OH single1'] = {4: [0.11673069632288613, 0.11673069632288613, 0.09983364816508428, 0.11372542837502665, 0.10773844918586034, 0.12167828487109747, 0.14047541460270008, 0.11673069632288613, 0.10760097185446858, 0.1383801693395047, 0.11531629383778152, 0.11526760284439197, 0.10651153484650018, 0.11579831216512501, 0.09727354908959268, 0.10314845532093628, 0.10103246859105884, 0.11739100588170183, 0.11806847550261473, 0.13988901964099604], 6: [0.1318555248799672, 0.08688672881811355, 0.09062836462933656, 0.08323089387790489, 0.07498041684287562, 0.09230754821562812, 0.09228549193140387, 0.10730672181971691, 0.10249127702063394, 0.09697982920612529, 0.08104587263435845, 0.10475294195404547, 0.11065802322346467, 0.09830186666303085, 0.0880473897788775, 0.0950797370268567, 0.08764781349878656, 0.08329278465391547, 0.11053251992646067, 0.08822341027233614], 8: [0.07341452666115267, 0.08896180860022376, 0.04271718837816434, 0.10032226874427023, 0.09323712549766909, 0.09096349926358137, 0.11701266537548426, 0.08445458438802564, 0.07150942045379638, 0.09533037292690132, 0.0666052791620136, 0.1003196082699181, 0.07041830399737392, 0.082106934993057, 0.08165852354351348, 0.09129107899413025, 0.0885292582577304, 0.09970545583031504, 0.10479335321195639, 0.0787639745046214], 10: [0.1045276245813267, 0.038640911561913655, 0.08963858021038759, 0.09251197726029685, 0.032901006551598534, 0.13328466560588878, 0.0497688283235825, 0.09479623727749385, 0.07680426252079352, 0.09202933846349866, 0.0797976320668161, 0.08940276381178407, 0.0684481483584121, 0.05949922565721861, 0.06703431420224788, 0.06761839601802301, 0.06095848203606505, 0.04014814721603372, 0.10882091281290446, 0.05428689684151594]}
# -- GA --
errors['GA from1'] = {4: [0.1767358526312925, 0.17014107545970933, 0.18562310606972, 0.1809348536882757, 0.17209351133937767, 0.17937253132481723, 0.1499278899481796, 0.17884656123209255, 0.1733472092092413, 0.1663967158302095, 0.16949989764991188, 0.161789939733875, 0.17785505125418324, 0.1385573020139439, 0.14504666596591645, 0.17324136523667355, 0.1693857122847887, 0.10942611321012166, 0.16406133155061828, 0.15603186700555008], 6: [0.0936416935837468, 0.14385218591345358, 0.12721570569778778, 0.1722044476849821, 0.15077238368536114, 0.08778047267149577, 0.1400476844846844, 0.09497434590309481, 0.15699545634272297, 0.1214426583205556, 0.18106755596599555, 0.0739173457443521, 0.13849397539471034, 0.15702255545571261, 0.09073063197785394, 0.11919044498017177, 0.10431894632327562, 0.13843992266543956, 0.11992358043478978, 0.14154441373583926], 8: [0.08145901657505562, 0.1344013649215642, 0.06464701009208255, 0.07474983207223276, 0.0949426376904619, 0.0828004690072112, 0.11249545150115772, 0.09245095437594851, 0.08158787540966195, 0.09405084627657835, 0.14281281455727274, 0.08799569319512078, 0.18160177830760846, 0.08090133513284128, 0.08973881965971958, 0.06813987002923028, 0.15668011789202635, 0.0725974070904964, 0.08064696847436452, 0.12417044100422577], 10: [0.11306688385093853, 0.04793595397273783, 0.0819720842816866, 0.10835694279294661, 0.07460805397619652, 0.12486958982782545, 0.07747367202895397, 0.037712457206705484, 0.10503606086606834, 0.06229138075215591, 0.06337843315231452, 0.06280873030517427, 0.08405654095576186, 0.044629807326187494, 0.07236410608582752, 0.04400651213889502, 0.06688283172567575, 0.08047764434315056, 0.11516724977875387, 0.057719931229509934]}
errors['GA single1'] = {4: [0.11747741103517069, 0.10937525437360307, 0.09970093910594649, 0.1079630787763931, 0.11544738610731789, 0.11439853882276968, 0.10769063915719043, 0.10351906732805186, 0.11318013410963439, 0.11629156994381355, 0.12334015047696967, 0.11079646278899995, 0.10078968235572755, 0.10131781525374844, 0.11550876417313556, 0.10149876801378697, 0.11569291384548559, 0.11957067341362441, 0.11156427129569636, 0.10951409283790041], 6: [0.07739961881371132, 0.09698300156998424, 0.10408966955344642, 0.08712399100739379, 0.09364011092549408, 0.07712761253261656, 0.08858200792182522, 0.10727657523274527, 0.0949258314107977, 0.09240331453309793, 0.10343386018302271, 0.10722625062313221, 0.09931432634903145, 0.09457855661391819, 0.10768276324536578, 0.1066230364897531, 0.08659793234922965, 0.09893738436480204, 0.09882995822728156, 0.0934688017312827], 8: [0.07608590900718176, 0.07932994880828675, 0.08396835290068594, 0.08044434309068284, 0.09034438816348161, 0.08871868939635573, 0.09455359113457563, 0.049726143611788974, 0.08828898481018808, 0.0728667668306979, 0.08086918240672653, 0.0693258880741426, 0.08924348209676518, 0.08428918693124253, 0.10129160260389777, 0.09842379650670478, 0.08584486900723234, 0.08080819484112435, 0.09608481067483719, 0.07425585931966994], 10: [0.08001385172693821, 0.07937579207578961, 0.055573533324939514, 0.08071535215387342, 0.08473031004626487, 0.0782200794266665, 0.0717170775155794, 0.07955361353920644, 0.07750363205397084, 0.08456981255751475, 0.06988427454751675, 0.07447605374178637, 0.07027880325721118, 0.049910389711733345, 0.08414826523995007, 0.08242957178139768, 0.0847803156640769, 0.09856361870686403, 0.051378237051015206, 0.08773927617174554]}
# -- NC --
errors['NC from1'] = {4: [0.13734498166177578, 0.12482365646691583, 0.13561717360819914, 0.11883866288738118, 0.11038473356874043, 0.12321797276705036, 0.12476661130483004, 0.1374452851434484, 0.12290569078473933, 0.12639582915664663, 0.12639972866857418, 0.13689708872610895, 0.12311375358206794, 0.12672835408194313, 0.12940068404470118, 0.13273843192272963, 0.12758211768085517, 0.1224652370371936, 0.12390542212316012, 0.12193640063076425], 6: [0.09409701664565615, 0.0894469976707086, 0.11827353748150964, 0.08702817300682926, 0.12269774870300011, 0.1019620155632699, 0.116830535272654, 0.10806901426130076, 0.09134650430874479, 0.11545514480868087, 0.0756050117888105, 0.08194676966081137, 0.11112355557873156, 0.08555399838930187, 0.0949667896946977, 0.12551095554820574, 0.10320434020685285, 0.11972068870657404, 0.10729374539474691, 0.10597585008967073], 8: [0.12218441025960336, 0.07647957586967952, 0.07969189670313279, 0.09813796193650454, 0.08349653278086096, 0.11290838919634046, 0.09414837222960169, 0.06479700626809912, 0.10098382055031038, 0.08497446066073415, 0.07294438601169287, 0.06934449368850407, 0.06971435902951782, 0.09329496524997617, 0.12216072706194672, 0.06479383744560113, 0.10201511898739019, 0.11849587331061268, 0.12043699497358147, 0.08738039958314582], 10: [0.04402540960865365, 0.06635656070236404, 0.08021445150579344, 0.0632694309855465, 0.058922347489353634, 0.10662800891175339, 0.0641207496293762, 0.08170705572423022, 0.085493903836368, 0.05716930497035131, 0.038967217673202645, 0.12336234922197993, 0.051274009425492185, 0.04292435802895406, 0.06257573230810476, 0.12241389146788205, 0.0854645146920853, 0.08520655827924309, 0.06479013741881809, 0.06825244771685388]}
errors['NC single1'] = {4: [0.1438715383536027, 0.1344739323074044, 0.14082738083440488, 0.134465737958164, 0.14307079380915816, 0.12608194430818456, 0.14012037106189285, 0.10356419977033446, 0.1427303469840847, 0.1357947722349173, 0.13997012477308274, 0.14001369040014058, 0.14082738083440488, 0.14310349297667013, 0.13568137810645362, 0.1432787520578976, 0.1253174333071537, 0.1407453977021052, 0.14046380277215104, 0.13944767510911188], 6: [0.12487093249216889, 0.1182448335492305, 0.10848308539341142, 0.09325061587846772, 0.10250340727786139, 0.12544922883309859, 0.10306870465301443, 0.08690429625916318, 0.09470574966951649, 0.08818257646379471, 0.1295427591619354, 0.11480435818362626, 0.11727307111945817, 0.12701817335395288, 0.12063684335780854, 0.11704973832653592, 0.10959763412265698, 0.1323657131197144, 0.11928073835032639, 0.09417382795807204], 8: [0.08333303297989553, 0.1220009966161928, 0.11191420316075401, 0.06373797280748375, 0.110234052305664, 0.13381847929078658, 0.09055125904180907, 0.06240211632954043, 0.13016469590778074, 0.10312664520034807, 0.09588237172827968, 0.0993949542790607, 0.07011420141417857, 0.08310569753832137, 0.07144002766303803, 0.11995357414444228, 0.10003083748250645, 0.09821653612615092, 0.10526190403813952, 0.07979997242136996], 10: [0.059112296795995264, 0.07060828755048029, 0.1038523207670447, 0.09122167185254662, 0.08601538064392841, 0.06913248689896785, 0.05895573035311135, 0.10664907397698586, 0.08708666406563456, 0.09206535517760736, 0.10450932048457352, 0.08991860417115217, 0.08250134497231268, 0.11087797769193992, 0.08016037415307195, 0.07873142332226447, 0.06563944975731684, 0.08181647590088448, 0.09154024635383486, 0.09886571812036889]}
# -- MI --
errors['MI from1'] = {4: [0.06526475059135126, 0.07263576297910555, 0.0805654616186796, 0.07558002305862663, 0.07892687783905622, 0.07892917805258308, 0.06482264623155681, 0.07222277870343129, 0.07198855049848796, 0.08645789025023128, 0.06036886881916402, 0.07091804666129513, 0.06465637964243003, 0.0876990538664372, 0.07874697239788574, 0.06341523961094905, 0.09655018661817936, 0.0728112191357526, 0.07570709680753543, 0.0697047585544615], 6: [0.05937090962624587, 0.08431702429912488, 0.05495684731452914, 0.05626229679519742, 0.07593051747331295, 0.060684305408246014, 0.0643169057455892, 0.05552908407843642, 0.05769439188792858, 0.045619104301131114, 0.058069113127307996, 0.06516498812269679, 0.0893954220002498, 0.08470273869963595, 0.07107066438850497, 0.08013592221688748, 0.04937569834653419, 0.06224174934710714, 0.053189036469503424, 0.06267056991629649], 8: [0.04390370989458254, 0.04380453905832362, 0.06098060020423806, 0.066569442119378, 0.06648109020994708, 0.05117705978778979, 0.0577184501341358, 0.053815187968133185, 0.060114355273820835, 0.0360634078280241, 0.07481464441331305, 0.05714129429961235, 0.03427970090811627, 0.12211581733555413, 0.04984074999209233, 0.05049813954656777, 0.05594669261680632, 0.10209986821910562, 0.06393959817359998, 0.06182158081913633], 10: [0.051697983421237305, 0.0357766753972797, 0.043323790151773955, 0.052142765258232755, 0.07265088025565117, 0.0384419067206317, 0.027576441532685448, 0.05452812521489387, 0.04516487535884603, 0.03614701986156453, 0.03194896490866109, 0.03248128880113709, 0.033048401982786794, 0.06423568285417766, 0.06700907352142511, 0.05756815241059459, 0.03848012164219851, 0.039195440840952715, 0.04117560441259898, 0.057926280158099024]}
errors['MI single1'] = {4: [0.07150146137045924, 0.07534249653841515, 0.08096970634939825, 0.07222164919269583, 0.08504918315451158, 0.0655260786108823, 0.07823268555084577, 0.08013546119399072, 0.07875924793151431, 0.07725929721431413, 0.08588818916991664, 0.07146722168600028, 0.07222946957256399, 0.08588818916991664, 0.07285867025943459, 0.07634056717951908, 0.07084780499121145, 0.07641610418040276, 0.07676927439140674, 0.08346900864633373], 6: [0.05797424905481146, 0.06923772795089542, 0.07561363340630853, 0.06975378729952142, 0.06452939350615197, 0.07876983057841895, 0.0736782147063357, 0.06958986783025724, 0.06272985110153159, 0.057695093032778, 0.0530525630565851, 0.06270095302486112, 0.07147272834091475, 0.0658605608816525, 0.07687323447662196, 0.06994747679259163, 0.064515665187866, 0.06757958287994718, 0.07159629900201812, 0.06806011072790615], 8: [0.058289805922948086, 0.05660000044163828, 0.03693383701456472, 0.04717897977607753, 0.06675858951126257, 0.06500107690048436, 0.061316558775109324, 0.062200889135484036, 0.06307121264887665, 0.05878175460836052, 0.05708188569219626, 0.049513658476672544, 0.05599426265215325, 0.03946707763931756, 0.06277765660934376, 0.058460941693387475, 0.06240528301196263, 0.06724216351189286, 0.06064327498569557, 0.06760230473117942], 10: [0.058189150821660277, 0.0550585476982178, 0.04685015274998835, 0.04489229258759693, 0.05752567404283083, 0.0494278651736501, 0.045654420292096924, 0.051921895786451684, 0.04752258749420864, 0.05436633474498935, 0.05368608625822757, 0.0304229648814008, 0.0652990280306322, 0.061695534459521216, 0.05003090224847075, 0.06485166303035755, 0.04096573171941814, 0.05600623969643404, 0.04660906165869701, 0.046095617744083786]}

# Zero-based positions of the entries in `options`: [0, 1, ..., len(options) - 1].
depths_log = list(range(len(options)))

# Linear-model MARE per state. Every `lin_err` entry is a (MARE, colour) pair:
# '<state> from1' (cumulative) rows carry 'blue', '<state> single1'
# (non-cumulative) rows carry 'orange'.
_linear_mare_by_state = [
    # state, from1 MARE, single1 MARE
    ('CA', 0.07882819497780959, 0.12130853703582326),
    ('FL', 0.07715402425291812, 0.09790750594108193),
    ('GA', 0.13506, 0.10605),
    ('IL', 0.07052, 0.10986),
    ('MI', 0.06643, 0.08613),
    ('NC', 0.12125, 0.12217),
    ('NY', 0.11876, 0.12704),
    ('OH', 0.13971, 0.13703),
    ('PA', 0.08304, 0.09449),
    ('TX', 0.08973209185987645, 0.15352029555597502),
]

# Expand each state row into its two keyed entries, 'from1' first, so the
# insertion order matches a hand-written state-by-state listing.
lin_err = {
    '{0} {1}'.format(st, kind): (value, colour)
    for st, from1_value, single1_value in _linear_mare_by_state
    for kind, value, colour in (('from1', from1_value, 'blue'),
                                ('single1', single1_value, 'orange'))
}


# Per-state box plots: one box per maximum depth in `options`, built from the
# values in errors['<state> <interval>'][depth], with the linear-model MARE
# drawn as a dotted horizontal reference line.
fig = plt.figure(figsize = (18,75), constrained_layout=True)
plt.rc('font', size=24)

states=['CA','TX','FL','NY','PA','IL','OH','GA','NC','MI']
# boxplot() places the k-th box at x = k (1-based), so the tick positions follow
# the number of depth settings instead of being hard-coded to four.
tick_positions = list(range(1, len(options) + 1))
ax = dict()
# The 10 x 2 grid reserves one row per state, but only slots 17-20 are drawn
# here, i.e. the rows of states[8] ('NC') and states[9] ('MI').
for i in range(17,21):
    # Odd slots (left column) hold the 'single1' series, even slots 'from1'.
    interval = 'single1' if i%2==1 else 'from1'
    # Slots 2r+1 and 2r+2 both belong to row r; integer arithmetic avoids the
    # float rounding trick previously used to get the row index.
    state = states[(i - 1) // 2]
    key = '{0} {1}'.format(state,interval)
    ax[i] = fig.add_subplot(10,2,i)
    the_boxes = [errors[key][dd] for dd in options]
    ax[i].boxplot(the_boxes)
    ax[i].axhline(y=lin_err[key][0], color='blue', linestyle=':', label='Linear model')
    ax[i].set_xticks(tick_positions, list(options))
    ax[i].set_title("{0} {1}umulative".format(state,'C' if interval=='from1' else 'Non-c'))
    ax[i].set_xlabel("Max depth")
    ax[i].set_ylabel("MARE")
    ax[i].legend()

plt.show()

In [None]:
# ALL MAREs + BOXPLOTS (USA)

# Maximum-depth settings compared; they are also the keys of every inner dict of `errors`.
options = [4,6,8,10]
# errors['<group> <interval>'][depth] -> list of MARE values (one box in the box plots below).
# <group> is TOP, 2930 or ALL; <interval> is 'from1' (plotted as "Cumulative") or
# 'single1' (plotted as "Non-cumulative").
# NOTE(review): the values are hard-coded, presumably pasted from earlier runs -- confirm their origin.
# NOTE(review): the lists are not all the same length, e.g. '2930 from1'[6], 'ALL from1'[6] and
# 'ALL single1'[8] hold 14 values and '2930 single1'[8] holds 13, while their neighbours hold 15 --
# confirm that no run was lost when the numbers were copied in.
errors = dict()

# -- TOP10 --
errors['TOP from1'] = {4: [0.19933297691885282,0.22521251954324598,0.207095987741527,0.20368163088361765,0.19052415835439998,0.24002492644319376,0.18815557359114507, 0.20388454879775256, 0.22566157906523784,0.21154846718537387, 0.21145452381717703, 0.20631729711694657,0.1969548347851832, 0.23101045772370707, 0.22592285035939744],
                       6: [0.19819759192214503,0.22445324570471895,0.18495714161658589,0.1934344214866833,0.18855832438493247,0.1721603130031015,0.18403167338758958, 0.1742858176451267, 0.1768383782256373,0.1992011762069552, 0.1868703822873946, 0.19309785554823486,0.2071913759171278, 0.2163143995356923, 0.2012489501827682],
                       8: [0.17368409987674222,0.18680896828014135,0.17843909174381564,0.19069980791952126,0.21414684340433265,0.17457358291497468,0.22179879102132785, 0.17020593299473663, 0.18043798746086567,0.19756214734881278, 0.23205453136758325, 0.17623587628627965,0.1792173368429657, 0.1687935031214721, 0.21287141318162053],
                      10: [0.17303199214896073,0.16600918422089025,0.17665297879191713,0.19205838599747235,0.18799488033996498,0.1651198066224214,0.1674554625947972, 0.17293460957534135, 0.17013373526683115,0.20657449082557422, 0.16449221532242722, 0.20292161940975315,0.17398109997395844, 0.21559376366047533, 0.21059911528584477]}
errors['TOP single1'] = {4: [0.12935740546654828, 0.12401396514971427, 0.1241585685382054,0.12668033364208592, 0.12439531996038991, 0.12728207377098033,0.13133262671635199, 0.12850836615085698, 0.12339800466371566,0.1261395885023979, 0.1272096583589788, 0.1306745456843341,0.12304654524906095, 0.12850836615085698, 0.1300418421512653],
                         6: [0.11705000754292351, 0.12171097957261705, 0.12226535537471175,0.12001767922040633, 0.1263990437489172, 0.11961376916826064,0.12026370702356246, 0.11894041879467462, 0.12596221315180908,0.12499828703054511, 0.11976281164671912, 0.12281560592314039,0.12232197076568119, 0.12094619342919764, 0.12170970830978545],
                         8: [0.1273320710244, 0.11873099456125608, 0.12207351515971161,0.11961062791086256, 0.11746915389597465, 0.11698334526887524,0.12018284699787551, 0.11436966010343916, 0.11472398913221546,0.11891580290454568, 0.12200639196987233, 0.12119826105565465,0.12138088193763627, 0.12046350003305553, 0.1179221475598681],
                        10: [0.11661190488517888, 0.12045535310995968, 0.11929623496621793,0.11738456701074523, 0.1155309722714031, 0.11612975822963295,0.12184767391900977, 0.11559489524528618, 0.11529852072793943,0.11996601684168198, 0.1167453422730448, 0.11778585145189738,0.11894102271896158, 0.1181663666932716, 0.11543837158013191]}

# -- 29/30 --
errors['2930 from1'] = {4: [0.3125859277742697,0.32990619440449476,0.32018661313563207,0.29973238669323343,0.31601705962063664,0.2975799559875016,0.3587815124231563,0.31789973050733167,0.3071013148485277,0.29973238669323343,0.30620186116095705,0.3025299050491288,0.301090725820133,0.30783695480021106,0.3178997305073305],
                        6: [0.28559255250258325,0.28097822315971027,0.3343795500491453,0.3376838295332867,0.28185352398479996,0.2950418685667153,0.285459290058431,0.30386267014906077,0.28455795625361313,0.34034079751496815,0.30991306267451574,0.2785907088898535,0.2997390280394261,0.31126419872319744],
                        8: [0.28564816326496745,0.2770541315031589,0.32234215884081135,0.28342218543902736,0.2662058738900327,0.2735906622896779,0.2959051259067192,0.2516868058181431,0.2813661881960891,0.29278848761814724,0.2999700997721025,0.29088339599938196,0.3268828258452247,0.34281550374997616,0.27583940372376176],
                       10: [0.3610096904351589,0.31823140698833996,0.2905140565690702,0.2951922389618865,0.2742370226485044,0.26876334371047134,0.3173071624205732,0.29506628143876357,0.2489548127777094,0.3062214978226318,0.2790251388745037,0.27122835335349094,0.2774473174884538,0.2827920592176004,0.2755546035410414]}
errors['2930 single1'] = {4: [0.16456729990343757,0.1601631740234981,0.16504737789255722,0.16434881804725912,0.16331103446416972,0.1621318600187578,0.16175139069479888,0.16446269641921168,0.15991366828306816,0.15845934597512076,0.15942767712827405,0.15727433453715584,0.16092956767691993,0.16107767619970376,0.16240751561990338],
                          6: [0.15868184313022668,0.15658647120022332,0.1632346098621593,0.16282396664197044,0.1566490889691341,0.1564440420717021,0.15593172368387964,0.15513560118628958,0.1568343779233425,0.15746458488472617,0.15590183180677195,0.155471621131811,0.15750297998717538,0.1560145131629556,0.15562190024536382],
                          8: [0.15667874589935718,0.15496210610636946,0.15599022230334506,0.15314896227841604,0.15421548349045877,0.15300262919195756,0.15530863298893338,0.1540018077554052,0.15769720943399543,0.1533424483635794,0.15370080520687215,0.1543583058331613,0.15415987189581973],
                         10: [0.15432351028757144,0.15549163781902564,0.15591567416291538,0.15435153036585947,0.1537473831691259,0.15057426256271683,0.15408326860812696,0.15349551860268643,0.15713095353941609,0.15576310013920353,0.15766789696134095,0.1546251396183985,0.15502268478293627,0.15048079648109497,0.15220608496534274]}
# -- ALL --
errors['ALL from1'] = {4: [0.37113113804324793,0.4074814546070812,0.41608941716165543,0.35708807937876147,0.356659744190598,0.35358833792001854,0.37529886947110086,0.33981956496984517,0.35564437962007983,0.3645860800890135,0.3676947099994381,0.3226872007865616,0.33002822338994303,0.35358833792001854,0.3537139622738606],
                        6: [0.33908208786513533, 0.3470483944464909, 0.4345334451312266, 0.40230177144226714, 0.3181024653678942, 0.3505725497442544, 0.2986010848646997, 0.3248146206850841, 0.3295376246359931, 0.41398101362782136, 0.37215121186099975, 0.29715295749758003, 0.3285466154263908, 0.35752494612601426],
                        8: [0.3025945862179459,0.28541594941140436,0.28090850095029063,0.2849227408541411,0.2982569604380419,0.34745354860291444,0.35971645968450666,0.30608080811261756,0.3169465776727614,0.3306697739742814,0.3242042069029312,0.3084476271463339,0.29879577887731495,0.3157646313633009,0.3078402678732194],
                       10: [0.3251488363254347,0.31161110769921374,0.3334982572933777,0.33219465604438925,0.4298081221257896,0.2881494306927124,0.29398104322191754,0.32338796120005375,0.3241526782873932,0.3175182729268957,0.29038207463947247,0.2817891794651361,0.32640983732879786,0.35010561284989644,0.34048212859221205]}
errors['ALL single1'] = {4: [0.17414037070412317,0.18238855958933037,0.17787562378995794,0.17518414531277535,0.17645738248853332,0.17567103508810236,0.17567140243475748,0.17567103508810245,0.17518414531277535,0.17209583586306634,0.17518414531277532,0.17390772313867295,0.177478023024606,0.17518414531277535,0.17518414531277535],
                          6: [0.1679125500808802, 0.17831552794523675, 0.1759219590404129, 0.1735587622200435, 0.16925915814201387, 0.16950824348109583, 0.16935090614028978, 0.16570828663749798, 0.17181080733841583, 0.17101546890659544, 0.1713098355926668, 0.1719143541333759, 0.17369907787689257, 0.16967757289319138, 0.16786470430493408],
                          8: [0.1673718962891232,0.1689129176356755,0.1675600565477909,0.16597823403953607,0.1711255847430264,0.16645329345059356,0.16718304313532356,0.16286444425407698,0.16313904630777112,0.17101962184346095,0.16386346161092716,0.16934314794468744,0.16857974984893917,0.16311983865936924],
                         10: [0.1629921111843113,0.16534500493006132,0.1665893718237455,0.17188944503522402,0.1587772667681276,0.1806947483065519,0.16879275474136807,0.17269356292171043,0.1685171257192419,0.1635462869047645,0.16304496488661024,0.1652575636529015,0.17014977707213305,0.16405343648843307,0.17125500286332396]}


# Zero-based positions of the entries in `options`: [0, 1, ..., len(options) - 1].
depths_log = list(range(len(options)))

# Linear-model MARE for each state group as (MARE, colour) pairs:
# 'from1' (cumulative) entries carry 'blue', 'single1' (non-cumulative) 'orange'.
lin_err = {
    'TOP from1': (0.2387, 'blue'),
    'TOP single1': (0.1441, 'orange'),
    '2930 from1': (0.3210, 'blue'),
    '2930 single1': (0.1878, 'orange'),
    'ALL from1': (0.4126, 'blue'),
    'ALL single1': (0.2437, 'orange'),
}


# Emit the 'TOP' group as ' & '-separated cells: for each interval, the linear-model
# MARE followed by the mean MARE at the first four depth settings. Nothing ends the
# line (end=' & '), so both intervals land on a single output line.
for interval in ('single1', 'from1'):
    row_key = 'TOP {0}'.format(interval)
    toprint = [np.array(errors[row_key][dd]).mean() for dd in options]
    cells = [str(lin_err[row_key][0])] + [toprint[k] for k in range(4)]
    print(*cells, sep=' & ', end=' & ')
    


# Box plots of the MARE values per maximum depth on a 3 x 2 grid: one row per state
# group (ALL, 2930, TOP), non-cumulative ('single1') on the left and cumulative
# ('from1') on the right. The dotted line marks the linear-model MARE of that panel.
fig = plt.figure(figsize = (18,22.5), constrained_layout=True)
plt.rc('font', size=24)

# (key into `errors` / `lin_err`, panel title), listed in subplot order (row-major).
panels = [
    ('ALL single1', "All states, Non-cumulative"),
    ('ALL from1', "All states, Cumulative"),
    ('2930 single1', "29-30 round states, Non-cumulative"),
    ('2930 from1', "29-30 round states, Cumulative"),
    ('TOP single1', "Top 10 states, Non-cumulative"),
    ('TOP from1', "Top 10 states, Cumulative"),
]
# boxplot() places the k-th box at x = k (1-based), so the tick positions follow
# the number of depth settings instead of being hard-coded to four.
tick_positions = list(range(1, len(options) + 1))

axes = []
for slot, (key, title) in enumerate(panels, start=1):
    panel_ax = fig.add_subplot(3,2,slot)
    the_boxes = [errors[key][dd] for dd in options]
    panel_ax.axhline(y=lin_err[key][0], color='blue', linestyle=':', label='Linear model')
    panel_ax.boxplot(the_boxes)
    panel_ax.set_xticks(tick_positions, list(options))
    panel_ax.set_title(title)
    panel_ax.set_xlabel("Max depth")
    panel_ax.set_ylabel("MARE")
    panel_ax.legend()
    axes.append(panel_ax)

# Keep the per-panel names bound so any code that refers to ax1..ax6 still works.
ax1, ax2, ax3, ax4, ax5, ax6 = axes

plt.show()