In [None]:
# Base location of all input workbooks (county table and commuter tables).
path = 'http://hpcagainstcorona.sc.bs.dlr.de/data/migration/'

# Expected sizes, used as sanity checks while parsing the input tables.
num_counties, num_govregions = 401, 34  # counties / local governing regions

# Tolerances for the per-county checksum of the commuter migration:
# maximum absolute and maximum relative error allowed per county.
abs_tol, rel_tol = 100, 0.01

In [None]:
import os
import pandas as pd
import collections
import numpy as np

In [None]:
# Load the county table; an integer sheet_name selects the sheet by zero-based position.
counties = pd.read_excel(
    os.path.join(path, 'kreise_deu.xlsx'),
    sheet_name=1,
)

In [None]:
# Preview only the first rows, rendered by the notebook's rich display,
# instead of printing the plain-text repr of the whole table.
counties.head(10)

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the county table.
counties.info()

In [None]:
# Collect regional identifiers from the county table (rows are processed in table order):
#   countykey_list - 5-digit county keys
#   countypop_list - value of column 5 for each county, in the same order as countykey_list
#   govkey_list    - 2- or 3-digit keys of the (local) government regions
countykey_list = []
countypop_list = []
govkey_list = []

num_rows = counties.shape[0]

for i in range(num_rows):
    # Single cells are read by position with .iat. The chained form counties.iloc[i][0]
    # indexes a row Series with an integer position, which recent pandas deprecates,
    # and it builds a complete row Series for every access.
    key = counties.iat[i, 0]
    key_len = len(str(key))

    # regional county identifiers (5 digits)
    if key_len == 5 and key.isdigit():
        countykey_list.append(key)
        countypop_list.append(counties.iat[i, 5])
        continue

    # government region keys (2 or 3 digits): only rows whose key is shorter
    # than the key in the next row are candidates
    if i >= num_rows - 1 or key_len >= len(str(counties.iat[i + 1, 0])):
        continue

    # workaround for old gov. regions and Saxony: such rows are never gov. regions themselves
    if str(counties.iat[i, 1]).startswith(('früher', 'Direktion')):
        continue

    if key_len != 4 and len(str(counties.iat[i + 1, 0])) == 5:
        # key is directly followed by a county key:
        # these rows correspond to 'local government' regions (except for BW, RP and Saxony)
        govkey_list.append(key)

    elif i < num_rows - 2 and len(str(counties.iat[i + 2, 0])) == 5:
        # a county key follows only two rows further down
        if key_len == 3:
            # workaround for BW; 'government regions' are again divided but do not appear
            # as such in documents of the Arbeitsagentur
            govkey_list.append(key)

        elif key_len == 2 and str(counties.iat[i + 1, 1]).startswith(('früher', 'Direktion')):
            # workaround for RP ('früher': government regions were dissolved) and for
            # Saxony ('Direktionsbezirke' not referred to in commuter migration)
            govkey_list.append(key)

if(len(govkey_list) != num_govregions):
    print('Error. Number of government regions wrong. Having', len(govkey_list), 'instead of', num_govregions)

In [None]:
# Verify that the county keys are unique and sorted ascending; the population list and
# all matrix indices below rely on this ordering.
countykey_list_unique = np.unique(np.array(countykey_list))
if len(countykey_list_unique) != len(countykey_list):
    # previously a duplicate key made the element-wise comparison run out of bounds
    print('Error. Duplicate county keys in input list.')

county_keys_numeric = [int(key) for key in countykey_list]
if any(later < earlier for earlier, later in zip(county_keys_numeric, county_keys_numeric[1:])):
    print('Error. Input list not sorted, population per county list had to be sorted accordingly.')

# create a hashmap from sorted regional identifiers (01001 - ...) to 0 - num_counties
key2matindex = collections.OrderedDict((key, idx) for idx, key in enumerate(countykey_list))

if len(countykey_list)!=num_counties:
    print("Error. Number of counties wrong.")

# create a hash map from sorted gov keys to local list
govkey2local = collections.OrderedDict((key, idx) for idx, key in enumerate(govkey_list))

if len(govkey_list)!=num_govregions:
    print("Error. Number of governing regions wrong.")

In [None]:
# Build gov_table: one list of county keys per government region, in the order of govkey_list.
# The single pass below only works if both key lists are sorted.
gov_table = []

gov_index = 0
col_index = 0
col_list = []

last_pos = len(countykey_list) - 1
for pos, county_key in enumerate(countykey_list):
    region_prefix = str(govkey_list[gov_index])

    # county belongs to the government region currently being filled
    if str(county_key).startswith(region_prefix):
        col_list.append(county_key)
        col_index += 1

    # the following county no longer matches: close this region and move on to the next
    if pos < last_pos and not str(countykey_list[pos + 1]).startswith(region_prefix):
        gov_table.append(col_list)
        col_list = []
        gov_index += 1
        col_index = 0

gov_table.append(col_list) # add last government region

if(len(gov_table) != num_govregions):
    print('Error. Number of government regions wrong.')

# For every county key store the index of its government region (key2govkey)
# and its position inside that region's list (key2localkey).
key2govkey = collections.OrderedDict()
key2localkey = collections.OrderedDict()
for region_pos, region_counties in enumerate(gov_table):
    for local_pos, county_key in enumerate(region_counties):
        key2govkey[county_key] = region_pos
        key2localkey[county_key] = local_pos

In [None]:
# Group the government region keys by federal state: one inner list per state, in key order.
state_gov_table = []

state_id = 1
state_govlist_loc = []
last_pos = len(govkey_list) - 1
for pos, gov_key in enumerate(govkey_list):
    state_prefix = str(state_id)

    # keys are compared without their leading zero (int() strips it)
    if str(int(gov_key)).startswith(state_prefix):
        state_govlist_loc.append(gov_key)

    # the following key belongs to another state: store the finished list and advance
    if pos < last_pos and not str(int(govkey_list[pos + 1])).startswith(state_prefix):
        state_id += 1
        state_gov_table.append(state_govlist_loc)
        state_govlist_loc = []

state_gov_table.append(state_govlist_loc) # add last state's list

In [None]:
# Matrix of commuter migration patterns, assembled from the 16 per-state workbooks
# 'krpend_01_0.xlsx' ... 'krpend_16_0.xlsx' (sheet index 3 of each).
# mat_commuter_migration[row, col] is the number of commuters from the county with matrix
# index `row` into the county with matrix index `col` (indices taken from key2matindex).
# In each workbook a row with a 5-digit key in column 0 starts a new destination county;
# origins are read from column 2 (key), column 3 (label) and column 4 (count).
# Origins that are only given as a sum over a government region or a federal state are
# distributed over the counties of that unit which were not listed explicitly,
# proportionally to countypop_list.
# NOTE(review): all cell reads use the chained form iloc[i][k], i.e. integer-position access
# on a row Series; recent pandas versions deprecate this - consider .iat[i, k].
mat_commuter_migration = np.zeros((num_counties,num_counties))

# maximum errors (of people not detected) over all destination counties
max_abs_err = 0
max_rel_err = 0

# file names of the 16 per-state workbooks (two-digit, zero-padded state number)
files = []
for n in range(1,10):
    files.append('krpend_0'+str(n)+"_0.xlsx")
for n in range(10,17):
    files.append('krpend_'+str(n)+"_0.xlsx")
    
n=0 # number of workbooks processed so far
for item in files:
    # Using the 'Einpendler' sheet to correctly distribute summed values over counties of other gov. region
    commuter_migration_file = pd.read_excel(os.path.join(path,item), sheet_name=3)
    #commuter_migration_file.info()

    counties_done = [] # keys of the destination counties encountered in this workbook (only appended to)
    current_row = -1 # row of matrix that belongs to county migrated from
    current_col = -1 # column of matrix that belongs to county migrated to
    checksum = 0 # sum of county migration from, to be checked against sum in document

    for i in range(0, commuter_migration_file.shape[0]):

        #print(commuter_migration_file.iloc[i][1])
        #if(str(commuter_migration_file.iloc[i][0]).startswith('03354')):            

        # a 5-digit key in column 0 starts the section of a new destination county
        if(len(str(commuter_migration_file.iloc[i][0]))==5 
           and (commuter_migration_file.iloc[i][0]).isdigit()):
            checksum = 0
            # make zero'd list of counties explicitly migrated to from county considered
            # 'implicit' migration means 'migration to' which is summed in a larger regional entity and not given in detail per county
            # counties_migratedfrom[g][k] becomes 1 once county k of government region g has
            # received a value for the current destination (same layout as gov_table)
            counties_migratedfrom = []
            for j in range(0,len(gov_table)):
                counties_migratedfrom.append(np.zeros(len(gov_table[j])))        

            counties_done.append(commuter_migration_file.iloc[i][0])
            current_col = key2matindex[commuter_migration_file.iloc[i][0]]
            curr_county_migratedto = commuter_migration_file.iloc[i][1] # used in the error report below
            current_key = commuter_migration_file.iloc[i][0]
            current_name = commuter_migration_file.iloc[i][1] # not used further in this cell
            # migration to itself excluded!
            # (the destination county is flagged so that it never receives a share of a summed value)
            counties_migratedfrom[key2govkey[current_key]][key2localkey[current_key]] = 1 

        if(type(commuter_migration_file.iloc[i][2]) != float): # removal of nan's, regional keys are stored as strings

            if((commuter_migration_file.iloc[i][2]).isdigit()): # check if entry is a digit       
                #print(commuter_migration_file.iloc[i][0], commuter_migration_file.iloc[i][2], type(commuter_migration_file.iloc[i][2]))
                #print((commuter_migration_file.iloc[i][2]).isdigit(), float(commuter_migration_file.iloc[i-1][2]), str(commuter_migration_file.iloc[i-1][2]).startswith('nan'))
                # explicit migration from county to county
                if(len(str(commuter_migration_file.iloc[i][2]))==5): # check if entry refers to a specific county, then set matrix value
                    current_row = key2matindex[commuter_migration_file.iloc[i][2]]
                    val = commuter_migration_file.iloc[i][4]
                    mat_commuter_migration[current_row, current_col] = val
                    checksum += val
                    #print(val)
                    # flag the origin county as handled for the current destination
                    counties_migratedfrom[key2govkey[commuter_migration_file.iloc[i][2]]][key2localkey[commuter_migration_file.iloc[i][2]]] = 1
                    # print(current_row, current_col, val)

                # take summed values of other REMAINING counties of government region
                # here, some counties of the region are stated explicitly and the rest is summed
                elif(str(commuter_migration_file.iloc[i][3])=='Übrige Kreise (Regierungsbezirk)' and str(commuter_migration_file.iloc[i][4]).isdigit()):

                    # reduce the key to its first two characters if the third character is '0'
                    # (dummy key w/o zeros: dummy_key_wozeros), so that it can be looked up in govkey2local
                    dummy_key_wozeros = str(commuter_migration_file.iloc[i][2])
                    if(len(dummy_key_wozeros)>2 and dummy_key_wozeros[2]=='0'):
                        dummy_key_wozeros = dummy_key_wozeros[0:2]                     

                    # sum population of all counties not explicitly migrated from of the current gov region migrated from
                    dummy_pop_sum = 0
                    for k in range(0, len(gov_table[govkey2local[dummy_key_wozeros]])):
                        if(counties_migratedfrom[govkey2local[dummy_key_wozeros]][k]<1):
                            # get identifier (0-401) for county key
                            globindex = key2matindex[gov_table[govkey2local[dummy_key_wozeros]][k]]
                            # sum up
                            dummy_pop_sum += countypop_list[globindex]

                    # distribute emigration relatively to county population where migration comes from
                    #dummy_checksum = 0
                    for k in range(0, len(gov_table[govkey2local[dummy_key_wozeros]])):
                        if(counties_migratedfrom[govkey2local[dummy_key_wozeros]][k]<1):
                            # get identifier (0-401) for county key
                            globindex = key2matindex[gov_table[govkey2local[dummy_key_wozeros]][k]]
                            counties_migratedfrom[govkey2local[dummy_key_wozeros]][k] = 1

                            # set value computed relatively to county size and effective migration
                            current_row = globindex
                            val = commuter_migration_file.iloc[i][4]*countypop_list[globindex]/dummy_pop_sum
                            checksum += val
                            #dummy_checksum += val
                            mat_commuter_migration[current_row, current_col] = val
                    #print(dummy_checksum)

                # take summed values of ALL counties of a government region
                # here, no single county of the region is stated explicitly, all counties are summed together
                # (the key is a government region key and none of its counties is flagged yet)
                elif(commuter_migration_file.iloc[i][2] in govkey_list and sum(counties_migratedfrom[govkey2local[commuter_migration_file.iloc[i][2]]])==0):

                    # sum population of all counties not explicitly migrated to of the current gov region migrated to
                    dummy_pop_sum = 0
                    for k in range(0, len(gov_table[govkey2local[commuter_migration_file.iloc[i][2]]])):
                        if(counties_migratedfrom[govkey2local[commuter_migration_file.iloc[i][2]]][k]<1):
                            # get identifier (0-401) for county key
                            globindex = key2matindex[gov_table[govkey2local[commuter_migration_file.iloc[i][2]]][k]]
                            # sum up
                            dummy_pop_sum += countypop_list[globindex]

                    # distribute emigration relatively to county population where migration comes from
                    #dummy_checksum = 0
                    for k in range(0, len(gov_table[govkey2local[commuter_migration_file.iloc[i][2]]])):
                        if(counties_migratedfrom[govkey2local[commuter_migration_file.iloc[i][2]]][k]<1):
                            # get identifier (0-401) for county key
                            globindex = key2matindex[gov_table[govkey2local[commuter_migration_file.iloc[i][2]]][k]]
                            counties_migratedfrom[govkey2local[commuter_migration_file.iloc[i][2]]][k] = 1

                            # set value computed relatively to county size and effective migration
                            current_row = globindex
                            val = commuter_migration_file.iloc[i][4]*countypop_list[globindex]/dummy_pop_sum
                            checksum += val
                            #dummy_checksum += val
                            mat_commuter_migration[current_row, current_col] = val
                    #print(dummy_checksum)   

                # take summed values of other REMAINING counties of a whole Bundesland
                # here, some counties of the Bundesland are stated explicitly and the rest is summed
                # The branch is taken if one of the following holds:
                #  - column 3 is 'Übrige Regierungsbezirke (Bundesland)' and column 4 is a digit string
                #  - the key is numeric and column 2 of the previous row is NaN
                #    (first origin row after a row without key)
                #  - the key has two characters and differs by exactly 1 or 2 from the key in the
                #    previous row; used if no single county nor gov region of a federal state is
                #    stated explicitly although there are existent government regions in this
                #    federal state (i.e., the state itself is not considered a government region
                #    according to govkey_list)
                # NOTE(review): for i == 0 the index i-1 refers to the last row of the sheet - confirm
                # that the first row never reaches this branch.
                elif((str(commuter_migration_file.iloc[i][3])=='Übrige Regierungsbezirke (Bundesland)' and str(commuter_migration_file.iloc[i][4]).isdigit())
                     or ((commuter_migration_file.iloc[i][2]).isdigit() and str(commuter_migration_file.iloc[i-1][2]).startswith('nan'))
                     or (len(str(commuter_migration_file.iloc[i][2]))==2 and
                         abs(float(commuter_migration_file.iloc[i][2])-float(commuter_migration_file.iloc[i-1][2]))==1)
                     or (len(str(commuter_migration_file.iloc[i][2]))==2 and
                             abs(float(commuter_migration_file.iloc[i][2])-float(commuter_migration_file.iloc[i-1][2]))==2)):

                    # auxiliary key of Bundesland (key translated to int starting at zero),
                    # used as index into state_gov_table
                    dummy_key = int(commuter_migration_file.iloc[i][2])-1   

                    # sum population of all counties not explicitly migrated from the current gov region migrated from
                    # NOTE(review): gov_index is the same name that is used while building gov_table
                    # and is overwritten here.
                    dummy_pop_sum = 0
                    for j in range(0, len(state_gov_table[dummy_key])): # over all government regions not explicitly stated
                        gov_index = govkey2local[state_gov_table[dummy_key][j]]
                        for k in range(0, len(gov_table[gov_index])): # over all counties of the considered gov region
                            if(counties_migratedfrom[gov_index][k]<1):
                                # get identifier (0-401) for county key
                                globindex = key2matindex[gov_table[gov_index][k]]
                                # sum up
                                dummy_pop_sum += countypop_list[globindex]

                    # distribute emigration relatively to county population where migration comes from   
                    #dummy_checksum = 0
                    for j in range(0, len(state_gov_table[dummy_key])): # over all government regions not explicitly stated
                        gov_index = govkey2local[state_gov_table[dummy_key][j]]
                        for k in range(0, len(gov_table[gov_index])): # over all counties of the considered gov region
                            if(counties_migratedfrom[gov_index][k]<1):
                                # get identifier (0-401) for county key
                                globindex = key2matindex[gov_table[gov_index][k]]
                                counties_migratedfrom[gov_index][k] = 1

                                # set value computed relatively to county size and effective migration
                                current_row = globindex
                                val = commuter_migration_file.iloc[i][4]*countypop_list[globindex]/dummy_pop_sum   
                                checksum += val
                                #dummy_checksum += val
                                mat_commuter_migration[current_row, current_col] = val
                                #print(countypop_list[globindex], dummy_pop_sum, val)

                    #print(dummy_checksum)



        # sum of total migration 'from': compare the accumulated checksum of the current
        # destination county with the total given in column 4 of the summary row
        # NOTE(review): abs_err/checksum divides by zero if nothing was accumulated
        # (checksum == 0) - confirm that this cannot occur.
        if(str(commuter_migration_file.iloc[i][3])=='Einpendler aus dem Bundesgebiet'):
            abs_err = abs(checksum - commuter_migration_file.iloc[i][4])
            if(abs_err > max_abs_err):
                max_abs_err = abs_err
            if(abs_err/checksum > max_rel_err):
                max_rel_err = abs_err/checksum                
            if(abs_err < abs_tol and abs_err/checksum < rel_tol):
                #print('Absolute error:', abs_err, '\t relative error:', abs_err/checksum)
                checksum = 0
            else:
                # tolerance exceeded: report and continue (checksum is reset at the next destination county)
                print('Error in calculations for county ',  curr_county_migratedto,
                      '\nAccumulated values:', checksum, 
                      ', correct sum:', commuter_migration_file.iloc[i][4])
                print('Absolute error:', abs_err, ', relative error:', abs_err/checksum)
                #break                

    n += 1
    print('Federal state read. Progress ',n,'/ 16')
if n!=16:
    print('Error. Files missing.')



print('Maximum absolute error:',max_abs_err)
print('Maximum relative error:',max_rel_err)

In [None]:
# Spot checks of individual matrix entries.
# mat_commuter_migration is indexed [county migrated from][county migrated to].
# Values distributed proportionally to population are floats and are therefore compared
# with a tolerance (np.isclose); explicitly listed values are compared exactly.

# check migration from Leverkusen (averaged from NRW, 05) to Hildburghausen
city_from = key2matindex['05316']
city_to = key2matindex['16069']
expected = 34*countypop_list[city_from]/17947221
if(countypop_list[city_from]!=163729 or not np.isclose(mat_commuter_migration[city_from][city_to], expected)):
    # report the entry that was actually tested (row = from, column = to)
    print(countypop_list[city_from], mat_commuter_migration[city_from][city_to])
    print('Error: 05316 -> 16069, expected', expected)

# check migration from Duisburg to Oberspreewald-Lausitz
city_from = key2matindex['05112']
city_to = key2matindex['12066']
if(mat_commuter_migration[city_from][city_to] != 10):
    print('Error: 05112 -> 12066, expected 10, got', mat_commuter_migration[city_from][city_to])

# check migration from Lahn-Dill-Kreis to Hamburg
city_from = key2matindex['06532']
city_to = key2matindex['02000']
if(mat_commuter_migration[city_from][city_to] != 92):
    print('Error: 06532 -> 02000, expected 92, got', mat_commuter_migration[city_from][city_to])

# check migration from Landsberg am Lech (averaged from 091) to Hersfeld-Rotenburg
city_from = key2matindex['09181']
city_to = key2matindex['06632']
expected = 47*120302/(4710865-1484226)
if(not np.isclose(mat_commuter_migration[city_from][city_to], expected)):
    print('Error: 09181 -> 06632, expected', expected, 'got', mat_commuter_migration[city_from][city_to])

# check migration between Flensburg, Stadt and Herzogtum Lauenburg
# NOTE(review): the entry tested is from key 01001 to key 01053; the earlier comment named
# the opposite direction - confirm which direction the reference value 17 belongs to.
city_from = key2matindex['01001']
city_to = key2matindex['01053']
if(mat_commuter_migration[city_from][city_to] != 17):
    print('Error: 01001 -> 01053, expected 17, got', mat_commuter_migration[city_from][city_to])

In [None]:
# compute approximate distance from (longitude,latitude) coordinates in degrees
# cf. https://en.wikipedia.org/wiki/Great-circle_distance
def compute_dist_from_latlong(coords1, coords2):
    """Return the great-circle distance in km between two points.

    Parameters
    ----------
    coords1, coords2 : array-like
        Points given as (longitude, latitude) in degrees. Arrays, lists and
        tuples are accepted.

    Returns
    -------
    Distance on a sphere with radius 6371 km, computed with the spherical
    law of cosines.
    """
    # convert to radians (np.asarray also makes plain lists/tuples usable)
    point1 = np.asarray(coords1, dtype=float)*np.pi/180
    point2 = np.asarray(coords2, dtype=float)*np.pi/180

    r=6371 # mean earth radius in km

    cos_angle = np.sin(point1[1])*np.sin(point2[1])+\
              np.cos(point1[1])*np.cos(point2[1])*np.cos(np.abs(point1[0]-point2[0]))

    # Rounding can push the value marginally outside [-1, 1] for identical or antipodal
    # points, for which arccos would return NaN.
    return r*np.arccos(np.clip(cos_angle, -1.0, 1.0))

In [None]:
# Compute distances and weigh commuter on longer distances with factor<1
# currently: linear distance 100-200km: fac=0.5
#                            >200km:    fac=0.2
# coords for center points of German counties ordered by regional key in [longitude,latitude]
# (the second column is stored scaled by 1.5 and is rescaled directly after the definition;
# ten coordinate pairs per line, line breaks only between pairs)
coords = np.array([
    [9.4,82.2],[10.1,81.5],[10.8,80.8],[10.0,81.1],[9.1,81.2],[10.6,80.4],[8.8,82.0],[10.9,81.3],[8.9,80.8],[10.4,81.4],
    [9.8,81.5],[9.5,81.9],[10.2,80.8],[9.5,80.9],[10.3,80.6],[9.4,80.5],[10.5,78.4],[10.4,78.2],[10.8,78.6],[10.6,78.9],
    [10.4,77.8],[10.8,78.4],[9.8,77.6],[10.2,78.5],[10.5,78.2],[10.1,77.3],[9.8,78.6],[8.7,79.1],[9.4,78.1],[9.9,78.1],
    [9.6,77.8],[9.1,79.0],[9.2,78.4],[10.1,79.1],[8.9,80.4],[10.0,80.0],[11.2,79.6],[10.6,79.8],[8.8,79.9],[9.3,79.9],
    [9.8,79.4],[9.4,80.4],[10.5,79.5],[9.2,79.5],[8.6,79.6],[7.2,80.0],[8.2,79.7],[8.1,78.4],[8.1,80.4],[8.0,79.8],
    [7.3,80.3],[7.9,79.4],[7.4,79.0],[8.0,80.3],[7.0,78.7],[7.3,80.0],[8.4,79.5],[8.1,78.6],[8.2,79.0],[8.4,80.0],
    [7.7,80.4],[8.7,80.0],[8.6,80.3],[6.8,76.9],[6.7,77.2],[7.0,77.2],[6.6,77.0],[6.4,76.8],[6.9,77.1],[6.9,77.3],
    [7.2,76.8],[7.1,76.8],[7.2,76.9],[6.2,77.4],[7.0,76.8],[6.7,76.8],[6.3,76.9],[6.6,77.4],[7.1,76.1],[7.0,76.4],
    [7.0,76.6],[6.2,76.1],[6.5,76.2],[6.7,76.3],[6.6,75.8],[6.2,76.6],[7.5,76.6],[7.2,76.5],[7.3,76.1],[6.9,77.4],
    [7.1,77.3],[7.6,77.9],[6.8,78.0],[7.4,77.8],[7.2,77.5],[7.6,78.4],[8.0,77.8],[8.5,78.0],[8.4,77.9],[8.7,78.2],
    [9.2,77.5],[9.0,78.0],[8.7,78.5],[8.7,77.5],[7.2,77.2],[7.5,77.3],[7.5,77.0],[7.8,77.5],[7.2,77.3],[7.3,77.0],
    [8.4,77.0],[7.7,76.9],[8.0,76.6],[8.2,76.4],[8.2,77.3],[7.6,77.3],[8.7,74.8],[8.6,75.2],[8.8,75.1],[8.2,75.1],
    [8.7,74.4],[8.8,74.8],[8.5,74.9],[8.5,75.4],[9.3,75.4],[8.5,75.1],[9.0,74.5],[8.8,75.1],[8.1,75.2],[8.9,75.5],
    [8.8,75.8],[8.4,76.0],[8.2,75.6],[8.8,76.3],[9.3,75.9],[9.5,77.0],[9.8,75.9],[9.7,76.4],[9.4,77.1],[9.4,76.5],
    [8.8,76.8],[9.9,76.8],[7.6,75.5],[7.0,75.7],[7.7,76.1],[7.7,74.7],[7.3,74.6],[7.2,75.2],[7.3,75.5],[7.5,75.8],
    [7.5,75.1],[7.9,75.4],[7.9,75.8],[6.6,74.7],[7.0,74.9],[6.4,75.1],[6.7,75.3],[6.7,74.6],[8.4,74.3],[7.7,74.1],
    [8.0,73.9],[8.4,74.2],[8.2,74.9],[8.2,74.0],[7.6,73.8],[8.4,74.0],[8.3,74.5],[7.4,73.9],[8.2,74.6],[8.1,74.2],
    [7.9,74.4],[8.2,73.7],[7.7,74.2],[7.5,74.3],[8.1,73.8],[8.4,74.1],[8.0,74.9],[7.6,73.8],[9.2,73.2],[9.0,73.0],
    [9.3,73.0],[9.7,73.0],[9.1,73.4],[9.5,73.4],[9.2,73.7],[9.2,73.8],[9.6,73.9],[9.9,73.7],[9.7,74.4],[10.2,73.0],
    [10.0,73.3],[8.2,73.1],[8.4,73.5],[8.6,73.6],[8.2,73.2],[8.7,74.1],[8.5,74.3],[9.3,74.2],[8.8,74.1],[8.7,73.3],
    [8.6,73.0],[8.7,73.4],[8.5,72.7],[7.8,72.0],[8.0,71.9],[7.9,72.2],[8.0,72.7],[8.5,72.4],[8.4,72.0],[8.8,72.0],
    [8.9,71.7],[7.8,71.6],[8.3,71.6],[9.4,72.6],[9.0,72.7],[8.9,72.4],[9.9,72.6],[9.9,72.6],[9.7,72.2],[9.4,71.6],
    [9.8,71.7],[9.3,72.1],[11.4,73.1],[11.5,72.2],[12.1,71.8],[12.7,72.3],[12.9,71.6],[11.5,71.6],[11.3,72.5],[11.9,72.1],
    [11.3,73.4],[12.0,72.4],[11.8,72.7],[11.2,72.3],[11.1,71.3],[11.0,72.1],[11.8,71.6],[12.3,72.4],[11.6,72.2],[11.2,73.0],
    [11.5,72.9],[12.1,71.8],[11.3,72.0],[12.6,71.8],[11.1,71.7],[12.2,72.8],[13.4,72.9],[12.6,73.3],[13.0,73.2],[13.5,73.2],
    [11.9,73.2],[12.2,72.8],[13.4,72.8],[13.1,73.5],[12.8,72.6],[12.5,73.4],[12.6,73.0],[11.8,74.2],[12.1,73.5],[12.2,74.5],
    [11.8,74.2],[12.7,73.9],[11.6,73.8],[12.1,74.5],[12.1,73.5],[12.3,74.1],[12.2,74.9],[10.9,74.8],[11.6,74.9],[11.0,75.4],
    [11.9,75.5],[10.8,74.9],[11.5,74.8],[11.0,75.4],[11.2,74.6],[11.8,75.4],[11.4,75.5],[11.5,75.2],[11.1,75.2],[12.0,75.1],
    [10.6,73.9],[11.0,74.4],[11.0,74.2],[11.1,74.2],[11.0,74.0],[10.5,73.9],[10.9,74.4],[10.9,74.2],[11.4,74.3],[10.4,74.4],
    [11.1,73.8],[10.9,73.5],[9.2,75.0],[10.2,75.1],[9.9,74.7],[9.2,75.0],[10.0,75.3],[10.3,75.6],[10.6,75.1],[10.3,74.6],
    [9.3,74.6],[9.7,75.0],[10.2,75.0],[9.9,74.6],[10.9,72.5],[10.6,71.8],[10.3,71.6],[10.2,72.0],[11.1,72.6],[10.7,72.5],
    [10.5,72.9],[10.4,72.5],[10.1,72.4],[9.8,71.4],[10.6,71.7],[10.4,72.0],[10.7,73.2],[10.2,71.4],[6.9,73.9],[6.7,74.2],
    [7.1,74.0],[6.8,74.0],[7.2,73.9],[7.1,74.3],[13.4,78.8],[12.5,78.6],[14.4,77.7],[14.5,78.5],[13.0,78.6],[13.8,79.2],
    [13.9,78.1],[13.5,77.4],[12.7,78.9],[14.1,79.0],[13.2,79.4],[13.9,77.4],[14.2,78.4],[12.6,79.5],[12.8,78.4],[11.8,79.7],
    [14.4,77.7],[13.2,78.2],[13.8,79.9],[12.1,81.2],[11.4,80.4],[13.0,80.4],[12.2,80.9],[13.0,81.5],[11.3,80.7],[13.7,80.6],
    [11.5,80.2],[12.9,76.2],[13.0,75.9],[13.1,76.4],[12.2,75.6],[12.5,76.1],[13.8,76.6],[14.2,76.9],[14.7,76.8],[13.5,76.8],
    [13.9,76.4],[12.4,77.0],[12.5,76.8],[12.7,77.2],[12.2,77.8],[12.0,77.2],[11.6,78.2],[11.2,79.0],[12.2,77.7],[11.4,78.3],
    [11.8,76.7],[11.0,77.7],[12.0,78.4],[11.3,77.3],[11.8,77.2],[11.6,77.8],[11.8,79.0],[12.7,77.7],[11.0,76.5],[12.1,76.3],
    [11.6,76.4],[10.7,75.9],[11.3,76.5],[10.3,76.5],[10.2,77.1],[10.7,77.3],[10.2,76.3],[10.6,76.8],[11.0,77.0],[10.4,75.9],
    [10.7,76.4],[11.2,76.7],[10.7,75.6],[11.0,76.1],[11.4,76.4],[11.1,75.6],[11.3,75.9],[11.7,76.4],[11.7,75.9],[12.1,76.1],
    [12.4,76.4]])
coords[:,1] = coords[:,1]/1.5 # yvalues were scaled by 1.5

# distance thresholds in km and the factors applied to commuter numbers beyond them
dist1 = 100
dist2 = 200
fac1 = 0.5 # factor for distances in (dist1, dist2]
fac2 = 0.2 # factor for distances > dist2

mat_commuter_migration_scaled = np.array(mat_commuter_migration, copy=True)
num_nodes = len(mat_commuter_migration)
if len(coords) != num_nodes:
    print('Error. Number of coordinates does not match number of counties.')

# pairwise distances between the county center points (symmetric, zero diagonal)
distances = np.zeros((num_nodes,num_nodes))
for i in range(num_nodes):
    for j in range(i+1,num_nodes):
        dist = compute_dist_from_latlong(coords[i],coords[j])
        distances[i,j] = dist
        distances[j,i] = dist

        # the thresholds and factors configured above are used instead of repeating the numbers
        if dist > dist2:
            fac = fac2
        elif dist > dist1:
            fac = fac1
        else:
            continue # short distance (or NaN): leave both directions unscaled

        mat_commuter_migration_scaled[i,j] *= fac
        mat_commuter_migration_scaled[j,i] *= fac
                


In [None]:
# Plausibility check of the computed distances for three city pairs.
cgn, fra, ber = (key2matindex[key] for key in ('05315', '06412', '11000')) # Koeln, Frankfurt, Berlin

# printed in this order: Koeln-Frankfurt (ca 150 km), Koeln-Berlin (ca 478 km), Frankfurt-Berlin (ca 423 km)
for origin, destination in ((cgn, fra), (cgn, ber), (fra, ber)):
    print(distances[origin, destination])

In [None]:
# Write the unscaled and the distance-scaled matrix as comma-separated text files.
np.savetxt(fname='mat_commuter.txt', X=mat_commuter_migration, delimiter=',')
np.savetxt(fname='mat_commuter_scaled.txt', X=mat_commuter_migration_scaled, delimiter=',')

In [None]:
# Share of the total commuter volume that remains after the distance scaling.
# ndarray.sum() reduces over all entries in numpy instead of iterating rows with the builtin sum.
mat_commuter_migration_scaled.sum()/mat_commuter_migration.sum()

In [None]:
# Show which matrix entries were changed by the distance scaling (non-zero difference).
from matplotlib.pyplot import spy
spy(np.subtract(mat_commuter_migration, mat_commuter_migration_scaled))