In [1]:
from datenguide_python.query_builder import QueryBuilder
from datenguide_python.query_execution import QueryExecutioner

In [2]:
from unittest.mock import Mock

In [85]:
def nuts_query(nuts_level):
    """Build a mocked query object that lists the regions of one NUTS level.

    The returned ``Mock`` has two configured methods: ``get_fields()``
    returns ``['allRegions','id','name']`` and ``get_graphql_query()``
    returns a paginated ``allRegions`` GraphQL query whose ``regions``
    selection is filtered with ``nuts : <nuts_level>``.

    Parameters
    ----------
    nuts_level
        Value placed into the query text via ``str()``.
    """
    # Query text before and after the interpolated level; the whitespace
    # inside both literals is part of the returned string.
    query_head = """
    query AR($page: Int!,$itemsPerPage: Int!){
        allRegions(page: $page, itemsPerPage:$itemsPerPage) {
        regions (nuts : """
    query_tail = """) {
        id
        name
        }
        page
        itemsPerPage
        total
        }
    }
    """
    stub = Mock()
    stub.configure_mock(**{
        'get_fields.return_value': ['allRegions','id','name'],
        'get_graphql_query.return_value': query_head + str(nuts_level) + query_tail,
    })
    return stub

def lau_query(lau_level):
    """Build a mocked query object that lists the regions of one LAU level.

    The returned ``Mock`` has two configured methods: ``get_fields()``
    returns ``['allRegions','id','name']`` and ``get_graphql_query()``
    returns a paginated ``allRegions`` GraphQL query whose ``regions``
    selection is filtered with ``lau : <lau_level>``.

    Parameters
    ----------
    lau_level
        Value placed into the query text via ``str()``.
    """
    # Query text before and after the interpolated level; the whitespace
    # inside both literals is part of the returned string.
    query_head = """
    query AR($page: Int!,$itemsPerPage: Int!){
        allRegions(page: $page, itemsPerPage:$itemsPerPage) {
        regions (lau : """
    query_tail = """) {
        id
        name
        }
        page
        itemsPerPage
        total
        }
    }
    """
    stub = Mock()
    stub.configure_mock(**{
        'get_fields.return_value': ['allRegions','id','name'],
        'get_graphql_query.return_value': query_head + str(lau_level) + query_tail,
    })
    return stub

In [86]:
# Mocked query builder for the unfiltered, paginated `allRegions` query:
# it selects `id` and `name` of the regions without a nuts/lau argument.
qb_all = Mock()
qb_all.configure_mock(**{
    'get_fields.return_value': ['allRegions','id','name'],
    'get_graphql_query.return_value': """
query AR($page: Int!,$itemsPerPage: Int!){
    allRegions(page: $page, itemsPerPage:$itemsPerPage) {
    regions {
    id
    name
    }
    page
    itemsPerPage
    total
    }
}
""",
})



In [87]:
# Run the unfiltered query and one query per NUTS / LAU level through a
# single executioner. The generators are consumed left to right, so the
# calls happen in the order: all, nuts 1-3, lau 1-2.
qe = QueryExecutioner()
all_regions = qe.run_query(qb_all)
r_nuts1, r_nuts2, r_nuts3 = (qe.run_query(nuts_query(n)) for n in (1, 2, 3))
r_lau1, r_lau2 = (qe.run_query(lau_query(n)) for n in (1, 2))

In [92]:
# Per-level query results, keyed by the label later stored in the `level` column.
levels = dict(
    nuts1=r_nuts1,
    nuts2=r_nuts2,
    nuts3=r_nuts3,
    lau1=r_lau1,
    lau2=r_lau2,
)

In [9]:
import pandas as pd

In [93]:
# One DataFrame per result page of the unfiltered query, stacked into one frame.
all_regions_df = pd.concat(
    [
        pd.DataFrame(result_page['data']['allRegions']['regions'])
        for result_page in all_regions.query_results
    ]
)

In [122]:
# For every entry of `levels`: stack its result pages into one frame and tag
# the rows with the entry's key in a `level` column; then stack all levels.
level_df = pd.concat(
    pd.concat(
        [
            pd.DataFrame(result_page['data']['allRegions']['regions'])
            for result_page in levels[level_name].query_results
        ]
    ).assign(level=level_name)
    for level_name in levels
)

In [141]:
# Number of rows returned per level, smallest first.
level_df['level'].value_counts(ascending=True)

nuts1       16
nuts2       35
nuts3      483
lau2     13963
lau1     13963
Name: level, dtype: int64

In [138]:
# How many ids there are of each character length, smallest count first.
level_df['id'].str.len().value_counts(ascending=True)

2        16
3        35
5       483
11     1734
10     5476
8     20716
Name: id, dtype: int64

In [137]:
# (shortest, longest) id length within each level.
level_df.groupby('level').apply(
    lambda group: (group['id'].str.len().min(), group['id'].str.len().max())
)

level
lau1     (8, 11)
lau2     (8, 11)
nuts1     (2, 2)
nuts2     (3, 3)
nuts3     (5, 5)
dtype: object

In [144]:
# Regions from the unfiltered query whose id was not returned by any of the
# level-filtered queries (left join leaves `level` missing for them).
no_level = (
    all_regions_df
    .merge(level_df, how='left', on='id')
    .query('level.isna()')
)
print(no_level.shape)
no_level.head()

(180, 4)


Unnamed: 0,id,name_x,name_y,level
9561,DG,Deutschland,,
16316,0713101,Adenau,,
16391,0713102,Altenahr,,
16416,0713103,Bad Breisig,,
16425,0713104,Brohltal,,


In [15]:
# Region table with a `level` column holding the character length of each id.
# Built from `all_regions_df`: `all_regions` is the raw object returned by
# `qe.run_query` (its pages are read through `.query_results`), so calling
# `.assign` on it only worked with stale kernel state and fails on a
# top-to-bottom run.
ar_detail = all_regions_df.assign(level=lambda df: df['id'].str.len())

In [67]:
def isAnscestor(region_id,candidate):
    """Return True when ``candidate`` is a proper prefix of ``region_id``.

    An id is never its own ancestor: equal strings yield False.
    """
    if not region_id.startswith(candidate):
        return False
    return candidate != region_id

def isDescendent(region_id,candidate):
    """Return True when ``region_id`` is a proper prefix of ``candidate``.

    An id is never its own descendant: equal strings yield False.
    """
    if not candidate.startswith(region_id):
        return False
    return candidate != region_id

def children(region_id):
    """Return the nearest descendants of ``region_id`` from ``ar_detail``.

    Rows whose id passes ``isDescendent`` are kept; of those, only the rows
    with the smallest ``level`` are returned. The result carries the boolean
    helper column ``desc``.
    """
    is_below = ar_detail.id.map(lambda other_id: isDescendent(region_id, other_id))
    descendants = ar_detail.assign(desc=is_below).query('desc')
    # `min_lev` is referenced by name inside the query expression below.
    min_lev = descendants['level'].min()
    return descendants.query('level == @min_lev')

def parent(region_id):
    """Return the id of the closest ancestor of ``region_id`` in ``ar_detail``.

    Ancestors are the rows whose id passes ``isAnscestor`` (a proper prefix
    of ``region_id``). Among them the row with the largest ``level`` is the
    closest one.

    Returns
    -------
    The ``id`` value of that row, or ``None`` when ``ar_detail`` holds no
    ancestor of ``region_id``.
    """
    ancestors = ar_detail.assign(ansc = lambda df:df.id.map(lambda i: isAnscestor(region_id,i))).query('ansc')
    # `max_lev` is referenced by name inside the query expression below.
    max_lev = ancestors.level.max()
    parent_frame = ancestors.query('level == @max_lev')
    if parent_frame.empty:
        # Explicit return: the original branch was a bare `None` expression.
        return None
    return parent_frame.iloc[0,:].loc['id']


In [44]:
# All regions whose name contains "Frankfurt".
ar_detail.loc[ar_detail['name'].str.contains('Frankfurt')]

Unnamed: 0,id,name,level
36,12053,Frankfurt (Oder),5
234,12053000,Frankfurt (Oder),8
870,6412,Frankfurt am Main,5
649,6412000,Frankfurt am Main,8


In [99]:
# Look up a single region by its exact id string.
ar_detail.loc[ar_detail['id'] == "05911"]

Unnamed: 0,id,name,level
777,5911,Bochum,5


In [70]:
# Region table extended with a `parent` column: the result of `parent(id)` per row.
parent_rel = ar_detail.assign(parent=ar_detail['id'].map(parent))

In [73]:
def children(region_id):
    """Return the rows of ``parent_rel`` whose ``parent`` equals ``region_id``."""
    direct_children = parent_rel.query('parent == @region_id')
    return direct_children

In [79]:
# Rows of `parent_rel` whose parent is the region with id '16'.
children(region_id='16')

Unnamed: 0,id,name,level,parent
156,16051,Erfurt,5,16
157,16052,Gera,5,16
158,16053,Jena,5,16
159,16054,Suhl,5,16
160,16055,Weimar,5,16
161,16056,Eisenach,5,16
162,16061,Eichsfeld,5,16
163,16062,Nordhausen,5,16
164,16063,Wartburgkreis,5,16
165,16064,Unstrut-Hainich-Kreis,5,16
