### Merging area_by_powiat.xlsx & area_by_voivodship.xlsx
### Calculating each powiat's share of its voivodship's area

In [1]:
import pandas as pd
import numpy as np

Load & process area_by_powiat.xlsx

In [21]:
# Load the per-powiat area sheet and derive, for every powiat, its mean area
# over the yearly columns plus the code of the voivodship it belongs to.
year_cols = ['2017', '2018', '2019', '2020', '2021']

# Voivodship codes in this dataset are multiples of 100000 (200000, 400000, ...),
# and a powiat code shares its voivodship's leading digits (201000 -> 200000).
voivod_code_step = 100000

df1 = pd.read_excel('area_by_powiat.xlsx')
# Rows labelled 0-2 and the 'Unnamed: 0' column are discarded
# (presumably spreadsheet header rows / a leftover index column - confirm against the file).
df1 = df1.drop([0, 1, 2]).drop(columns=['Unnamed: 0'])
df1.columns = ['code', 'county'] + year_cols

# Row-wise mean of the yearly area values.
df1["county_area_mean"] = df1[year_cols].mean(axis=1)

# Round the powiat code down to the nearest voivodship code using integer
# arithmetic only, so no float round-trip is involved.
df1["voivod_code"] = df1['code'].astype(int) // voivod_code_step * voivod_code_step

# Optional diagnostic (disabled): rank powiats by how much their area varies across years.
# df1["area_variance"] = df1[['2017', '2018', '2019', '2020', '2021']].var(axis=1)
# df1 = df1.sort_values(by='area_variance',ascending=False)

df1.head()

Unnamed: 0,code,county,2017,2018,2019,2020,2021,county_area_mean,voivod_code
3,201000,powiat bolesławiecki,1304,1304,1304,1304,1304,1304.0,200000
4,202000,powiat dzierżoniowski,479,479,479,479,479,479.0,200000
5,203000,powiat głogowski,443,443,443,443,443,443.0,200000
6,204000,powiat górowski,738,738,738,738,738,738.0,200000
7,205000,powiat jaworski,582,582,582,582,582,582.0,200000


Load & process area_by_voivodship.xlsx

In [22]:
# Load the per-voivodship area sheet and compute each voivodship's mean area
# over the yearly columns.
voivod_year_cols = ['2017', '2018', '2019', '2020', '2021']

# Rows labelled 0-2 and the 'Unnamed: 0' column are discarded in a single drop
# (presumably spreadsheet header rows / a leftover index column - confirm against the file).
df2 = pd.read_excel('area_by_voivodship.xlsx').drop(
    index=[0, 1, 2],
    columns=['Unnamed: 0'],
)
df2.columns = ['code', 'voivodship', *voivod_year_cols]

# Row-wise mean of the yearly area values.
df2["voivod_area_mean"] = df2.loc[:, voivod_year_cols].mean(axis=1)

# Optional diagnostic (disabled): rank voivodships by how much their area varies across years.
# df2["area_variance"] = df2[['2017', '2018', '2019', '2020', '2021']].var(axis=1)
# df2 = df2.sort_values(by='area_variance',ascending=False)

df2.head()

Unnamed: 0,code,voivodship,2017,2018,2019,2020,2021,voivod_area_mean
3,200000,dolnośląskie,19947,19947,19947,19947,19947,19947.0
4,400000,kujawsko-pomorskie,17972,17972,17971,17971,17971,17971.4
5,600000,lubelskie,25122,25122,25123,25123,25123,25122.6
6,800000,lubuskie,13988,13988,13988,13988,13988,13988.0
7,1000000,łódzkie,18219,18219,18219,18219,18219,18219.0


Merge powiats & voivodships

In [25]:
# Attach each powiat to its voivodship: the powiat's derived 'voivod_code'
# is matched against the voivodship's own 'code' (inner join, so powiats
# without a matching voivodship row are not kept).
powiat_part = df1[['code', 'voivod_code', 'county', 'county_area_mean']]
voivod_part = df2[['code', 'voivodship', 'voivod_area_mean']]

df = powiat_part.merge(
    voivod_part,
    how='inner',
    left_on='voivod_code',
    right_on='code',
)

# Both inputs carry a 'code' column, so the merge suffixes them:
# code_x is the powiat code, code_y the voivodship code.
# The left-hand 'voivod_code' was only the join key; drop it so code_y can take that name.
df = df.drop(columns=['voivod_code'])
df = df.rename(columns={'code_x': 'county_code', 'code_y': 'voivod_code'})
df.head()

Unnamed: 0,county_code,county,county_area_mean,voivod_code,voivodship,voivod_area_mean
0,201000,powiat bolesławiecki,1304.0,200000,dolnośląskie,19947.0
1,202000,powiat dzierżoniowski,479.0,200000,dolnośląskie,19947.0
2,203000,powiat głogowski,443.0,200000,dolnośląskie,19947.0
3,204000,powiat górowski,738.0,200000,dolnośląskie,19947.0
4,205000,powiat jaworski,582.0,200000,dolnośląskie,19947.0


Sanity check: compare the summed powiat areas with the area of each voivodship

In [47]:
# Total of the powiat mean areas within each voivodship.
summed_powiat_areas = (
    df.groupby("voivodship")["county_area_mean"]
    .sum()
    .reset_index(name="sum_of_pow_areas")
)

# One row per voivodship holding its own mean area.
voivodship_areas = df[['voivodship', 'voivod_area_mean']].drop_duplicates(subset=["voivodship"])

# Side-by-side table; 'voivodship' is the only column the two frames share.
pd.merge(summed_powiat_areas, voivodship_areas, on="voivodship")

Unnamed: 0,voivodship,sum_of_pow_areas,voivod_area_mean
0,dolnośląskie,19947.0,19947.0
1,kujawsko-pomorskie,17971.4,17971.4
2,lubelskie,25122.6,25122.6
3,lubuskie,13988.0,13988.0
4,mazowieckie,35558.6,35558.6
5,małopolskie,15183.0,15183.0
6,opolskie,9412.0,9412.0
7,podkarpackie,17846.0,17846.0
8,podlaskie,20187.0,20187.0
9,pomorskie,18320.0,18320.0


In [45]:
# Fraction of the voivodship's mean area covered by each powiat,
# rounded to 6 decimal places.
area_ratio = df['county_area_mean'] / df['voivod_area_mean']
df['powiat_weight'] = area_ratio.round(6)
df.head()

Unnamed: 0,county_code,county,county_area_mean,voivod_code,voivodship,voivod_area_mean,powiat_weight
0,201000,powiat bolesławiecki,1304.0,200000,dolnośląskie,19947.0,0.065373
1,202000,powiat dzierżoniowski,479.0,200000,dolnośląskie,19947.0,0.024014
2,203000,powiat głogowski,443.0,200000,dolnośląskie,19947.0,0.022209
3,204000,powiat górowski,738.0,200000,dolnośląskie,19947.0,0.036998
4,205000,powiat jaworski,582.0,200000,dolnośląskie,19947.0,0.029177


In [46]:
# Persist the powiat -> voivodship weights. The row index is written as the
# first (unnamed) column, which is pandas' default behaviour, stated explicitly here.
df.to_csv(path_or_buf="powiat_to_voivodship_ratio_by_area.csv", index=True)