In [131]:
import pandas as pd

# Data fetching and cleaning

In [132]:
url = 'https://de.wikipedia.org/wiki/Gemeinden_des_Kantons_Waadt'

df_communities_1 = pd.read_html(url)[0]
df_communities_1 = df_communities_1.rename(columns={
    'Name der Gemeinde': 'community_name',
    'Fläche in km² [1]': 'area_km2',
}).drop(columns=['Wappen', 'Einwohner (31.\xa0Dezember 2022)', 'Einw. pro km²', 'Bezirk (bis 2007)', 'Bezirk (ab 2008)'])
df_communities_1 = df_communities_1[df_communities_1['community_name'] != 'Total (300)']


df_communities_2 = pd.read_csv('raw/SteuerbaresEinkommen_CH.csv', sep=';')
df_communities_2 = df_communities_2[df_communities_2['VARIABLE'] == 'Steuerbares Einkommen pro Steuerpflichtigem/-r, in Franken']
df_communities_2 = df_communities_2.rename(columns={
    'GEO_NAME': 'community_name',
    'VALUE'   : 'taxable_income_million_CHF'
})[['community_name', 'taxable_income_million_CHF']]
df_communities_2.fillna(0, inplace=True)

df_communities = df_communities_1.merge(df_communities_2, on='community_name')
display(df_communities)


Unnamed: 0,community_name,area_km2,taxable_income_million_CHF
0,Aclens,390,79585.0
1,Agiez,546,70694.0
2,Aigle,1641,60548.0
3,Allaman,260,98512.0
4,Arnex-sur-Nyon,204,100518.0
...,...,...,...
288,Vully-les-Lacs,2092,74996.0
289,Yens,951,116253.0
290,Yverdon-les-Bains,1354,61649.0
291,Yvonand,1340,67055.0


In [133]:
df_communities['criteria1'] = df_communities['area_km2'] ** 2

In [134]:
def taxable_income_rating(x):
    if x > 60000:
        return x * 2;
    else:
        return x;

df_communities['criteria2'] = df_communities['taxable_income_million_CHF'].apply(taxable_income_rating)

In [135]:
def get_number_of_words(x):
    length = len(x.split(' '))
    if length % 2 == 0:
        return length * 2
    return length

df_communities['criteria3'] = df_communities['community_name'].apply(get_number_of_words)

# Calculate the final score

In [136]:
def normalize_column(df, column_name):
    min_val = df[column_name].min()
    max_val = df[column_name].max()
    return ((df[column_name] - min_val) / (max_val - min_val)) * 100

df_communities['criteria1'] = normalize_column(df_communities, 'criteria1')
df_communities['criteria2'] = normalize_column(df_communities, 'criteria2')
df_communities['criteria3'] = normalize_column(df_communities, 'criteria3')

In [137]:
df_communities['score'] = df_communities['criteria1'] * 0.4 + df_communities['criteria2'] * 0.2 + df_communities['criteria3'] * 0.4

display(df_communities.sort_values('score', ascending=False))

Unnamed: 0,community_name,area_km2,taxable_income_million_CHF,criteria1,criteria2,criteria3,score
146,Le Chenit,9919,66116.0,100.000000,11.403459,100.0,82.280692
6,Arzier-Le Muids,5190,118209.0,27.377115,20.388279,100.0,55.028502
147,Le Lieu,3254,67512.0,10.761285,11.644236,100.0,46.633361
278,Villeneuve (VD),3198,64572.0,10.394044,11.137155,100.0,46.385049
140,La Rippe,1659,113422.0,2.796466,19.562634,100.0,45.031113
...,...,...,...,...,...,...,...
185,Mutrux,323,0.0,0.105064,0.000000,0.0,0.042026
56,Chêne-Pâquier,211,0.0,0.044275,0.000000,0.0,0.017710
186,Novalles,206,0.0,0.042156,0.000000,0.0,0.016862
229,Rossenges,108,0.0,0.010879,0.000000,0.0,0.004351


In [138]:
df_export = df_communities.sort_values('score', ascending=False)
df_export.to_csv('rankings/3_bfs.csv', index=False)