Religion Notebook

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Wikipedia article that contains the per-state religion table.
url = "https://en.wikipedia.org/wiki/Religion_in_Germany#cite_note-2011zensus-4"

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    health_soup = BeautifulSoup(response.content, 'html.parser')
else:
    # Fail loudly: every later cell reads `health_soup`, so carrying on would only
    # surface as a NameError (or silently reuse a soup left over from an earlier run).
    print('Couldn\'t reach', url)
    raise RuntimeError(f"Request for {url} returned HTTP {response.status_code}")

In [None]:
# Keep a local, pretty-printed snapshot of the fetched page.
# The encoding is explicit because the platform default (e.g. cp1252 on Windows)
# may be unable to encode characters on the page and raise UnicodeEncodeError.
with open("religion.html", 'w', encoding="utf-8") as file:
    file.write(health_soup.prettify())

In [None]:
# Locate the table whose class attribute is exactly "sortable wikitable".
religion_table = health_soup.find('table', class_='sortable wikitable')
if religion_table is None:
    raise ValueError("No table with class 'sortable wikitable' was found in the page")

# All rows are kept. The former `not row.find('flagicon')` test searched for a
# <flagicon> *tag* (not a CSS class), so it presumably never excluded any row.
rows = religion_table.find_all('tr')

# One list per output column; entries at the same position belong to the same row.
state_list = []
protestant_list = []
catholic_list = []
non_rel_list = []
muslim_list = []
other_list = []

for row in rows:
    # Find all table data elements in the row
    cells = row.find_all('td')

    # Only rows with exactly six <td> cells carry a complete record;
    # rows made of <th> cells yield no <td> and are skipped here.
    if len(cells) != 6:
        continue

    # Prefer the first link that actually has text: a link wrapping only an image
    # has empty text, and a cell without any link used to raise AttributeError.
    # Fall back to the plain cell text when no link provides a name.
    link_texts = (link.get_text(strip=True) for link in cells[0].find_all('a'))
    state = next((text for text in link_texts if text), cells[0].get_text(strip=True))

    protestant, catholic, non_religious, muslim, other = (
        cell.get_text(strip=True) for cell in cells[1:]
    )

    # Append data to respective lists
    state_list.append(state)
    protestant_list.append(protestant)
    catholic_list.append(catholic)
    non_rel_list.append(non_religious)
    muslim_list.append(muslim)
    other_list.append(other)

In [None]:
# Locate the table whose class attribute is exactly "sortable wikitable".
religion_table = health_soup.find('table', class_='sortable wikitable')
if religion_table is None:
    raise ValueError("No table with class 'sortable wikitable' was found in the page")

# One list per output column; entries at the same position belong to the same row.
# NOTE: "prodestant" is a misspelling of "protestant"; the name is kept because
# later cells read `prodestant_list`.
state_list = []
prodestant_list = []
catholic_list = []
non_rel_list = []
muslim_list = []
other_list = []

rows = religion_table.find_all('tr')
for row in rows:
    # Find all table data elements in the row
    cells = row.find_all('td')

    # Only rows with exactly six <td> cells carry a complete record;
    # rows made of <th> cells yield no <td> and are skipped here.
    if len(cells) != 6:
        continue

    # Cell order: state, Protestant, Catholic, non-religious, Muslim, other.
    state, prodestant, catholic, non_rel, muslim, other = (
        cell.get_text(strip=True) for cell in cells
    )

    # Store the extracted data
    state_list.append(state)
    prodestant_list.append(prodestant)
    catholic_list.append(catholic)
    non_rel_list.append(non_rel)
    muslim_list.append(muslim)
    other_list.append(other)
        

In [None]:
# Sanity check: print the length of each column list, one per line, in the order
# state, Protestant, Catholic, non-religious, Muslim, other.
for column_values in (state_list, prodestant_list, catholic_list,
                      non_rel_list, muslim_list, other_list):
    print(len(column_values))

In [None]:
# Display the collected state names as the cell output.
state_list

In [None]:
# Assemble the scraped column lists into one frame.
# Pairing names with lists keeps the column order: State first, then the five groups.
column_names = ['State', 'Protestant', 'Catholic', 'Nonrel', 'Muslim', 'Other']
column_values = [state_list, prodestant_list, catholic_list,
                 non_rel_list, muslim_list, other_list]
rel_df = pd.DataFrame(dict(zip(column_names, column_values)))

In [None]:
# Strip the percent sign from every religion column and convert it to float.
# `astype(str)` keeps the cell re-runnable: on a second run the columns are already
# float, and the `.str` accessor would otherwise raise AttributeError.
# `regex=False` treats '%' as a literal regardless of the pandas version's default.
for column in ['Protestant', 'Catholic', 'Nonrel', 'Muslim', 'Other']:
    rel_df[column] = (
        rel_df[column]
        .astype(str)
        .str.replace('%', '', regex=False)
        .astype(float)
    )


In [None]:
# Row labels removed from the frame.
# NOTE(review): presumably the rows that are not a single federal state (e.g. split
# or aggregate rows) — confirm against the scraped table before relying on this.
ROWS_TO_DROP = [4, 5, 17]

# `errors='ignore'` lets the cell be re-run: once the labels are gone, a second
# drop would otherwise raise KeyError. The index is deliberately not reset.
rel_df = rel_df.drop(index=ROWS_TO_DROP, errors='ignore')

In [None]:
# English -> German names for the states whose two spellings differ.
# States missing from this mapping keep their 'State' value (see the fillna below).
alternative_names = {
    'Lower Saxony': 'Niedersachsen',
    'Bavaria': 'Bayern',
    'Hesse': 'Hessen',
    'North Rhine-Westphalia': 'Nordrhein-Westfalen',
    'Rhineland-Palatinate': 'Rheinland-Pfalz',
    'Thuringia': 'Thüringen',
    # Previously missing, which left these two rows with their English names.
    'Saxony': 'Sachsen',
    'Saxony-Anhalt': 'Sachsen-Anhalt'
}

# List of states in order from rel_df (kept for reference; nothing below reads it)
states_rel_df = [
    'Baden-Württemberg',
    'Bavaria',
    'Brandenburg',
    'Bremen',
    'Hamburg',
    'Hesse',
    'Lower Saxony',
    'Mecklenburg-Vorpommern',
    'North Rhine-Westphalia',
    'Rhineland-Palatinate',
    'Saarland',
    'Saxony',
    'Saxony-Anhalt',
    'Schleswig-Holstein',
    'Thuringia'
]

# Add the alternative-name column: the mapped name where one exists,
# otherwise the value of the 'State' column itself.
rel_df['Alternative Name'] = rel_df['State'].map(alternative_names).fillna(rel_df['State'])

# The column was created just above, so the former
# "'Alternative Name' in rel_df.columns" check could only ever be true.
featureidkey = 'Alternative Name'


In [None]:
# Write the cleaned table to religion.csv without the index column.
rel_df.to_csv('religion.csv', index=False)

In [None]:
import plotly.express as px
import json
import numpy as np

In [None]:
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import json

# Load the per-state percentages from the CSV file.
religion_data = pd.read_csv("religion.csv")

# Parse the GeoJSON document into a plain dict.
with open("germany-states.geojson", "r", encoding="utf-8") as geojson_file:
    geojson_data = json.loads(geojson_file.read())

def find_state_name(state_name):
    """Return the GeoJSON ``NAME_1`` of the feature that matches ``state_name``.

    Features of the module-level ``geojson_data`` are checked in file order. A
    feature matches when ``state_name`` equals its ``NAME_1``, equals its whole
    ``VARNAME_1`` string, or equals one of the ``|``-separated variants inside
    ``VARNAME_1``.
    NOTE(review): the ``|`` separator is assumed from common GeoJSON exports of
    administrative boundaries — confirm against the actual file.

    Args:
        state_name: Name to look up.

    Returns:
        The matching feature's ``NAME_1``, or ``None`` when nothing matches.
    """
    for feature in geojson_data["features"]:
        # Tolerate features whose "properties" member is null or absent.
        properties = feature.get("properties") or {}
        canonical_name = properties.get("NAME_1")
        if canonical_name is None:
            continue
        if canonical_name == state_name:
            return canonical_name

        variants = properties.get("VARNAME_1")
        # VARNAME_1 may be missing or null; only strings can hold variants.
        if isinstance(variants, str) and (
            variants == state_name
            or state_name in (variant.strip() for variant in variants.split("|"))
        ):
            return canonical_name
    return None

# Resolve every row to the GeoJSON NAME_1 it belongs to, using the 'State' value first.
religion_data["State_Name"] = religion_data["State"].apply(find_state_name)

# Rows that did not match get a second lookup with the 'Alternative Name' column,
# when the CSV provides one.
if "Alternative Name" in religion_data.columns:
    fallback_names = religion_data["Alternative Name"].apply(find_state_name)
    religion_data["State_Name"] = religion_data["State_Name"].fillna(fallback_names)

# Report rows that still have no feature; they would be absent from the map.
unmatched_states = religion_data.loc[religion_data["State_Name"].isna(), "State"].tolist()
if unmatched_states:
    print("No GeoJSON feature found for:", unmatched_states)

# One row per feature, one column per property key.
geo_properties = pd.DataFrame(
    [feature.get("properties") or {} for feature in geojson_data["features"]]
)

# Left join keeps every religion row, matched or not.
merged_data = pd.merge(religion_data, geo_properties,
                       left_on="State_Name", right_on="NAME_1", how="left")

# Create the Dash application object.
app = dash.Dash(__name__)

# (label shown next to the radio button, value passed to the callback)
religion_choices = [
    ('Protestant', 'Protestant'),
    ('Catholic', 'Catholic'),
    ('Non-religious', 'Nonrel'),
    ('Muslim', 'Muslim'),
    ('Other Religion', 'Other'),
]

# Page components, top to bottom: heading, selector, map.
page_heading = html.H1("Religion Distribution in Germany")
religion_selector = dcc.RadioItems(
    id='religion-selector',
    options=[{'label': label, 'value': value} for label, value in religion_choices],
    value='Protestant',
    labelStyle={'display': 'block'}
)
choropleth_graph = dcc.Graph(id='choropleth-map')

app.layout = html.Div([page_heading, religion_selector, choropleth_graph])

# Rebuild the map figure whenever the selected radio item changes.
@app.callback(
    Output('choropleth-map', 'figure'),
    [Input('religion-selector', 'value')]
)
def update_map(selected_religion):
    """Build the choropleth figure for one column of ``merged_data``.

    Args:
        selected_religion: Value of the 'religion-selector' radio group; used as
            the name of the ``merged_data`` column that colours the map.

    Returns:
        The figure assigned to the 'choropleth-map' graph.
    """
    caption = f"{selected_religion} Population in Germany"
    # The colour scale runs from 0 to the largest value in the chosen column.
    colour_ceiling = merged_data[selected_religion].max()

    choropleth = px.choropleth(
        merged_data,
        geojson=geojson_data,
        featureidkey="properties.NAME_1",
        locations='NAME_1',
        color=selected_religion,
        color_continuous_scale="Viridis",
        range_color=(0, colour_ceiling),
        labels={selected_religion: caption},
        title=caption,
    )
    choropleth.update_geos(
        showcountries=False,
        showcoastlines=False,
        showland=True,
        fitbounds="locations",
    )
    return choropleth

# Run the app
if __name__ == '__main__':
    # Newer Dash releases replace `run_server` with `run`; use `run` when the
    # installed version has it and fall back to `run_server` otherwise.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)
