In [1]:
import pandas as pd
import plotly.express as px
import json
import plotly.graph_objects as go
import geopandas as gpd


data = pd.read_stata('C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/TrialData.dta')

recode_values = {
    'A lot': 1, 'Some': 1,
    'A little': 0, 'No trust': 0, 'Don\'t know': 0, 'No answer': 0
}

for column in ['TRT_police', 'TRT_prosecutors', 'TRT_pda', 'TRT_judges']:
    data[column] = data[column].map(recode_values)

long_format = data.melt(id_vars=['nuts_id','country_name_ltn', 'nuts_ltn'], value_vars=['TRT_police', 'TRT_prosecutors', 'TRT_pda', 'TRT_judges'],
                        var_name='Variable', value_name='Value')

long_format_clean = long_format[long_format['nuts_id'].notnull() & (long_format['nuts_id'].str.strip() != '')]

percentage_trust = long_format_clean.groupby(['nuts_id', 'Variable','country_name_ltn', 'nuts_ltn'])['Value'].mean() * 100
percentage_trust_df = percentage_trust.reset_index()
percentage_trust_df.rename(columns={'Value': 'Percentage'}, inplace=True)

def categorize_percentage(value):
    if value <= 10:
        return "0%-10%"
    elif 10 < value <= 25:
        return "10%-25%"
    elif 25 < value <= 50:
        return "25%-50%"
    elif 50 < value <= 75:
        return "50%-75%"
    elif 75 < value <= 90:
        return "75%-90%"
    else:
        return "90%-100%"


percentage_trust_df['cat4map'] = percentage_trust_df['Percentage'].apply(categorize_percentage)


gdf = gpd.read_file("C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/EU_base_map.geojson")


data4map = percentage_trust_df.merge(gdf, how='left', left_on='nuts_id', right_on='polID') 

category_mapping = {
    "0%-10%": 1,
    "10%-25%": 2,
    "25%-50%": 3,
    "50%-75%": 4,
    "75%-90%": 5,
    "90%-100%": 6
}
data4map['cat4map_numeric'] = data4map['cat4map'].map(category_mapping)

In [None]:


#with open('C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/EU_base_map.geojson', 'r', encoding='utf-8') as file:
#    eu_geojson = json.load(file)

fig = px.choropleth_mapbox(
    data4map,
    geojson=json.loads(gdf.to_json()),
    locations='polID',  
    color='cat4map',  
    color_continuous_scale=px.colors.sequential.Viridis,  
    mapbox_style="carto-positron",
    zoom=3,
    center={"lat": 54.5260, "lon": 15.2551},
    opacity=0.5
)

# Add dropdown for interactivity
variable_options = percentage_trust_df['Variable'].unique()
buttons = []

for variable in variable_options:
    # Filter the dataframe for the selected variable
    filtered_df = percentage_trust_df[percentage_trust_df['Variable'] == variable]
    
    # The 'args' for the dropdown must be a list of dictionaries. Each dictionary represents the changes to be made when the dropdown option is selected.
    # 'z' should be the new color values, and 'locations' should be the new location identifiers
    buttons.append(
        dict(
            args=[{
                'z': [filtered_df['Percentage'].tolist()],  # Color values for the choropleth
                'locations': [filtered_df['nuts_id'].tolist()]  # Location identifiers
            }],
            label=variable,
            method="update"  
        )
    )

fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            buttons=buttons,
            direction="down",
            showactive=True
        ),
    ]
)

# Set the title for the map
fig.update_layout(title_text='Trust in Public Institutions across EU')

# Export to an HTML file
fig.write_html('C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/interactive_map.html')


In [None]:
data4map

In [None]:
percentage_trust_df

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import json



# Initial merge for the map
gdf = gpd.read_file("C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/EU_base_map.geojson")

# Ensure this merge is correct; the right_on value must match the key in the gdf that corresponds to nuts_id
data4map = percentage_trust_df.merge(gdf, how='left', left_on='nuts_id', right_on='polID')

# Read the GeoJSON file
#with open('C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/EU_base_map.geojson', 'r', encoding='utf-8') as file:
#    eu_geojson = json.load(file)

# Create the initial map using plotly express, this time we'll use the merged data4map
fig = px.choropleth_mapbox(
    data4map,
    geojson= json.loads(gdf.to_json()),
    locations='nuts_id',  # This should match the feature id in your GeoJSON properties
    color='cat4map',
    color_discrete_map={  # Use a discrete color map
        "0%-10%": "red",
        "10%-25%": "orange",
        "25%-50%": "yellow",
        "50%-75%": "lightgreen",
        "75%-90%": "green",
        "90%-100%": "darkgreen"
    },
    mapbox_style="carto-positron",
    zoom=3,
    center={"lat": 54.5260, "lon": 15.2551},
    opacity=0.5
)

# Add dropdown for interactivity
variable_options = data4map['Variable'].unique()
buttons = []

# Mapping categorical data to numerical values
category_to_num = {
    "0%-10%": 1,
    "10%-25%": 2,
    "25%-50%": 3,
    "50%-75%": 4,
    "75%-90%": 5,
    "90%-100%": 6
}

# Apply this mapping to the 'cat4map' column
data4map['cat4map_num'] = data4map['cat4map'].map(category_to_num)

# Define the discrete color scale
color_scale = [
    [0, "red"],         # 0%-10%
    [1/6, "orange"],    # 10%-25%
    [2/6, "yellow"],    # 25%-50%
    [3/6, "lightgreen"],# 50%-75%
    [4/6, "green"],     # 75%-90%
    [5/6, "darkgreen"], # 90%-100%
]

# Now you can use 'cat4map_num' as the z value and color_scale as the colorscale
for variable in variable_options:
    # Filter the dataframe for the selected variable
    filtered_data = data4map[data4map['Variable'] == variable].copy()

    # Use 'cat4map_num' for the z value
    fig.add_trace(
        go.Choroplethmapbox(
            geojson=json.loads(gdf.to_json()),
            locations=filtered_data['polID'],
            z=filtered_data['cat4map_num'],
            colorscale=color_scale,
            showscale=False,
            visible=False  # Set all to false initially
        )
    )
    # Add the button for the variable
    buttons.append(
        dict(
            args=['visible', [v == variable for v in variable_options]],
            label=variable,
            method="restyle"
        )
    )

# Make the first variable's trace visible
fig.data[0].visible = True

# Update the layout with the dropdown
fig.update_layout(
    updatemenus=[{
        "buttons": buttons,
        "direction": "down",
        "showactive": True
    }]
)

# Set the title for the map
fig.update_layout(title_text='Trust in Public Institutions across EU')

# Export to an HTML file
fig.write_html('C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/interactive_map.html')

# Show the figure
fig.show()

In [None]:
!pip install geopandas folium pandas


In [None]:
import folium

category_mapping = {
    "0%-10%": 1,
    "10%-25%": 2,
    "25%-50%": 3,
    "50%-75%": 4,
    "75%-90%": 5,
    "90%-100%": 6
}
data4map['cat4map_numeric'] = data4map['cat4map'].map(category_mapping)

# Convert GeoDataFrame to GeoJSON
geo_json_data = gdf.to_json()

# Create a base map centered on Europe
m = folium.Map(location=[50, 10], zoom_start=4)

# Add the Choropleth layer using the numeric column
folium.Choropleth(
    geo_data=geo_json_data,
    data=data4map,
    columns=['nuts_id', 'cat4map_numeric'], 
    key_on='feature.properties.polID',
    fill_color='PuRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Trust Percentage'
).add_to(m)

# Display the map
m


In [None]:
data4map.to_csv('data4map.csv',index = False)

In [None]:
import folium

category_mapping = {
    "0%-10%": 1,
    "10%-25%": 2,
    "25%-50%": 3,
    "50%-75%": 4,
    "75%-90%": 5,
    "90%-100%": 6
}

data4map['cat4map_numeric'] = data4map['cat4map'].map(category_mapping)

geo_json_data = gdf.to_json()

m = folium.Map(location=[50, 10], zoom_start=4)

variables = data4map['Variable'].unique()


for var in variables:

    var_data = data4map[data4map['Variable'] == var]
    

    var_data['cat4map_numeric'] = var_data['cat4map'].map(category_mapping)
    

    folium.Choropleth(
        geo_data=geo_json_data,
        data=var_data,
        columns=['nuts_id', 'cat4map_numeric'],
        key_on='feature.properties.polID',
        fill_color='PuRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'Trust Percentage - {var}',
        name=var  
    ).add_to(m)


folium.LayerControl().add_to(m)


m


In [3]:
import folium
import json

#geo_json_data = json.loads(gdf.to_json())

with open('C:/Users/apillai/OneDrive - World Justice Project/Documents/GitHub/EU-NUTS-GIS/Data/EU_base_map.geojson', 'r', encoding = 'utf-8') as file:
    geo_json_data = json.load(file)


m = folium.Map(location=[50, 10], zoom_start=4)

for var in data4map['Variable'].unique():
    var_data = data4map[data4map['Variable'] == var].copy()
    
    # Convert categorical 'cat4map' to numeric if not done previously
    # category_mapping = {...} - Your predefined mapping from categories to numbers
    var_data['cat4map_numeric'] = var_data['cat4map'].map(category_mapping)
    
    # Ensure that the 'nuts_id' or 'polID' in var_data is of type string, if they need to match strings in the GeoJSON properties
    var_data['nuts_id'] = var_data['nuts_id'].astype(str)
    
    # Print the first few rows of var_data
    print(f"Variable: {var}")
    print(var_data.head())
    
    folium.Choropleth(
        geo_data=geo_json_data,
        name=var,
        data=var_data,
        columns=['nuts_id', 'cat4map_numeric'],  # The first column must match the key_on property
        key_on='feature.properties.polID',  # This must correctly reference the property in the GeoJSON
        fill_color='YlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'Trust Percentage - {var}'
    ).add_to(m)

folium.LayerControl().add_to(m)
m.save('europe_trust_map.html')

Variable: TRT_judges
   nuts_id    Variable country_name_ltn  \
0      AT1  TRT_judges          Austria   
4      AT2  TRT_judges          Austria   
8      AT3  TRT_judges          Austria   
12     BE1  TRT_judges          Belgium   
16     BE2  TRT_judges          Belgium   

                                             nuts_ltn  Percentage  cat4map  \
0                                      Ostoesterreich   67.567568  50%-75%   
4                                     Suedoesterreich   62.500000  50%-75%   
8                                     Westoesterreich   60.000000  50%-75%   
12  Région de Bruxelles-Capitale/Brussels Hoofdste...   25.000000  10%-25%   
16                                      Vlaams Gewest   51.063830  50%-75%   

   polID                                            polNAME CNTR_CODE  \
0    AT1                                      Ostösterreich        AT   
4    AT2                                      Südösterreich        AT   
8    AT3                        

In [None]:
data4map.dtypes()

In [4]:
# Check for missing values in the columns of interest
missing_nuts_id = var_data['nuts_id'].isnull().any()
missing_cat4map_numeric = var_data['cat4map_numeric'].isnull().any()
missing_geometry = var_data['geometry'].isnull().any()

print(f"Missing 'nuts_id': {missing_nuts_id}")
print(f"Missing 'cat4map_numeric': {missing_cat4map_numeric}")
print(f"Missing 'geometry': {missing_geometry}")

# Optionally, drop rows where any of these crucial columns are missing
#var_data = var_data.dropna(subset=['nuts_id', 'cat4map_numeric', 'geometry'])




Missing 'nuts_id': False
Missing 'cat4map_numeric': False
Missing 'geometry': True


In [5]:
# Inspect rows with missing geometries
missing_geometries = var_data[var_data['geometry'].isnull()]
print("Rows with missing geometries:")
print(missing_geometries)

# Count the number of rows with missing geometries
count_missing_geometries = missing_geometries.shape[0]
print(f"Count of rows with missing geometries: {count_missing_geometries}")


Rows with missing geometries:
    nuts_id         Variable country_name_ltn              nuts_ltn  \
35      CY0  TRT_prosecutors           Cyprus                Kýpros   
147     EE0  TRT_prosecutors          Estonia                 Eesti   
151     EL3  TRT_prosecutors           Greece                Attiki   
155     EL4  TRT_prosecutors           Greece  Nisia Aigaiou, Kriti   
159     EL5  TRT_prosecutors           Greece         Voreia Elláda   
163     EL6  TRT_prosecutors           Greece       Kentriki Elláda   
279    HUI2  TRT_prosecutors          Hungary              Dunántúl   
283    HUI3  TRT_prosecutors          Hungary       Alföld és Észak   
315    LU00  TRT_prosecutors       Luxembourg            Luxembourg   
319    MT00  TRT_prosecutors            Malta                 Malta   
355    SI03  TRT_prosecutors         Slovenia     Vzhodna Slovenija   
359    SI04  TRT_prosecutors         Slovenia     Zahodna Slovenija   

     Percentage  cat4map polID polNAME CNTR_CO

In [6]:
# Remove rows where geometry is missing
var_data = var_data.dropna(subset=['geometry'])

# Now, var_data should have no rows with missing geometries


In [12]:
import folium
import json

# Assuming you've already loaded your geo_json_data correctly as before
# and have removed rows with missing geometries

# Select a single variable to test
single_variable = 'TRT_judges'

# Filter the data to include only the selected variable and exclude any missing geometries
simplified_data = var_data[(var_data['Variable'] == single_variable) & (var_data['geometry'].notnull())]

# For testing, let's use only the first 5 entries
simplified_data = simplified_data.head(5)

# Check the simplified data
print(simplified_data[['nuts_id', 'Variable', 'cat4map_numeric', 'geometry']])

# Create a Folium map centered around a point in Europe
simplified_map = folium.Map(location=[50, 10], zoom_start=4)

# Add a choropleth layer using the simplified data
folium.Choropleth(
    geo_data=geo_json_data,  # Your GeoJSON data
    data=simplified_data,  # The simplified DataFrame
    columns=['nuts_id', 'cat4map_numeric'],  # Columns to use for the key and color encoding
    key_on='feature.properties.polID',  # Ensure this matches the GeoJSON properties
    fill_color='BuPu',  # Color palette for the choropleth
    fill_opacity=0.7,  # Opacity for the choropleth fill
    line_opacity=0.2,  # Opacity for the choropleth line borders
    legend_name='Trust Percentage - Simplified'  # Legend title
).add_to(simplified_map)

# Display the map
simplified_map.save('simplified_map.html')


Empty DataFrame
Columns: [nuts_id, Variable, cat4map_numeric, geometry]
Index: []


In [13]:
# Verify data types
print(var_data.dtypes)

# Check unique values of 'cat4map_numeric' to ensure they are numeric
print(var_data['cat4map_numeric'].unique())


nuts_id               object
Variable              object
country_name_ltn      object
nuts_ltn              object
Percentage           float64
cat4map               object
polID                 object
polNAME               object
CNTR_CODE             object
geometry            geometry
cat4map_numeric        int64
dtype: object
[4 3 1 2 5 6]
