# Imports

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
import requests
import vegafusion as vf
vf.enable()
import json
import math

# Mock Ups

In [None]:
# Read the Europe country geometries into a GeoDataFrame and show it.
# NOTE(review): later cells read '../data/europe.geojson' (lower-case directory);
# on a case-sensitive filesystem only one spelling can resolve — confirm which.
EuropeData = gpd.read_file('../Data/europe.geojson')
EuropeData

In [None]:
def _mock_choropleth(scheme):
    """Return the mock-up map of EuropeData coloured by binned POP2005.

    Only the colour scheme varies between the mock-ups, so it is the single
    argument.
    NOTE(review): the legend title talks about toxins while the field is
    POP2005 — presumably placeholder data for the mock-up; confirm.
    """
    fill = alt.Color(
        'POP2005',
        title='Volume of Toxins (Mil tons air)',
        bin=alt.Bin(maxbins=10),
        scale=alt.Scale(scheme=scheme),
    )
    shapes = alt.Chart(EuropeData).mark_geoshape(stroke='white', strokeWidth=1)
    return shapes.encode(color=fill).project('mercator').properties(width=600, height=500)


# Same map twice; only the colour scheme differs.
Default = _mock_choropleth('blues')
temp = _mock_choropleth('reds')

In [None]:
# Show the 'blues' mock-up map as this cell's output.
Default

In [None]:
# Show the 'reds' mock-up map as this cell's output.
temp

# Air Quality Graphs

In [None]:
# Load the WHO ambient air quality table and keep only the "4_Eur" region rows.
# NOTE(review): this cell reads from '../Data/' while the next cell reads the
# same file name from '../data/' — confirm the directory casing.
AirInfo = pd.read_csv('../Data/who_ambient_air_quality_database_version_2024_(v6.1).csv')
# Rows without a year cannot be cast to int, so they are dropped first.
AirInfo = AirInfo.dropna(subset=["year"])
# Vectorised cast (as the `df` cleaning cell does) instead of a per-row lambda;
# `assign` builds a new frame rather than writing into the dropna() result.
AirInfo = AirInfo.assign(year=AirInfo["year"].astype(int))
AirInfo = AirInfo[AirInfo["who_region"] == "4_Eur"]
AirInfo

In [None]:
# Read the WHO table and reshape it into a complete country x year grid
# covering 2010-2022, averaging duplicate (country, year) rows.
file_path = '../data/who_ambient_air_quality_database_version_2024_(v6.1).csv'

# Rows with no year are discarded up front so the column can be cast to int.
df = pd.read_csv(file_path).dropna(subset=['year'])
df = df.astype({'year': int})

# Target grid: every country seen in the file crossed with each year 2010..2022.
years = [*range(2010, 2023)]
countries = df['country_name'].unique()
new_index = pd.MultiIndex.from_product(
    [countries, years],
    names=['country_name', 'year'],
)

# groupby already leaves (country_name, year) as the index, which is exactly
# what reindex needs; combinations absent from the data come back as NaN rows.
df = (
    df.groupby(['country_name', 'year'])
    .mean(numeric_only=True)
    .reindex(new_index)
    .reset_index()
)

# Missing concentration readings become 0; country and year are left as they are.
emission_columns = [name for name in df.columns if 'concentration' in name.lower()]
df[emission_columns] = df[emission_columns].fillna(0)

# Show the cleaned frame.
df


In [None]:
# Inspect the rows of one country to sanity-check the cleaned grid.
df.loc[df["country_name"] == "Russian Federation"]

In [None]:
# Collapse AirInfo to one row per (country, year) holding the mean of each
# of the three pollutant concentration columns.
AirInfo = (
    AirInfo.groupby(['country_name', 'year'])[
        ['pm10_concentration', 'pm25_concentration', 'no2_concentration']
    ]
    .mean()
    .reset_index()
)

AirInfo

In [None]:
# Keep only the three pollutant columns of df, averaged per (country, year),
# with the grouping keys restored as ordinary columns.
df = (
    df.groupby(['country_name', 'year'])[
        ['pm10_concentration', 'pm25_concentration', 'no2_concentration']
    ]
    .mean()
    .reset_index()
)

df

In [None]:
# PM10 over time, one line per country in AirInfo.
vf.enable(row_limit=50000)
# `year` holds plain integers. Encoded as temporal (":T") such numbers are read
# as millisecond timestamps, which collapses the axis onto 1 Jan 1970; an
# ordinal encoding places one position per distinct year instead.
temp = alt.Chart(AirInfo).mark_line().encode(
    x="year:O",
    y="pm10_concentration:Q",
    color='country_name'
).properties(
    width=750,
    height=300
)
temp

In [None]:
# PM10 over time for Andorra only, taken from AirInfo.
vf.enable(row_limit=50000)
# `year` holds plain integers. Encoded as temporal (":T") such numbers are read
# as millisecond timestamps, which collapses the axis onto 1 Jan 1970; an
# ordinal encoding places one position per distinct year instead.
temp = alt.Chart(AirInfo[AirInfo["country_name"] == "Andorra"]).mark_line().encode(
    x="year:O",
    y="pm10_concentration:Q",
    color='country_name'
).properties(
    width=750,
    height=300
)
temp

In [None]:
# PM10 over time for Andorra only, taken from the zero-filled grid `df`.
vf.enable(row_limit=50000)
# `year` holds plain integers. Encoded as temporal (":T") such numbers are read
# as millisecond timestamps, which collapses the axis onto 1 Jan 1970; an
# ordinal encoding places one position per distinct year instead.
temp = alt.Chart(df[df["country_name"] == "Andorra"]).mark_line().encode(
    x="year:O",
    y="pm10_concentration:Q",
    color='country_name'
).properties(
    width=750,
    height=300
)
temp

In [None]:
# Inner join: only rows whose geometry NAME equals a country_name in AirInfo
# are kept.
merged_data = EuropeData.merge(
    AirInfo,
    left_on='NAME',
    right_on='country_name',
    how='inner',
)

# GeoJSON text of the merged frame, prepared for Altair.
merged_data_json = merged_data.to_json()
merged_data

In [None]:
# Inner join of the Europe geometries with the zero-filled grid `df`
# on NAME == country_name.
merged_data2 = EuropeData.merge(df, how='inner', left_on='NAME', right_on='country_name')

# Prepare the merged data for Altair (convert to GeoJSON).
# Serialises merged_data2 itself; the cell previously serialised `merged_data`,
# so the "2" JSON did not correspond to the frame built above.
merged_data_json2 = merged_data2.to_json()
merged_data2

In [None]:
# Compare the country spellings used by the two sources.
set1 = {*EuropeData["NAME"].unique()}
set2 = {*AirInfo["country_name"].unique()}
# Names present on one side only are the ones an exact-match join cannot pair.
only_in_array1 = set1.difference(set2)
only_in_array2 = set2.difference(set1)

print(f"Strings only in EuropeNames: {only_in_array1}")
print(f"Strings only in AirInfoNames: {only_in_array2}")

In [None]:
# Rename WHO `country_name` values (keys) to alternative spellings (values),
# presumably the ones used in the GeoJSON `NAME` column so that the merges on
# NAME == country_name can match — confirm against the name-difference output.
name_mapping = {
    "Russian Federation": "Russia",
    "North Macedonia": "The former Yugoslav Republic of Macedonia",
    "Netherlands (Kingdom of the)": "Netherlands",
    # NOTE(review): looks like a mis-decoded "Türkiye"; kept verbatim because it
    # presumably matches the text as read from the CSV — confirm.
    "T?�rkiye": "Turkey",
    # Correctly decoded spelling, so the mapping still applies if the CSV text
    # is read intact.
    "Türkiye": "Turkey",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "Czechia": "Czech Republic",
    "Republic of Moldova": "Republic of Moldova",  # Example of keeping the same
    # Add other mappings as needed
}
# One dict-based replace per frame instead of one full pass per mapping entry.
AirInfo["country_name"] = AirInfo["country_name"].replace(name_mapping)
df["country_name"] = df["country_name"].replace(name_mapping)

In [None]:
# List the distinct country names currently present in AirInfo.
AirInfo.loc[:, "country_name"].unique()

In [None]:
# Read the Europe geometries from the GeoJSON file.
geo_data = gpd.read_file('../data/europe.geojson')

# Inner join with AirInfo on NAME == country_name.
merged_data = geo_data.merge(
    AirInfo,
    left_on='NAME',
    right_on='country_name',
    how='inner',
)

# GeoJSON text of the merged frame.
geojson_data = merged_data.to_json()

# Show the merged frame.
merged_data

In [None]:
# Read the Europe geometries from the GeoJSON file.
geo_data = gpd.read_file('../data/europe.geojson')

# Inner join with the zero-filled grid `df` on NAME == country_name.
merged_data2 = geo_data.merge(df, how='inner', left_on='NAME', right_on='country_name')

# Convert the GeoDataFrame to GeoJSON.
# Serialises merged_data2 itself; the cell previously serialised `merged_data`,
# so `geojson_data2` did not correspond to the frame built above.
geojson_data2 = merged_data2.to_json()

# Show the merged frame.
merged_data2

In [None]:
# Static choropleth of PM2.5 for the single year 2019.
filtered_data = merged_data.loc[merged_data['year'] == 2019]

chart = (
    alt.Chart(filtered_data)
    .mark_geoshape()
    .encode(
        color=alt.Color('pm25_concentration:Q', title='PM2.5 Concentration'),
        # Hover shows the country plus all three pollutant readings.
        tooltip=[
            alt.Tooltip(field, title=label)
            for field, label in (
                ('country_name:N', 'Country'),
                ('pm10_concentration:Q', 'PM10'),
                ('pm25_concentration:Q', 'PM2.5'),
                ('no2_concentration:Q', 'NO2'),
            )
        ],
    )
    .properties(
        title="Chemical Concentrations in Europe",
        width=800,
        height=600,
    )
    # Mercator view centred on [10, 50] (longitude, latitude); scale sets the zoom.
    .project(type='mercator', center=[10, 50], scale=500)
)
chart

In [None]:
# Bounds of the PM2.5 column, used to pin the colour scale so that colours
# stay comparable while the slider moves between years.
min_pm25 = merged_data['pm25_concentration'].min()
max_pm25 = merged_data['pm25_concentration'].max()

# Year slider. The selection starts on 2010 (the slider minimum): without an
# initial value the point selection is empty, the filter lets every year
# through, and all years are drawn on top of each other until the slider moves.
slider = alt.binding_range(min=2010, max=2022, step=1, name="Year")
year_selection = alt.selection_point(
    fields=['year'],
    bind=slider,
    name="Select",
    value=[{'year': 2010}],
)

# Choropleth of PM2.5, filtered to the year chosen on the slider.
chart = alt.Chart(merged_data).mark_geoshape().encode(
    color=alt.Color(
        'pm25_concentration:Q',
        title='PM2.5 Concentration',
        scale=alt.Scale(domain=[min_pm25, max_pm25], clamp=True)  # Constant scale
    ),
    tooltip=[
        alt.Tooltip('country_name:N', title='Country'),
        alt.Tooltip('pm10_concentration:Q', title='PM10'),
        alt.Tooltip('pm25_concentration:Q', title='PM2.5'),
        alt.Tooltip('no2_concentration:Q', title='NO2')
    ]
).properties(
    width=800,
    height=600,
    title="Chemical Concentrations in Europe"
).project(
    type='mercator',
    center=[10, 50],
    scale=500
).add_params(
    year_selection
).transform_filter(
    year_selection
)

chart


In [None]:
# Bounds of the PM2.5 column in `merged_data`, used to pin the colour scale so
# that colours stay comparable while the slider moves between years.
min_pm25 = merged_data['pm25_concentration'].min()
max_pm25 = merged_data['pm25_concentration'].max()

# Year slider. The selection starts on 2010 (the slider minimum): without an
# initial value the point selection is empty, the filter lets every year
# through, and all years are drawn on top of each other until the slider moves.
slider = alt.binding_range(min=2010, max=2022, step=1, name="Year")
year_selection = alt.selection_point(
    fields=['year'],
    bind=slider,
    name="Select",
    value=[{'year': 2010}],
)

# Choropleth of PM2.5 from the zero-filled grid, filtered to the slider year.
chart = alt.Chart(merged_data2).mark_geoshape(
    stroke='white',        # White borders between countries
    strokeWidth=0.4
).encode(
    color=alt.condition(
        alt.datum.pm25_concentration > 0,
        alt.Color(
            'pm25_concentration:Q',
            title='PM2.5 Concentration',
            scale=alt.Scale(domain=[min_pm25, max_pm25], clamp=True, scheme='magma',reverse=True)
        ),
        alt.value('lightgray')  # Gray out countries with PM2.5 = 0
    ),
    tooltip=[
        alt.Tooltip('country_name:N', title='Country'),
        alt.Tooltip('pm10_concentration:Q', title='PM10'),
        alt.Tooltip('pm25_concentration:Q', title='PM2.5'),
        alt.Tooltip('no2_concentration:Q', title='NO2')
    ]
).properties(
    width=800,
    height=600,
    title="Chemical Concentrations in Europe"
).project(
    type='mercator',
    center=[10, 50],
    scale=500
).add_params(
    year_selection
).transform_filter(
    year_selection
)

chart

In [None]:
# Colour-scale bounds taken from the PM10 column itself. The cell previously
# reused the PM2.5 bounds, so PM10 colours were clamped to the wrong range.
min_pm10 = merged_data['pm10_concentration'].min()
max_pm10 = merged_data['pm10_concentration'].max()

# Year slider. The selection starts on 2010 (the slider minimum): without an
# initial value the point selection is empty, the filter lets every year
# through, and all years are drawn on top of each other until the slider moves.
slider = alt.binding_range(min=2010, max=2022, step=1, name="Year")
year_selection = alt.selection_point(
    fields=['year'],
    bind=slider,
    name="Select",
    value=[{'year': 2010}],
)

# Choropleth of PM10 from the zero-filled grid, filtered to the slider year.
chart = alt.Chart(merged_data2).mark_geoshape(
    stroke='white',        # White borders between countries
    strokeWidth=0.4
).encode(
    color=alt.condition(
        # Test the plotted pollutant (PM10), not PM2.5, when deciding to grey out.
        alt.datum.pm10_concentration > 0,
        alt.Color(
            'pm10_concentration:Q',
            title='PM10 Concentration',
            scale=alt.Scale(domain=[min_pm10, max_pm10], clamp=True, scheme='magma',reverse=True)
        ),
        alt.value('lightgray')  # Gray out countries with PM10 = 0
    ),
    tooltip=[
        alt.Tooltip('country_name:N', title='Country'),
        alt.Tooltip('pm10_concentration:Q', title='PM10'),
        alt.Tooltip('pm25_concentration:Q', title='PM2.5'),
        alt.Tooltip('no2_concentration:Q', title='NO2')
    ]
).properties(
    width=800,
    height=600,
    title="Chemical Concentrations in Europe"
).project(
    type='mercator',
    center=[10, 50],
    scale=500
).add_params(
    year_selection
).transform_filter(
    year_selection
)

chart

In [None]:
# Colour-scale bounds taken from the NO2 column itself. The cell previously
# reused the PM2.5 bounds, so NO2 colours were clamped to the wrong range.
min_no2 = merged_data['no2_concentration'].min()
max_no2 = merged_data['no2_concentration'].max()

# Year slider. The selection starts on 2010 (the slider minimum): without an
# initial value the point selection is empty, the filter lets every year
# through, and all years are drawn on top of each other until the slider moves.
slider = alt.binding_range(min=2010, max=2022, step=1, name="Year")
year_selection = alt.selection_point(
    fields=['year'],
    bind=slider,
    name="Select",
    value=[{'year': 2010}],
)

# Choropleth of NO2 from the zero-filled grid, filtered to the slider year.
chart = alt.Chart(merged_data2).mark_geoshape(
    stroke='white',        # White borders between countries
    strokeWidth=0.4
).encode(
    color=alt.condition(
        alt.datum.no2_concentration > 0,
        alt.Color(
            'no2_concentration:Q',
            title='NO2 Concentration',
            scale=alt.Scale(domain=[min_no2, max_no2], clamp=True, scheme='magma',reverse=True)
        ),
        alt.value('lightgray')  # Gray out countries with NO2 = 0
    ),
    tooltip=[
        alt.Tooltip('country_name:N', title='Country'),
        alt.Tooltip('pm10_concentration:Q', title='PM10'),
        alt.Tooltip('pm25_concentration:Q', title='PM2.5'),
        alt.Tooltip('no2_concentration:Q', title='NO2')
    ]
).properties(
    width=800,
    height=600,
    title="Chemical Concentrations in Europe"
).project(
    type='mercator',
    center=[10, 50],
    scale=500
).add_params(
    year_selection
).transform_filter(
    year_selection
)

chart

In [None]:
# temp = alt.Chart(AirInfo[AirInfo["country_name"] == "Spain"]).mark_line().encode(
#     x="year:T",
#     y="pm10_concentration:Q",
#     facet=alt.Facet('city',columns=5)
# ).properties(
#     width=100,
#     height=100
# )
# temp