# Dataset for 'Vegans, vegetarians, fish-eaters and meat-eaters in the UK show discrepant environmental impacts'

Scarborough et al. (2023) have published a fascinating research paper; however, there is a major limitation with their work: a startling absence of any graphics or visual representations of their research data and findings. They need your help with this challenge. They have collected a massive amount of diet and food consumption data from over 55,000 consumers, linked with food production data from 38,000 farms across 119 countries.


Each row in the data set from the project contains the following information:

1. Participant number
2. Type of Diet
3. Gender
4. Age Group
5. Mean GHG emissions (GreenHouse Gas) measured in kg
6. Mean Agricultural Land Use in square meters
7. Mean Water Scarcity
8. Mean Eutrophication Potential – measured in g of PO4e (gPO4e)
9. Mean GHG from CH4 emissions (Methane) from livestock management measured in kg
10. Mean GHG from N2O (Nitrous Oxide) emissions associated with fertilizer use
11. Mean Biodiversity Impact – species extinction per day
12. Mean Agricultural Water Usage in cubic meters (1 m³ = 1,000 liters)
13. Mean Acidification Potential


In [None]:
#!pip install plotly pandas --quiet

# Data importing

In [None]:
import pandas as pd
import plotly.express as px

# Load the study results; the path is relative to the current working directory.
df = pd.read_csv("Results_21Mar2022.csv")

# Show the first rows as a quick check of the loaded table.
df.head()


In [None]:
# List the column names available in the loaded table.
df.columns

# Indicator labels

In [None]:
# Display label for every environmental-impact column shown on the radar
# charts.  The insertion order of this mapping is the order the indicators are
# listed in wherever the labels are turned into a list.
indicator_labels = dict(
    mean_ghgs="GHG Emissions",
    mean_land="Land Use",
    mean_watscar="Water Scarcity",
    mean_eut="Eutrophication",
    mean_ghgs_ch4="CH4 Emissions",
    mean_ghgs_n2o="N2O Emissions",
    mean_bio="Biodiversity",
    mean_watuse="Water Use",
    mean_acid="Acidification",
)

# Column names of the plotted indicators, in the same order as their labels.
radar_indicators = list(indicator_labels)

# Radar Chart

In [None]:
# === 3. === Rescale every indicator by its own mean over all rows, so that
# indicators recorded on different scales become comparable (1.0 = average).
df_radar = df.copy()
column_means = {name: df_radar[name].mean() for name in radar_indicators}
for name, average in column_means.items():
    df_radar[name] = df_radar[name].div(average)

In [None]:
# === 4. === Reshape to long form: one row per original row and indicator,
# keeping the three grouping columns as identifiers.
group_keys = ["diet_group", "sex", "age_group"]
df_long = pd.melt(
    df_radar,
    id_vars=group_keys,
    value_vars=radar_indicators,
    var_name="Indicator",
    value_name="Value",
)
# Replace the column names by their display labels.
df_long["Indicator"] = df_long["Indicator"].map(indicator_labels)

# === 5. === Display labels in dictionary order, and the diet groups that
# actually occur in the data (missing values dropped), sorted by name.
indicators = [*indicator_labels.values()]
present_diets = df_long["diet_group"].dropna().unique()
diet_groups = sorted(present_diets)



In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Axis labels and diet groups, recomputed here from indicator_labels and
# df_long so this cell does not depend on the values left by the previous one.
indicators = [*indicator_labels.values()]
diet_groups = sorted(df_long["diet_group"].dropna().unique())

# === 6. === 2 x 3 grid of polar subplots, one titled panel per diet group.
fig = make_subplots(
    rows=2, cols=3,
    specs=[[{'type': 'polar'} for _ in range(3)] for _ in range(2)],
    subplot_titles=[name.capitalize() for name in diet_groups],
    horizontal_spacing=0.22,
    vertical_spacing=0.15,
)


# === 7. === One filled outline per gender inside every diet panel.
gender_palette = (('female', 'indianred'), ('male', 'royalblue'))
# The first axis is repeated at the end so the outline closes on itself.
closed_theta = indicators + [indicators[0]]

for panel, diet in enumerate(diet_groups):
    # Panels fill the grid left to right, then top to bottom.
    grid_row, grid_col = divmod(panel, 3)
    in_first_panel = panel == 0
    diet_rows = df_long[df_long["diet_group"] == diet]

    for sex_key, stroke in gender_palette:
        # Mean value per indicator for this diet/gender, in axis order.
        per_indicator = (
            diet_rows[diet_rows["sex"] == sex_key]
            .groupby("Indicator")
            .mean(numeric_only=True)
            .reindex(indicators)
        )
        radii = per_indicator["Value"].tolist()

        fig.add_trace(
            go.Scatterpolar(
                r=radii + [radii[0]],
                theta=closed_theta,
                name=sex_key.capitalize(),
                fill='toself',
                line=dict(color=stroke),
                # Legend entries are shown for the first panel only.
                showlegend=in_first_panel,
                legendgroup=sex_key,
                legendgrouptitle_text="Gender" if in_first_panel else None,
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )

# === 8. === Figure-level styling: title, legend placement, size and fonts.
fig.update_layout(
    title=dict(
        text="Radar Chart: Environmental Impact by Diet and Gender",
        x=0.5,
        font=dict(size=22, family="Arial", color="black"),
    ),
    legend=dict(font=dict(size=14), orientation='v', x=1.06, y=1),
    height=900,
    width=1250,
    margin=dict(l=100, r=150, t=100, b=80),
    template="plotly_white",
    paper_bgcolor="white",
    font=dict(family="Arial", size=12),
    showlegend=True,
)

# Give all six panels the same fixed radial range so outlines are comparable.
for polar_no in range(1, 7):
    fig.layout[f"polar{polar_no}"].radialaxis = dict(
        range=[0, 2.5],
        tickfont=dict(size=12, family="Arial"),
        visible=True,
    )

# Restyle every layout annotation (the subplot titles) in one call.
fig.update_annotations(font_size=18, font_family="Arial Black", font_color="black")


fig.show()





In [None]:
# Save the current figure as an interactive HTML file.
fig.write_html("radar_chart_interactive.html")


## Optimize


1. Apply min-max normalization to each indicator
2. Add the age_group dimension (selectable via a dropdown)
3. Use clearly distinguishable colors for the two genders

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def _radar_annotations(fig, diet_groups, age):
    """Build the complete layout-annotation list for one age group.

    The list holds, in this order: the dropdown hint below the plots, the
    figure title naming *age*, and one bold heading centred just above each
    polar subplot of *fig* (one per entry of *diet_groups*, in subplot order).

    The same builder feeds the initial layout and every dropdown button, so
    the figure looks identical before and after a selection.
    """
    hint = dict(  # annotations[0]: at bottom
        text="üí° Use dropdown to switch age groups",
        showarrow=False,
        x=0.6,
        y=-0.08,
        font=dict(size=14),
        xref="paper", yref="paper"
    )
    title = dict(  # annotations[1]: figure title
        text=f"Radar Chart: Environmental Impact by Diet and Gender (Age Group: {age})",
        showarrow=False,
        x=0.5,
        y=1.08,
        xref="paper", yref="paper",
        font=dict(size=22, family="Arial Black", color="black"),
        align="center",
        yanchor="bottom"
    )
    # annotations[2:]: one heading per polar subplot, placed from its domain
    headings = []
    for k, dg in enumerate(diet_groups):
        domain = fig.layout[f"polar{k+1}"].domain
        headings.append(dict(
            text=f"<b>{dg.capitalize()}</b>",
            x=(domain.x[0] + domain.x[1]) / 2,
            y=domain.y[1] + 0.03,
            xref="paper",
            yref="paper",
            showarrow=False,
            font=dict(size=16, family="Arial Black", color="black"),
            align="center"
        ))
    return [hint, title, *headings]


# Step 1: min-max scale each indicator over all rows so every axis spans [0, 1]
df_long["Value_norm"] = df_long.groupby("Indicator")["Value"].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

indicators = list(indicator_labels.values())
diet_groups = sorted(df_long["diet_group"].dropna().unique())
age_groups = sorted(df_long["age_group"].dropna().unique())

# subplot_titles is deliberately NOT passed.  make_subplots stores such titles
# as layout annotations, and update_layout(annotations=[...]) updates existing
# annotations element by element instead of replacing the list.  Passing titles
# here therefore left stale plain titles under the bold headings until the
# first dropdown click.  All headings come from _radar_annotations instead.
fig = make_subplots(
    rows=2, cols=3,
    specs=[[{'type': 'polar'}]*3, [{'type': 'polar'}]*3],
    horizontal_spacing=0.22,
    vertical_spacing=0.15
)

gender_colors = {'female': '#e41a1c', 'male': '#377eb8'}
traces = []

# Step 2: one trace per (age group, diet group, gender); only the traces of
# the first age group start visible.  The append order (age, then diet, then
# gender) is what the visibility masks in Step 4 rely on.
for i, age in enumerate(age_groups):
    for j, dg in enumerate(diet_groups):
        sub_df = df_long[(df_long["age_group"] == age) & (df_long["diet_group"] == dg)]

        for gender in ['female', 'male']:
            trace_df = sub_df[sub_df["sex"] == gender].groupby("Indicator").agg(
                Value=("Value", "mean"), Value_norm=("Value_norm", "mean")
            ).reset_index()
            trace_df = trace_df.set_index("Indicator").reindex(indicators).reset_index()

            # Repeat the first point so the outline closes on itself.
            r_values = trace_df["Value_norm"].tolist() + [trace_df["Value_norm"].tolist()[0]]
            raw_values = trace_df["Value"].tolist() + [trace_df["Value"].tolist()[0]]
            theta_values = indicators + [indicators[0]]

            # "Value" was already divided by the indicator's overall mean when
            # df_long was built, so it is labelled as relative to the mean and
            # not as a raw measurement.
            hovertext = [
                f"Indicator: {theta}<br>Relative to mean: {raw:.2f}<br>Normalized: {r:.2f}<br>Diet: {dg}<br>Gender: {gender.capitalize()}<br>Age Group: {age}"
                for theta, r, raw in zip(theta_values, r_values, raw_values)
            ]

            trace = go.Scatterpolar(
                r=r_values,
                theta=theta_values,
                text=hovertext,
                hoverinfo='text',
                name=gender.capitalize(),
                fill='toself',
                line=dict(color=gender_colors[gender]),
                legendgroup=gender,
                # Legend entries come from the first diet subplot of each age group.
                showlegend=(j == 0),
                legendgrouptitle_text="Gender" if j == 0 else None,
                visible=(i == 0),
                subplot=f"polar{j+1}"
            )
            traces.append(trace)

# Step 3: attach all traces; each one already names its polar subplot
for trace in traces:
    fig.add_trace(trace)

# Step 4: one dropdown entry per age group.  It shows only that age group's
# contiguous slice of traces and rewrites the annotations so the title names
# the selected age group.
buttons = []
traces_per_age = len(diet_groups) * 2
for i, age in enumerate(age_groups):
    start = i * traces_per_age
    visibility = [start <= k < start + traces_per_age for k in range(len(traces))]
    buttons.append(dict(
        label=age,
        method="update",
        args=[
            {"visible": visibility},
            {"annotations": _radar_annotations(fig, diet_groups, age)}
        ]
    ))


# Step 5: figure-level layout.  The visible title is annotations[1]; the
# layout title below carries no text.
fig.update_layout(
    title={
        'x': 0.5,
        'y': 0.05,
        'font': dict(size=22, family="Arial Black", color="black")
    },
    updatemenus=[dict(
        buttons=buttons,
        direction="down",
        showactive=True,
        x=1.15,
        xanchor="left",
        y=1.06,
        yanchor="top"
    )],
    legend=dict(
        font=dict(size=14),
        orientation='v',
        x=1.07,
        y=1
    ),
    height=900,
    width=1250,
    margin=dict(l=100, r=150, t=100, b=80),
    template="plotly_white",
    paper_bgcolor="white",
    font=dict(family="Arial", size=12),
    showlegend=True,
    annotations=_radar_annotations(fig, diet_groups, age_groups[0])
)

# Step 6: same fixed radial range on all six subplots.
# NOTE(review): the min-max values span 0~1 but the axis stops at 0.7 --
# confirm that no group mean exceeds 0.7.
for i in range(1, 7):
    fig.layout[f"polar{i}"].radialaxis = dict(
        range=[0, 0.7],
        tickfont=dict(size=12, family="Arial"),
        visible=True
    )


fig.show()


In [None]:
# Save the current figure as an interactive HTML file.
fig.write_html("radar_chart_interactive_optimized.html")


## Enhance interactive


In [None]:
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
#
# # Normalization
# df_long["Value_norm"] = df_long.groupby("Indicator")["Value"].transform(
#     lambda x: (x - x.min()) / (x.max() - x.min())
# )
#
# # Initialize variables
# indicators = list(indicator_labels.values())
# diet_groups = sorted(df_long["diet_group"].dropna().unique())
# age_groups = sorted(df_long["age_group"].dropna().unique())
# genders = ['female', 'male']
# gender_colors = {'female': '#e41a1c', 'male': '#377eb8'}
#
# # Create the figure
# fig = make_subplots(
#     rows=2, cols=3,
#     specs=[[{'type': 'polar'}]*3, [{'type': 'polar'}]*3],
#     horizontal_spacing=0.22,
#     vertical_spacing=0.15
# )
#
# # Build traces & meta data
# traces = []
# meta = []  # (age, gender)
# for i, age in enumerate(age_groups):
#     for j, dg in enumerate(diet_groups):
#         sub_df = df_long[(df_long["age_group"] == age) & (df_long["diet_group"] == dg)]
#         for gender in genders:
#             trace_df = sub_df[sub_df["sex"] == gender].groupby("Indicator").agg(
#                 Value=("Value", "mean"),
#                 Value_norm=("Value_norm", "mean")
#             ).reset_index()
#             trace_df = trace_df.set_index("Indicator").reindex(indicators).reset_index()
#             r_values = trace_df["Value_norm"].tolist() + [trace_df["Value_norm"].tolist()[0]]
#             raw_values = trace_df["Value"].tolist() + [trace_df["Value"].tolist()[0]]
#             theta_values = indicators + [indicators[0]]
#             hovertext = [
#                 f"Indicator: {theta}<br>Raw: {raw:.2f}<br>Normalized: {r:.2f}<br>Diet: {dg}<br>Gender: {gender.capitalize()}<br>Age Group: {age}"
#                 for theta, r, raw in zip(theta_values, r_values, raw_values)
#             ]
#             trace = go.Scatterpolar(
#                 r=r_values,
#                 theta=theta_values,
#                 text=hovertext,
#                 hoverinfo='text',
#                 name=gender.capitalize(),
#                 fill='toself',
#                 line=dict(color=gender_colors[gender]),
#                 legendgroup=gender,
#                 showlegend=(j == 0),
#                 subplot=f"polar{j+1}",
#                 visible=(i == 0)  # initially show the first age group
#             )
#             fig.add_trace(trace)
#             traces.append(trace)
#             meta.append((age, gender))
#
# # Helper: build the visibility mask from the current selection
# def get_visibility(selected_age, gender_flags):
#     return [
#         (a == selected_age) and gender_flags.get(g, False)
#         for (a, g) in meta
#     ]
#
# # Initial state
# selected_gender_flags = {'female': True, 'male': True}
# current_age = age_groups[0]
#
# # Build the gender buttons (toggle support)
# def create_gender_buttons():
#     gender_buttons = []
#     for gender in genders:
#         toggled_flags = selected_gender_flags.copy()
#         toggled_flags[gender] = not selected_gender_flags[gender]
#
#         visibility = get_visibility(current_age, toggled_flags)
#         label = f"{'‚úÖ' if toggled_flags[gender] else '‚ùå'} {gender.capitalize()}"
#
#         gender_buttons.append(dict(
#             label=label,
#             method="update",
#             args=[
#                 {"visible": visibility},
#                 {"title": f"Radar Chart: Environmental Impact by Diet and Gender (Age Group: {current_age})"}
#             ]
#         ))
#     return gender_buttons
#
#
# # Age buttons
# age_buttons = []
# for age in age_groups:
#     vis = get_visibility(age, selected_gender_flags)
#     age_buttons.append(dict(
#         label=age,
#         method="update",
#         args=[
#             {"visible": vis},
#             {"title": f"Radar Chart: Environmental Impact by Diet and Gender (Age Group: {age})"}
#         ]
#     ))
#
# # Add layout and controls
# fig.update_layout(
#     title={
#         'text': f"Radar Chart: Environmental Impact by Diet and Gender (Age Group: {current_age})",
#         'x': 0.5,
#         'y': 0.98,
#         'font': dict(size=22)
#     },
#     updatemenus=[
#         dict(
#             type="buttons",
#             direction="right",
#             buttons=create_gender_buttons(),
#             showactive=True,
#             x=0.001,
#             y=1.05,
#             xanchor="left",
#             yanchor="bottom",
#             bgcolor='white',
#             bordercolor='gray'
#         ),
#         dict(
#             type="buttons",
#             direction="right",
#             buttons=age_buttons,
#             showactive=True,
#             x=0.99,
#             y=1.05,
#             xanchor="right",
#             yanchor="bottom",
#             bgcolor='white',
#             bordercolor='gray'
#         )
#     ],
#     legend=dict(
#         font=dict(size=14),
#         orientation='v',
#         x=1.07,
#         y=1
#     ),
#     height=900,
#     width=1250,
#     margin=dict(l=100, r=150, t=120, b=80),
#     template="plotly_white",
#     paper_bgcolor="white",
#     font=dict(family="Arial", size=12),
# )
#
# # Add diet annotations as subplot headings
# for i, dg in enumerate(diet_groups):
#     domain = fig.layout[f"polar{i+1}"].domain
#     x_center = (domain.x[0] + domain.x[1]) / 2
#     y_top = domain.y[1] + 0.03
#     fig.add_annotation(
#         text=f"<b>{dg.capitalize()}</b>",
#         x=x_center,
#         y=y_top,
#         xref="paper",
#         yref="paper",
#         showarrow=False,
#         font=dict(size=16, family="Arial Black"),
#         align="center"
#     )
#
# # Set the axis range
# for i in range(1, 7):
#     fig.layout[f"polar{i}"].radialaxis = dict(
#         range=[0, 0.7],
#         tickfont=dict(size=12, family="Arial"),
#         visible=True
#     )
#
# fig.show()


✅ What can Standard Deviation (SD) tell us?


| What it shows | Explanation |
| --- | --- |
| Within-group variability | For example, among the Meat group, do males show large differences in environmental impact? |
| Diet group stability | Which diet group is more “stable” or more “extreme” in its environmental effects? |
| Outlier group identification | Which diet, age, or gender group has unusually high variability, indicating large individual differences? |
| Visualizing confidence bands | SD can be used to draw error bars in radar charts or bar plots to show uncertainty or range. |

✅ Idea: Complement the radar chart with a Treemap visualization to enhance interpretation.

✔ Value of the Treemap:

👤 Tree structure: Gender → Diet Group (hierarchical layout)

📦 Tile size: Represents either the number of participants or total impact (e.g., total GHG emissions)

🎨 Color encoding: Reflects the mean or standard deviation (SD) of a selected environmental indicator — for example, variability in Water Use

# Treemap

- Variables:
    - Hierarchical Dimensions:
        - sex (Male / Female)
        - age_group (e.g., 20–29, 30–39, ..., 70–79)
        - diet_group (Meat, Vegan, etc.)
    - Quantitative Measures:
        - n_participants (represented by area)
        - Standard deviation (SD) of 4 environmental impacts:
        - Acidification (sd_acid)
        - Biodiversity loss (sd_bio)
        - Methane emissions (sd_ghgs_ch4)
        - Water use (sd_watuse)
- Visual Mappings:
    - Hierarchy: sex → age_group → diet_group
    - Area: n_participants (group size)
    - Color: sd_val (variability in environmental impact)
    - Color Scale: Yellow → Orange → Red (higher SD = deeper red)


In [None]:
# Collect every column whose name starts with "sd_" and, in the same order,
# the "mean_" column name that shares its suffix (e.g. "sd_acid" -> "mean_acid").
sd_columns = []
mean_columns = []
for column in df.columns:
    if column.startswith('sd_'):
        sd_columns.append(column)
        mean_columns.append('mean_' + column[3:])

In [None]:
# # Build the long-format table
# records = []
# for sd_col, mean_col in zip(sd_cols, mean_cols):
#     for _, row in df.iterrows():
#         records.append({
#             'sd_type': sd_col.replace("sd_", "").replace("_", " ").title(),  # eg. Ghgs -> GHG Emissions
#             'sex': row['sex'],
#             'diet_group': row['diet_group'],
#             'n_participants': row['n_participants'],
#             'sd_value': row[sd_col],
#             'mean_value': row[mean_col]
#         })
#
# df_long = pd.DataFrame(records)

In [None]:
# # Create subplots: 2 rows × N columns (expanded automatically as needed)
# n = len(sd_cols)
# cols = 3
# rows = (n + cols - 1) // cols
#
# fig = make_subplots(
#     rows=rows,
#     cols=cols,
#     subplot_titles=[col.replace("sd_", "").replace("_", " ").title() for col in sd_cols]
# )

In [None]:
# # Aggregate: average SD and average mean per group (by sd_type + sex + diet)
# agg_df = df_long.groupby(['sd_type', 'sex', 'diet_group']).agg(
#     n_participants=('n_participants', 'first'),
#     sd_value=('sd_value', 'mean'),
#     mean_value=('mean_value', 'mean')
# ).reset_index()

In [None]:
# Grouping columns, used both for the aggregation and as the treemap nesting
# order (outermost level first).
treemap_levels = ('sex', 'age_group', 'diet_group')

# One treemap per standard-deviation column: shown inline and saved to HTML.
for sd_col, mean_col in zip(sd_columns, mean_columns):
    # Readable indicator name for the figure title, e.g. "sd_acid" -> "Acid".
    title_name = sd_col.replace("sd_", "").replace("_", " ").title()

    # One row per sex / age group / diet group: the first recorded group size
    # plus the averages of this indicator's mean and SD columns.
    grouped = df.groupby(list(treemap_levels))
    agg_df = grouped.agg(
        n_participants=('n_participants', 'first'),
        mean_val=(mean_col, 'mean'),
        sd_val=(sd_col, 'mean'),
    ).reset_index()

    fig = px.treemap(
        agg_df,
        path=list(treemap_levels),
        values='n_participants',          # tile area
        color='sd_val',                   # tile colour
        color_continuous_scale='YlOrRd',  # yellow -> orange -> red
        hover_data={'mean_val': True, 'sd_val': True},
    )

    layout_options = dict(
        width=1350,
        height=180,
        margin=dict(t=40, l=20, r=20, b=20),
        font=dict(family="Arial", size=12),
        title=f'{title_name}: Group Size and Variability (SD)',
        title_font=dict(family="Arial Black", size=18),
    )
    fig.update_layout(**layout_options)

    fig.show()

    # The "treemap_sd_*.html" naming is what the combining cell globs for.
    filename = f"treemap_{sd_col}.html"
    fig.write_html(filename)
    print(f"‚úÖ Saved: {filename}")



In [None]:

from glob import glob


def _body_of(path):
    """Return the text between the first "<body>" and the next "</body>" of *path*."""
    with open(path, "r", encoding="utf-8") as source:
        markup = source.read()
    return markup.split("<body>")[1].split("</body>")[0]


# All saved treemap pages in the working directory, in name order.
html_files = sorted(glob("treemap_sd_*.html"))

# Body of every page, each followed by a horizontal rule as a separator.
html_blocks = [_body_of(path) + "<hr>" for path in html_files]

# Stitch the bodies into a single page.
page_head = "<html><head><script src='https://cdn.plot.ly/plotly-latest.min.js'></script></head><body>\n"
page_tail = "</body></html>"
with open("all_treemaps_combined.html", "w", encoding="utf-8") as combined:
    combined.write(page_head + "".join(html_blocks) + page_tail)

print("‚úÖ all Treemaps have been integratedÔºöall_treemaps_combined.html")
