In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the raw results table.
df = pd.read_csv('Results_21Mar2022.csv')

# Normalise the column names (strip surrounding whitespace, lower-case).
df.columns = df.columns.str.strip().str.lower()
print("列名：", df.columns)

# Quick look at the first rows.
print(df.head())

# Report missing values per column, then fill numeric gaps with the column
# mean and drop any row that still has a missing (non-numeric) value.
print(df.isnull().sum())
df = df.fillna(df.mean(numeric_only=True)).dropna()

# Column lists: one mean_* and one sd_* column per impact metric.
impact_metrics = ['ghgs', 'land', 'watscar', 'eut', 'ghgs_ch4', 'ghgs_n2o', 'bio', 'watuse', 'acid']
numeric_cols = [f'mean_{metric}' for metric in impact_metrics]
sd_cols = [f'sd_{metric}' for metric in impact_metrics]
value_cols = numeric_cols + sd_cols

# Group averages at three granularities:
#   diet x age group  (treemap, parallel coordinates)
#   diet x sex        (radar chart)
#   diet only         (scatter matrix, matrix plot)
df_diet_age = df.groupby(['diet_group', 'age_group'])[value_cols].mean().reset_index()
df_diet_gender = df.groupby(['diet_group', 'sex'])[value_cols].mean().reset_index()
df_diet = df.groupby('diet_group')[value_cols].mean().reset_index()

# Min-max scale the mean_* columns of each summary frame to [0, 1] and add
# the composite columns. The scaler is re-fitted per frame, so the scaling is
# relative to the groups inside that frame only.
# NOTE(review): the sd_* columns are not scaled, so composite_sd averages
# values in their original units while composite_impact averages scaled
# values -- confirm this is intended.
scaler = MinMaxScaler()
for summary in (df_diet_age, df_diet_gender, df_diet):
    summary[numeric_cols] = scaler.fit_transform(summary[numeric_cols])
    summary['composite_impact'] = summary[numeric_cols].mean(axis=1)
    summary['composite_sd'] = summary[sd_cols].mean(axis=1)

列名： Index(['mc_run_id', 'grouping', 'mean_ghgs', 'mean_land', 'mean_watscar',
       'mean_eut', 'mean_ghgs_ch4', 'mean_ghgs_n2o', 'mean_bio', 'mean_watuse',
       'mean_acid', 'sd_ghgs', 'sd_land', 'sd_watscar', 'sd_eut',
       'sd_ghgs_ch4', 'sd_ghgs_n2o', 'sd_bio', 'sd_watuse', 'sd_acid',
       'n_participants', 'sex', 'diet_group', 'age_group'],
      dtype='object')
   mc_run_id           grouping  mean_ghgs  mean_land  mean_watscar  \
0          1  fish_female_20-29   5.421821   8.612772   14165.86240   
1          1  fish_female_30-39   5.436673   8.680248   15222.77063   
2          1  fish_female_40-49   5.289333   8.467723   15818.80068   
3          1  fish_female_50-59   5.169020   8.219199   16962.06009   
4          1  fish_female_60-69   5.380268   8.570718   18212.66232   

    mean_eut  mean_ghgs_ch4  mean_ghgs_n2o    mean_bio  mean_watuse  ...  \
0  23.945372       1.164643       0.343663  476.270413   679.923479  ...   
1  24.305262       1.154118       0.326868  

In [2]:
import plotly.express as px

# Per-diet colour palette. The treemap below colours by a continuous value and
# does not read this mapping; it is kept because other cells may reference it.
diet_colors = {
    'vegan': '#2ECC71',
    'vegetarian': '#3498DB',
    'fish': '#F1C40F',
    'meat': '#E74C3C'
}

# Treemap of the composite impact score, nested age group -> diet group.
# Tile area and tile colour both encode `composite_impact`.
# The reversed scale ('RdYlGn_r') is used so that a HIGH impact is drawn in red
# and a LOW impact in green; the non-reversed 'RdYlGn' paints the largest
# environmental impact green, which reads as "good".
fig_treemap = px.treemap(
    df_diet_age,
    path=['age_group', 'diet_group'],
    values='composite_impact',
    color='composite_impact',
    color_continuous_scale='RdYlGn_r',
    title='Composite Environmental Impact by Age Group and Diet Type'
)

# Typography and margins.
fig_treemap.update_layout(
    title_font_size=20,
    font=dict(size=12),
    margin=dict(l=50, r=50, t=80, b=50)
)

fig_treemap.show()

In [35]:
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler

# NOTE(review): this cell rebinds the notebook-level names `df`,
# `numeric_cols`, `df_diet_age` and `scaler`, which the first cell also
# defines (there with nine mean_* columns plus sd_* columns). Cells executed
# after this one that read those names see the four-column versions below.

# Load the data and normalise the column names (trimmed, lower-case).
df = pd.read_csv('Results_21Mar2022.csv')
df.columns = df.columns.str.strip().str.lower()

# Fill numeric gaps with the column mean, then drop rows still incomplete.
df = df.fillna(df.mean(numeric_only=True)).dropna()

# Only four key indicators are plotted, to keep the matrix readable.
numeric_cols = ['mean_ghgs', 'mean_land', 'mean_watscar', 'mean_eut']

# Average per diet group and age group.
df_diet_age = (
    df.groupby(['diet_group', 'age_group'])[numeric_cols]
    .mean()
    .reset_index()
)

# Min-max scale each indicator to the 0-1 range.
scaler = MinMaxScaler()
df_diet_age[numeric_cols] = scaler.fit_transform(df_diet_age[numeric_cols])

# Composite impact = row-wise mean of the scaled indicators.
df_diet_age = df_diet_age.assign(
    composite_impact=df_diet_age[numeric_cols].mean(axis=1)
)

# Dimensions shown in the matrix and their display labels
# ('mean_' prefix removed, first letter capitalised).
scatter_dims = [*numeric_cols, 'composite_impact']
axis_labels = {name: name.replace('mean_', '').capitalize() for name in scatter_dims}

# Scatter-plot matrix: colour encodes diet group, marker symbol encodes age group.
fig = px.scatter_matrix(
    data_frame=df_diet_age,
    dimensions=scatter_dims,
    color='diet_group',
    symbol='age_group',
    title='Environmental Impact Relationships by Diet and Age Group',
    hover_data=['diet_group', 'age_group'],
    color_discrete_sequence=px.colors.qualitative.Plotly,
    labels=axis_labels,
)

# Hide the diagonal and the upper triangle (they repeat the lower triangle),
# and enlarge the markers with some transparency.
fig.update_traces(
    diagonal_visible=False,
    showupperhalf=False,
    marker={'size': 10, 'opacity': 0.7},
)

# White background, fixed square canvas, legend placed right of the plot area.
fig.update_layout(
    title_font_size=22,
    font={'size': 13},
    plot_bgcolor='white',
    paper_bgcolor='white',
    width=1000,
    height=1000,
    legend={'title': 'Diet Group / Age Group', 'x': 1.1, 'y': 1.0},
)

# Save as a standalone HTML file so the interactive figure can be shared.
fig.write_html("scatter_matrix.html")

# Render the figure in the notebook.
fig.show()

In [20]:
import plotly.graph_objects as go
import plotly.io as pio

# Prepare the data: one row per diet/sex combination, labelled "diet (sex)".
heatmap_df = df_diet_gender.copy()
heatmap_df['group'] = heatmap_df['diet_group'] + ' (' + heatmap_df['sex'] + ')'
heatmap_df = heatmap_df.set_index('group')

# Take the impact columns from the frame being plotted instead of the global
# `numeric_cols`: the scatter-matrix cell of this notebook rebinds that name to
# a four-column list, which would silently drop categories from this heatmap
# depending on which cell ran last.
heatmap_cols = [col for col in heatmap_df.columns if col.startswith('mean_')]

# Heatmap matrix (rows = groups, columns = impact categories) and axis labels.
z = heatmap_df[heatmap_cols].values
x = [col.replace('mean_', '') for col in heatmap_cols]
y = heatmap_df.index.tolist()

# Build the heatmap. The colour range is pinned to [0, 1], i.e. it expects the
# mean_* columns of `df_diet_gender` to have been min-max scaled beforehand.
fig_heatmap = go.Figure(data=go.Heatmap(
    z=z,
    x=x,
    y=y,
    colorscale='Viridis',
    colorbar=dict(title='Normalized Impact'),
    zmin=0,
    zmax=1
))

fig_heatmap.update_layout(
    title='Environmental Impact Heatmap by Diet Type and Gender',
    xaxis_title='Impact Category',
    yaxis_title='Diet + Gender',
    width=900,
    height=500
)

# Render the figure in the notebook.
fig_heatmap.show()

# Export to a standalone HTML file; auto_open=True also opens the saved file
# in the default web browser.
pio.write_html(fig_heatmap, file="heatmap_diet_gender.html", auto_open=True)
