## Data Visualization

### Expenditures

In [1]:
import pandas as pd
import os
import numpy as np
import altair as alt
import warnings
warnings.filterwarnings('ignore')

In [2]:
def merge_csv_to_dataframe(folder_path):
    """Combine every ``.csv`` file in ``folder_path`` into a single DataFrame.

    The first four characters of each file name are taken as the year and
    stored in a new ``Year`` column. Files whose name does not start with
    four digits are skipped, and files that cannot be read are reported and
    skipped.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the folder contains no ``.csv`` file; otherwise the
        concatenated frames (an empty DataFrame if every file was skipped).
    """
    # Sorted so the row order of the result does not depend on the
    # arbitrary order in which the OS lists the directory.
    file_names = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
    if not file_names:
        print("No .csv files found in the folder.")
        return None

    # Collect the frames and concatenate once; concatenating inside the loop
    # re-copies all previously loaded rows on every iteration.
    frames = []
    for file_name in file_names:
        year = file_name[:4]
        if not year.isdigit():
            print(f"Skipping file with invalid format: {file_name}")
            continue

        file_path = os.path.join(folder_path, file_name)
        try:
            df = pd.read_csv(file_path)
            df['Year'] = int(year)  # add the year as a new column
        except Exception as e:
            print(f"Error reading file {file_name}: {e}")
            continue
        frames.append(df)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Load all yearly expenditure CSVs into one frame.
# NOTE: merge_csv_to_dataframe returns None when the folder has no .csv files.
expenditure_folder_path = "./Datasets_Cleaned/Expenditures/"  
expenditure_df = merge_csv_to_dataframe(expenditure_folder_path)

In [3]:
# Keep only rows paid to Michigan payees; .copy() detaches the slice so the
# column assignments below write to an independent frame.
is_michigan_payee = expenditure_df['payee_state'] == 'MI'
expenditure_df_figure3 = expenditure_df[is_michigan_payee].copy()

# Parse the expenditure date; unparseable values become NaT and are dropped.
expenditure_df_figure3["exp_date"] = pd.to_datetime(
    expenditure_df_figure3["exp_date"], errors='coerce'
)
expenditure_df_figure3 = expenditure_df_figure3.dropna(subset=["exp_date"])

# Restrict to expenditures dated 2020 through 2025 (both inclusive).
expenditure_year = expenditure_df_figure3['exp_date'].dt.year
expenditure_df_figure3 = expenditure_df_figure3[expenditure_year.between(2020, 2025)]

# Coerce amounts to numbers; non-numeric values become NaN and are dropped.
expenditure_df_figure3["amount"] = pd.to_numeric(
    expenditure_df_figure3["amount"], errors="coerce"
)
expenditure_df_figure3 = expenditure_df_figure3.dropna(subset=["amount"])
def clean_expenditure_type(value):
    """Trim an expenditure-type label to end at its first 'Expenditures'.

    Parameters
    ----------
    value : object
        Raw cell value; usually a string, but may be NaN/None for blank cells.

    Returns
    -------
    str or None
        Everything up to and including the first occurrence of
        ``'Expenditures'``; ``None`` when the word is absent or ``value`` is
        not a string.
    """
    # Blank cells arrive as float NaN (or None); a substring test on those
    # raises TypeError, so treat every non-string as "no type".
    if not isinstance(value, str):
        return None
    prefix, marker, _ = value.partition('Expenditures')
    # partition() returns an empty marker when the word is not present.
    return prefix + marker if marker else None
# Normalise the expenditure type; labels without 'Expenditures' become None
# and those rows are dropped.
expenditure_df_figure3['expenditure_type'] = expenditure_df_figure3['expenditure_type'].apply(clean_expenditure_type)
expenditure_df_figure3 = expenditure_df_figure3.dropna(subset=['expenditure_type'])

# +1 keeps log10 defined for zero amounts.
expenditure_df_figure3['log_amount'] = np.log10(expenditure_df_figure3['amount'] + 1)

# First instant of the month each expenditure falls in. The vectorised
# accessor replaces a per-row Python lambda and keeps a datetime dtype even
# when the frame is empty.
expenditure_df_figure3['month_start'] = expenditure_df_figure3['exp_date'].dt.to_period('M').dt.start_time
expenditure_df_figure3['year'] = expenditure_df_figure3['month_start'].dt.year

# NOTE(review): rows without a description are removed *before* the monthly
# totals are computed, so they are excluded from the monthly sums as well as
# from later per-description charts. Confirm this is intended.
expenditure_df_figure3 = expenditure_df_figure3.dropna(subset=['exp_desc'])

# Total amount per expenditure type and month, plus year columns for filtering.
expenditure_df_monthly_grouped = (
    expenditure_df_figure3
    .groupby(['expenditure_type', 'month_start'])[['amount']]
    .sum()
    .reset_index()
)
expenditure_df_monthly_grouped['year'] = expenditure_df_monthly_grouped['month_start'].dt.year
expenditure_df_monthly_grouped['year_str'] = expenditure_df_monthly_grouped['year'].astype(str)

### Filter

In [4]:
# Distinct years present in the monthly expenditure totals, ascending.
years = sorted(expenditure_df_monthly_grouped['year'].unique().tolist())

# One checkbox-bound parameter per year, initially ticked. The parameter is
# named year_<YYYY> so filter expressions can reference it by name.
year_checkboxes = [
    alt.param(
        name=f'year_{year}',
        value=True,
        bind=alt.binding_checkbox(name=f'{year} '),
    )
    for year in years
]

### Graph

In [5]:
# Vega expression that keeps a row when the checkbox for its year is ticked.
expenditure_monthly_filter = ' || '.join(
    f'datum.year == {year} && year_{year}' for year in years
)

expenditure_monthly_tooltip = [
    alt.Tooltip('month_start:T', title='Month', format='%Y-%m'),
    alt.Tooltip('expenditure_type:N', title='Expenditure Type'),
    alt.Tooltip('amount:Q', title='Amount', format='$,.2f'),
    alt.Tooltip('log_amount:Q', title='Log10(Amount)', format='.2f'),
]

# Line chart of monthly totals per expenditure type. log_amount is computed
# inside the chart (transform_calculate) from the summed amount; the colour
# legend is left at Altair's defaults.
expenditure_monthly_chart = (
    alt.Chart(expenditure_df_monthly_grouped)
    .mark_line(point=True)
    .encode(
        x=alt.X('month_start:T', title='Month Start Date'),
        y=alt.Y('log_amount:Q', title='Log10(Total Amount)'),
        color=alt.Color('expenditure_type:N', title='Expenditure Type'),
        tooltip=expenditure_monthly_tooltip,
    )
    .transform_calculate(log_amount='log(datum.amount + 1) / log(10)')
    .add_params(*year_checkboxes)
    .transform_filter(expenditure_monthly_filter)
    .properties(
        title='Monthly Expenditure (Grouped by Month) - Log Scale',
        width=800,
        height=400,
    )
    .interactive()
)

expenditure_monthly_chart.show()


In [6]:
# Yearly totals per expenditure description.
expenditure_df_bar = expenditure_df_figure3.copy()
expenditure_yearly_grouped = (
    expenditure_df_bar
    .groupby(['year', 'exp_desc'])[['amount']]
    .sum()
    .reset_index()
)
expenditure_yearly_grouped['year_str'] = expenditure_yearly_grouped['year'].astype(str)
expenditure_yearly_grouped['log_amount'] = np.log10(expenditure_yearly_grouped['amount'] + 1)
years_bar = sorted(expenditure_yearly_grouped['year'].unique().tolist())

# For every year take the 15 largest descriptions by amount, discard the
# single largest one, and pool the remaining descriptions across all years.
# NOTE(review): this keeps at most 14 descriptions per year (ranks 2-15) and
# the pooled set can exceed 15, while the chart title says "Top 15" - confirm
# the wording is intended.
all_top_exp_desc = set()
for year in years_bar:
    rows_for_year = expenditure_yearly_grouped[expenditure_yearly_grouped['year'] == year].copy()
    ranked_desc = rows_for_year.nlargest(15, 'amount')['exp_desc'].tolist()
    all_top_exp_desc.update(ranked_desc[1:])

is_top_desc = expenditure_yearly_grouped['exp_desc'].isin(all_top_exp_desc)
expenditure_yearly_top = expenditure_yearly_grouped[is_top_desc].copy()

# One checkbox-bound parameter per year, named year_<YYYY>, initially ticked.
year_checkboxes = [
    alt.param(
        name=f'year_{year}',
        value=True,
        bind=alt.binding_checkbox(name=f'{year} '),
    )
    for year in years_bar
]

# Vega expression that keeps a row when the checkbox for its year is ticked.
expenditure_bar_filter = ' || '.join(
    f'datum.year == {year} && year_{year}' for year in years_bar
)

expenditure_bar_tooltip = [
    alt.Tooltip('year:N', title='Year'),
    alt.Tooltip('exp_desc:N', title='Description'),
    alt.Tooltip('amount:Q', title='Amount', format='$,.2f'),
    alt.Tooltip('log_amount:Q', title='Log10(Amount)', format='.2f'),
]

expenditure_bar_chart = (
    alt.Chart(expenditure_yearly_top)
    .mark_bar()
    .encode(
        y=alt.Y('exp_desc:N', title='Expenditure Description', sort='-x'),
        x=alt.X('log_amount:Q', title='Log10(Total Amount)'),
        color=alt.Color('year:N', title='Year', scale=alt.Scale(scheme='category10')),
        tooltip=expenditure_bar_tooltip,
    )
    .add_params(*year_checkboxes)
    .transform_filter(expenditure_bar_filter)
    .properties(
        title='Top 15 Expenditure Descriptions by Year (Log Scale)',
        width=800,
        height=600,
    )
)

expenditure_bar_chart.show()


### Contributions

In [7]:
# Load all yearly contribution CSVs into one frame.
# NOTE: merge_csv_to_dataframe returns None when the folder has no .csv files.
contribution_folder_path = "./Datasets_Cleaned/Contributions/"  
contribution_df = merge_csv_to_dataframe(contribution_folder_path)

In [8]:
# Normalise contributor names (lower-case, surrounding whitespace removed).
for name_col in ("contributor_f_name", "contributor_l_name_or_org"):
    contribution_df[name_col] = contribution_df[name_col].str.lower().str.strip()

# Numeric amount (non-numeric -> NaN) and its log10; +1 keeps log10 defined at 0.
contribution_df["amount"] = pd.to_numeric(contribution_df["amount"], errors="coerce")
contribution_df["log_amount"] = np.log10(contribution_df["amount"] + 1)

# Parse the received date into a separate column; drop rows that fail to parse.
contribution_df["contribution_date"] = pd.to_datetime(contribution_df["received_date"], errors='coerce')
contribution_df = contribution_df.dropna(subset=["contribution_date"])

# Keep contributions dated 2020 through 2025 (both inclusive).
contribution_df['year'] = contribution_df['contribution_date'].dt.year
contribution_df = contribution_df[contribution_df['year'].between(2020, 2025)]

# Working copy for the monthly chart; received_date is converted to datetime
# here because the column in contribution_df still holds the raw values.
contribution_df_figure = contribution_df.copy()
contribution_df_figure["received_date"] = pd.to_datetime(contribution_df_figure["received_date"], errors='coerce')
contribution_df_figure = contribution_df_figure.dropna(subset=["received_date"])
contribution_df_figure["amount"] = pd.to_numeric(contribution_df_figure["amount"], errors="coerce")
contribution_df_figure = contribution_df_figure.dropna(subset=["amount"])

# Clean zip code and contribution type: require both, then strip whitespace.
contribution_df_figure = contribution_df_figure.dropna(subset=['contributor_zip', 'contribtype'])
contribution_df_figure['contributor_zip'] = contribution_df_figure['contributor_zip'].astype(str).str.strip()
contribution_df_figure['contribtype'] = contribution_df_figure['contribtype'].astype(str).str.strip()

# Month and year fields. The vectorised accessor replaces a per-row Python
# lambda and keeps a datetime dtype even when the frame is empty.
contribution_df_figure['month_start'] = contribution_df_figure['received_date'].dt.to_period('M').dt.start_time
contribution_df_figure['year'] = contribution_df_figure['month_start'].dt.year

# Total amount per contribution type and month.
contribution_df_monthly_grouped = (
    contribution_df_figure
    .groupby(['contribtype', 'month_start'])[['amount']]
    .sum()
    .reset_index()
)
contribution_df_monthly_grouped['year'] = contribution_df_monthly_grouped['month_start'].dt.year
contribution_df_monthly_grouped['year_str'] = contribution_df_monthly_grouped['year'].astype(str)

In [9]:
# Build the year filter from the contribution data itself. Previously this
# cell reused `years` and `year_checkboxes` left behind by the expenditure
# cells, so the chart depended on which cell had run last and ignored any
# year present only in the contribution data.
contribution_years = sorted(contribution_df_monthly_grouped['year'].unique().tolist())

# Same year_<YYYY> naming scheme as the other charts in this notebook.
contribution_year_checkboxes = [
    alt.param(
        name=f'year_{year}',
        value=True,
        bind=alt.binding_checkbox(name=f'{year} '),
    )
    for year in contribution_years
]

# Vega expression that keeps a row when the checkbox for its year is ticked.
contribution_monthly_filter = ' || '.join(
    f'datum.year == {year} && year_{year}' for year in contribution_years
)

# Line chart of monthly totals per contribution type; log_amount is computed
# inside the chart from the summed amount.
contribution_monthly_chart = alt.Chart(contribution_df_monthly_grouped).mark_line(point=True).encode(
    x=alt.X('month_start:T', title='Month Start Date'),
    y=alt.Y('log_amount:Q', title='Log10(Total Amount)'),
    color=alt.Color(
        'contribtype:N', 
        title='Contribution Type',
        legend=alt.Legend(
            title='Contribution Type',
            orient='bottom-left',
            labelFontSize=12,
            titleFontSize=14
        )
    ),
    tooltip=[
        alt.Tooltip('month_start:T', title='Month', format='%Y-%m'),
        alt.Tooltip('contribtype:N', title='Contribution Type'),
        alt.Tooltip('amount:Q', title='Amount', format='$,.2f'),
        alt.Tooltip('log_amount:Q', title='Log10(Amount)', format='.2f')
    ]
).transform_calculate(
    log_amount='log(datum.amount + 1) / log(10)'
).add_params(
    *contribution_year_checkboxes
).transform_filter(
    contribution_monthly_filter
).properties(
    title='Monthly Contributions by Type (Grouped by Month) - Log Scale',
    width=800,
    height=400
).interactive()

contribution_monthly_chart.show()


In [10]:
# How many top contributors are kept per year. The chart title is derived
# from this value so the label cannot drift from the code (it previously
# read "Top 15" while only 3 were selected).
TOP_N_CONTRIBUTORS = 3

# Aggregate amounts per year and contributor. dropna=False matters: by default
# groupby discards every row whose key contains NaN, which would silently
# remove contributors that have no first name recorded.
contribution_df_grouped = (
    contribution_df
    .groupby(['year', 'contributor_f_name', 'contributor_l_name_or_org'], dropna=False)['amount']
    .sum()
    .reset_index()
)

# Display name: "<first> <last/org>", with missing parts treated as empty.
contribution_df_grouped["Name/Org"] = (
    contribution_df_grouped["contributor_f_name"].fillna("")
    + " "
    + contribution_df_grouped["contributor_l_name_or_org"].fillna("")
).str.strip()

# Rows with neither name part cannot be labelled on the axis.
contribution_df_grouped = contribution_df_grouped[contribution_df_grouped["Name/Org"] != ""]

# Pool the top contributors of every year.
years = sorted(contribution_df_grouped['year'].unique().tolist())
all_top_contributors = set()
for year in years:
    year_data = contribution_df_grouped[contribution_df_grouped['year'] == year].copy()
    top_contributors_year = year_data.nlargest(TOP_N_CONTRIBUTORS, 'amount')['Name/Org'].tolist()
    all_top_contributors.update(top_contributors_year)

# Keep every yearly row of the pooled top contributors.
top_contributions_df = contribution_df_grouped[
    contribution_df_grouped['Name/Org'].isin(all_top_contributors)
].copy()

# log10 of the yearly total; +1 keeps it defined at 0.
top_contributions_df["log_amount"] = np.log10(top_contributions_df["amount"] + 1)

# %% [markdown]
# ### Filter

# %%
# One checkbox-bound parameter per year, named year_<YYYY>, initially ticked.
years = sorted(top_contributions_df['year'].unique().tolist())
year_checkboxes = []
for year in years:
    checkbox = alt.binding_checkbox(name=f'{year} ')
    year_checkboxes.append(
        alt.param(name=f'year_{year}', value=True, bind=checkbox)
    )

# %% [markdown]
# ### Graph

# %%
contribution_bar_chart = alt.Chart(top_contributions_df).mark_bar().encode(
    y=alt.Y("Name/Org:N", 
            title="Contributor Name/Organization", 
            sort="-x"),
    x=alt.X("log_amount:Q", 
            title="Log10(Total Amount)"),
    color=alt.Color('year:N', 
                    title='Year',
                    scale=alt.Scale(scheme='category10')),
    tooltip=[
        alt.Tooltip('year:N', title='Year'),
        alt.Tooltip('Name/Org:N', title='Contributor'),
        alt.Tooltip('amount:Q', title='Amount', format='$,.2f'),
        alt.Tooltip('log_amount:Q', title='Log10(Amount)', format='.2f')
    ]
).add_params(
    *year_checkboxes
).transform_filter(
    # Keep a row when the checkbox for its year is ticked.
    ' || '.join([f'datum.year == {year} && year_{year}' for year in years])
).properties(
    title=f'Top {TOP_N_CONTRIBUTORS} Contributors by Year (Log Scale)',
    width=800,
    height=600
)

contribution_bar_chart.show()

In [11]:
# %% [markdown]
# ### Combined Visualization

# %%
# Panel sizes used inside the dashboard: line charts on top, bar charts below.
line_panel_size = dict(width=400, height=250)
bar_panel_size = dict(width=400, height=350)

# Resize each chart for the dashboard layout.
expenditure_monthly_chart_small = expenditure_monthly_chart.properties(**line_panel_size)
expenditure_bar_chart_small = expenditure_bar_chart.properties(**bar_panel_size)
contribution_monthly_chart_small = contribution_monthly_chart.properties(**line_panel_size)
contribution_bar_chart_small = contribution_bar_chart.properties(**bar_panel_size)

# Left column (expenditures), stacked vertically; each panel keeps its own
# colour scale.
left_charts = alt.vconcat(
    expenditure_monthly_chart_small,
    expenditure_bar_chart_small,
).resolve_scale(color='independent')

# Right column (contributions), stacked vertically; each panel keeps its own
# colour scale.
right_charts = alt.vconcat(
    contribution_monthly_chart_small,
    contribution_bar_chart_small,
).resolve_scale(color='independent')

# Final layout: the two columns side by side under one centred title.
dashboard_title = alt.TitleParams(
    text='Expenditure and Contribution Analysis Dashboard',
    fontSize=20,
    anchor='middle',
)
final_chart = (
    alt.hconcat(left_charts, right_charts)
    .properties(title=dashboard_title)
    .configure_concat(spacing=20)
)

final_chart.show()

# Save the final chart as an HTML file with interaction.
final_chart.save('expenditure_contribution_dashboard.html')


In [12]:
# %%
import pandas as pd
import numpy as np
import altair as alt
from vega_datasets import data

# %%
def extract_michigan_expenditures(file_path):
    """Read an expenditures workbook and return only its Michigan rows.

    ``payee_state`` is converted to text, stripped and upper-cased. Rows whose
    state is ``'MI'`` or ``'DETROIT'`` are kept, and ``'DETROIT'`` is then
    rewritten to ``'MI'``.

    Parameters
    ----------
    file_path : str
        Path passed to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        Copy of the matching rows with the normalised ``payee_state``.
    """
    expenditures = pd.read_excel(file_path).copy()
    expenditures['payee_state'] = (
        expenditures['payee_state'].astype(str).str.strip().str.upper()
    )

    in_michigan = expenditures['payee_state'].isin(['MI', 'DETROIT'])
    mi_df = expenditures[in_michigan].copy()

    # A city name entered in the state column: fold it into the state code.
    state = mi_df['payee_state']
    mi_df['payee_state'] = state.where(state != 'DETROIT', 'MI')

    return mi_df

# Workbook with the 2025 expenditure records (path relative to the notebook).
file_path = "Datasets/Expenditures/2025_mi_cfr_expenditures1.xlsx"

# Michigan-only rows of that workbook.
results = extract_michigan_expenditures(file_path)

# Quick sanity check of what was loaded.
print(type(results))
print(f"data shape: {results.shape}")


# %%
# Row count of the Michigan subset.
len(results)

# %%
def clean_payee_zip(results):
    """Return the rows of ``results`` that carry a usable 5-digit ZIP code.

    ``payee_zip`` is converted to text, reduced to its digits, truncated to
    the first five digits (so ZIP+4 values keep their 5-digit prefix) and
    converted to ``int``. Rows are dropped when the ZIP is missing, has fewer
    than five digits, or falls outside ``48000 <= zip < 50000``. Progress
    counts are printed along the way.

    Parameters
    ----------
    results : pandas.DataFrame
        Must contain a ``payee_zip`` column; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Filtered copy with ``payee_zip`` as integers.
    """
    print(f"清洗前总行数: {len(results):,}")

    results_clean = results[results['payee_zip'].notna()].copy()
    print(f"删除空值后: {len(results_clean):,} 行")

    zip_text = results_clean['payee_zip'].astype(str).str.strip()
    # Spreadsheet cells often arrive as floats ("48933.0"). Remove that
    # artefact before stripping non-digits, otherwise the trailing 0 is
    # counted as a ZIP digit and a 4-digit value such as 4893.0 turns into
    # the valid-looking "48930".
    zip_text = zip_text.str.replace(r'\.0+$', '', regex=True)

    is_real_value = zip_text.str.upper() != 'NAN'
    # Digits only, then the 5-digit prefix (handles "48933-1234").
    zip_digits = zip_text.str.replace(r'\D', '', regex=True).str[:5]
    has_five_digits = zip_digits.str.len() == 5

    keep = is_real_value & has_five_digits
    results_clean = results_clean[keep].copy()

    results_clean['payee_zip'] = pd.to_numeric(zip_digits[keep], errors='coerce')
    results_clean = results_clean[results_clean['payee_zip'].notna()].copy()
    results_clean['payee_zip'] = results_clean['payee_zip'].astype(int)

    # Numeric range filter applied to the cleaned ZIP codes.
    in_range = (results_clean['payee_zip'] >= 48000) & (results_clean['payee_zip'] < 50000)
    results_clean = results_clean[in_range].copy()

    print(f"最终数据: {len(results_clean):,} 行")
    if len(results_clean) > 0:
        print(f"\nZip Code 示例:")
        print(results_clean['payee_zip'].head(10))

    return results_clean

# Rows of `results` that have a usable ZIP code.
results_clean_zip = clean_payee_zip(results)

# Summary of the cleaned frame (the printed labels are in Chinese).
print("\n" + "="*60)
print("清洗后的数据信息:")
print("="*60)
print(f"数据形状: {results_clean_zip.shape}")

# dtype, number of distinct ZIPs and min/max; a warning is printed instead
# when no row survived the cleaning.
if len(results_clean_zip) > 0:
    print(f"Payee Zip 数据类型: {results_clean_zip['payee_zip'].dtype}")
    print(f"Payee Zip 唯一值数量: {results_clean_zip['payee_zip'].nunique():,}")
    print(f"Payee Zip 范围: {results_clean_zip['payee_zip'].min()} - {results_clean_zip['payee_zip'].max()}")
else:
    print("警告: 没有数据！")


# %%
# Row count after ZIP cleaning.
print(len(results_clean_zip))

# %%
# Read the zip to county mapping file
zip_county_fips = pd.read_csv('zip_county_fips.csv')

# Convert zip column to integer so it matches the integer payee_zip
zip_county_fips['zip'] = zip_county_fips['zip'].astype(int)

# If the mapping lists a ZIP under more than one county, a left merge would
# emit one output row per match and every affected expenditure would be
# counted several times downstream. Keep a single county per ZIP (the first
# one listed).
# NOTE(review): "first listed" is an arbitrary tie-break - confirm or replace
# with a better rule if the file carries a weight column.
zip_to_fips = (
    zip_county_fips[['zip', 'County FIPS code']]
    .drop_duplicates(subset='zip', keep='first')
)

# Merge to add FIPS code; validate guards the one-county-per-ZIP assumption.
results_clean_zip = results_clean_zip.merge(
    zip_to_fips,
    left_on='payee_zip',
    right_on='zip',
    how='left',
    validate='many_to_one'
)

# Drop extra column and rename
results_clean_zip = results_clean_zip.drop(columns=['zip'])
results_clean_zip = results_clean_zip.rename(columns={'County FIPS code': 'payee_fips'})

# Convert payee_fips to integer (Int64 can handle NaN)
results_clean_zip['payee_fips'] = results_clean_zip['payee_fips'].astype('Int64')

# print(f"\nRecords with FIPS code: {results_clean_zip['payee_fips'].notna().sum():,}")
# print(f"Records without FIPS code: {results_clean_zip['payee_fips'].isna().sum():,}")
print("\nSample data:")
# results_clean_zip


# %%
# Keep rows whose amount is present and is not blank text.
amount_present = results_clean_zip['amount'].notna()
amount_not_blank = results_clean_zip['amount'].astype(str).str.strip().ne('')
results_clean_zip = results_clean_zip.loc[amount_present & amount_not_blank]

# Amounts as floats for the arithmetic below.
results_clean_zip['amount'] = results_clean_zip['amount'].astype(float)

# Report row count and resulting dtype (the printed labels are in Chinese).
print(f"清理后的数据行数: {len(results_clean_zip)}")
print(f"amount列数据类型: {results_clean_zip['amount'].dtype}")

# %%
# Snapshot of the frame at this stage.
results_clean_zip_copy = results_clean_zip.copy()

# %%
# Read the population data.
population = pd.read_csv('county_pop_FIPS.csv')

# Integer FIPS so it can be joined against payee_fips.
population['fips'] = population['fips'].astype(int)

# Remove thousands separators and spaces, then convert to numbers
# (anything still non-numeric becomes NaN).
population_text = population['pop_2020'].astype(str).str.replace(',', '').str.replace(' ', '')
population['pop_2020'] = pd.to_numeric(population_text, errors='coerce')

# Drop rows without a county FIPS first, then convert the column to int.
has_fips = results_clean_zip['payee_fips'].notna()
results_clean_zip = results_clean_zip[has_fips]
results_clean_zip['payee_fips'] = results_clean_zip['payee_fips'].astype(int)

# Merge first: attach each county's population to its expenditure rows.
county_population = population[['fips', 'pop_2020']]
results_clean_zip = results_clean_zip.merge(
    county_population,
    left_on='payee_fips',
    right_on='fips',
    how='left',
)

# Then compute the per-capita amount of every expenditure row.
results_clean_zip['amount_per_capita'] = results_clean_zip['amount'].div(results_clean_zip['pop_2020'])

# Drop the helper join column.
results_clean_zip = results_clean_zip.drop(columns=['fips'])

# Inspect the result (the printed labels are in Chinese).
print(f"合并后的数据行数: {len(results_clean_zip)}")
print(f"有人口数据的行数: {results_clean_zip['pop_2020'].notna().sum()}")
print(results_clean_zip[['payee_fips', 'amount', 'pop_2020', 'amount_per_capita']].head(10))


# %%
results_clean_zip.head()

# %%

# County shapes from the vega_datasets US topology; clicking selects a county by id.
counties = alt.topo_feature(data.us_10m.url, 'counties')
click = alt.selection_point(fields=['id'])

# Sum of the per-row per-capita amounts for each county FIPS.
# NOTE(review): the column is called total_amount and the tooltip formats it
# as dollars, but the values are per-capita sums - confirm the labels below.
county_amount = results_clean_zip.groupby('payee_fips')['amount_per_capita'].sum().reset_index()
county_amount.columns = ['id', 'total_amount']
county_amount = county_amount[county_amount['total_amount'] >= 0]  # Remove negative values
county_amount = county_amount.dropna(subset=['total_amount'])  # Remove NaN values

# Apply log transformation (log1p keeps zero totals defined)
county_amount['log_total_amount'] = np.log1p(county_amount['total_amount'])

# Create the choropleth chart
michigan_choropleth = alt.Chart(counties).mark_geoshape(
    stroke='black',
    strokeWidth=0.5
).transform_filter(
    # County ids in the 26000-26999 range, i.e. those starting with 26.
    (alt.datum.id >= 26000) & (alt.datum.id < 27000)
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(county_amount, 'id', ['log_total_amount', 'total_amount'])
).encode(
    color=alt.Color('log_total_amount:Q', 
                    scale=alt.Scale(scheme='yellowgreenblue'),
                    legend=alt.Legend(title='Log Total Amount', orient='right')),
    # Selected counties are fully opaque, the others are dimmed.
    opacity=alt.condition(click, alt.value(1), alt.value(0.4)),
    tooltip=[
        alt.Tooltip('id:N', title='County FIPS'),
        alt.Tooltip('total_amount:Q', title='Total Amount', format='$,.2f'),
        alt.Tooltip('log_total_amount:Q', title='Log Total Amount', format='.2f')
    ]
).project(
    type='albersUsa'
).properties(
    width=600,
    height=500,
    title='Michigan Counties - Log Average Amount by County'
).add_params(
    click
)

# The HTML-merging cell reads this file; write it here so a fresh
# Restart & Run All produces it instead of relying on a stale copy on disk.
michigan_choropleth.save('michigan_choropleth.html')

michigan_choropleth




<class 'pandas.core.frame.DataFrame'>
data shape: (28566, 37)
清洗前总行数: 28,566
删除空值后: 27,874 行
最终数据: 27,708 行

Zip Code 示例:
5     48933
7     48116
12    48836
13    48933
20    48843
26    48116
27    48843
36    48933
37    48116
38    48933
Name: payee_zip, dtype: int64

清洗后的数据信息:
数据形状: (27708, 37)
Payee Zip 数据类型: int64
Payee Zip 唯一值数量: 743
Payee Zip 范围: 48001 - 49969
27708

Sample data:
清理后的数据行数: 27697
amount列数据类型: float64
合并后的数据行数: 27615
有人口数据的行数: 27615
   payee_fips    amount  pop_2020  amount_per_capita
0       26065    100.00    290427           0.000344
1       26093   1160.00    196976           0.005889
2       26093    300.00    196976           0.001523
3       26065     46.56    290427           0.000160
4       26093    463.92    196976           0.002355
5       26093     74.67    196976           0.000379
6       26093  11242.76    196976           0.057077
7       26065    218.00    290427           0.000751
8       26093    851.51    196976           0.004323
9       2

In [13]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
合并两个Vega-Lite可视化HTML文件
"""

def _extract_vega_spec(html):
    """Return the JSON text assigned to ``var spec`` in ``html``, or ``None``.

    The object literal is located with a real JSON parser so that a ``};``
    occurring inside a string value (a title, an expression, a data cell)
    does not cut the spec short.
    """
    import json
    import re

    anchor = re.search(r'var spec = (?=\{)', html)
    if anchor is None:
        return None

    start = anchor.end()
    try:
        # raw_decode reports where the JSON document ends; slicing the
        # original text keeps the spec byte-for-byte as it was exported.
        _, end = json.JSONDecoder().raw_decode(html, start)
        return html[start:end]
    except ValueError:
        # Not strict JSON: fall back to "everything up to the first '};'".
        legacy = re.search(r'var spec = ({.*?});', html, re.DOTALL)
        return legacy.group(1) if legacy else None


def merge_html_files(file1_path, file2_path, output_path):
    """
    Merge the Vega-Lite visualizations of two HTML files into one page.

    Each input is expected to contain a ``var spec = {...};`` assignment.
    The spec of the first file is rendered in the "Expenditure and
    Contribution Analysis" section and the spec of the second file in the
    "Geographic Distribution by County" section. If a file has no spec, an
    empty spec (``{}``) is embedded and a warning is printed.

    Parameters:
        file1_path: path of the first HTML file
        file2_path: path of the second HTML file
        output_path: path of the merged HTML file to write

    Raises:
        FileNotFoundError: if either input file does not exist.
    """

    # Read both HTML files
    with open(file1_path, 'r', encoding='utf-8') as f:
        html1 = f.read()

    with open(file2_path, 'r', encoding='utf-8') as f:
        html2 = f.read()

    # Extract the spec of each file
    spec1 = _extract_vega_spec(html1)
    if spec1 is None:
        print(f"Warning: no 'var spec' found in {file1_path}; embedding an empty spec.")
        spec1 = '{}'

    spec2 = _extract_vega_spec(html2)
    if spec2 is None:
        print(f"Warning: no 'var spec' found in {file2_path}; embedding an empty spec.")
        spec2 = '{}'

    # Build the merged HTML (doubled braces are literal braces in the f-string)
    merged_html = f"""<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>Michigan Political Finance Dashboard</title>
  <style>
    body {{
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 20px;
      background-color: #f5f5f5;
    }}
    
    .container {{
      max-width: 1400px;
      margin: 0 auto;
      background-color: white;
      padding: 20px;
      box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }}
    
    h1 {{
      text-align: center;
      color: #333;
      margin-bottom: 30px;
      font-size: 28px;
    }}
    
    .visualization {{
      margin-bottom: 40px;
      border: 1px solid #ddd;
      padding: 20px;
      border-radius: 5px;
      background-color: #fafafa;
    }}
    
    .vis-title {{
      font-size: 20px;
      font-weight: bold;
      color: #555;
      margin-bottom: 15px;
      padding-bottom: 10px;
      border-bottom: 2px solid #007bff;
    }}

    #vis1.vega-embed,
    #vis2.vega-embed {{
      width: 100%;
      display: flex;
    }}

    #vis1.vega-embed details,
    #vis1.vega-embed details summary,
    #vis2.vega-embed details,
    #vis2.vega-embed details summary {{
      position: relative;
    }}
  </style>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/vega@5"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/vega-lite@5.20.1"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
</head>
<body>
  <div class="container">
    <h1>Michigan Political Finance Analysis Dashboard</h1>
    
    <!-- 第一个可视化：支出和捐款分析 -->
    <div class="visualization">
      <div class="vis-title">📊 Expenditure and Contribution Analysis</div>
      <div id="vis1"></div>
    </div>
    
    <!-- 第二个可视化：县级地图 -->
    <div class="visualization">
      <div class="vis-title">🗺️ Geographic Distribution by County</div>
      <div id="vis2"></div>
    </div>
  </div>

  <script>
    (function(vegaEmbed) {{
      // 第一个图表的规范
      var spec1 = {spec1};
      
      // 第二个图表的规范
      var spec2 = {spec2};
      
      var embedOpt = {{"mode": "vega-lite"}};

      function showError(el, error){{
          el.innerHTML = ('<div style="color:red;">'
                          + '<p>JavaScript Error: ' + error.message + '</p>'
                          + "<p>This usually means there's a typo in your chart specification. "
                          + "See the javascript console for the full traceback.</p>"
                          + '</div>');
          throw error;
      }}
      
      // 渲染第一个图表
      const el1 = document.getElementById('vis1');
      vegaEmbed("#vis1", spec1, embedOpt)
        .catch(error => showError(el1, error));
      
      // 渲染第二个图表
      const el2 = document.getElementById('vis2');
      vegaEmbed("#vis2", spec2, embedOpt)
        .catch(error => showError(el2, error));
        
    }})(vegaEmbed);

  </script>
</body>
</html>"""

    # Write the output file
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(merged_html)

    print(f"✅ 成功合并HTML文件到: {output_path}")


if __name__ == "__main__":
    # Usage example.
    # Order matters: the first file is rendered under the "Expenditure and
    # Contribution Analysis" heading and the second under "Geographic
    # Distribution by County". The two names were previously swapped, which
    # placed each chart under the other chart's heading.
    file1 = "expenditure_contribution_dashboard.html"  # first HTML file: charts dashboard
    file2 = "michigan_choropleth.html"  # second HTML file: county map
    output = "merged_dashboard.html"  # output file path
    
    try:
        merge_html_files(file1, file2, output)
        print(f"\n🎉 合并完成！请在浏览器中打开 {output} 查看结果。")
    except FileNotFoundError as e:
        print(f"❌ 错误: 找不到文件 - {e}")
    except Exception as e:
        print(f"❌ 发生错误: {e}")


✅ 成功合并HTML文件到: merged_dashboard.html

🎉 合并完成！请在浏览器中打开 merged_dashboard.html 查看结果。
