In [6]:
import pandas as pd

# Load the raw room / Iconclass table.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
file_path = "C:/Users/Leo/Desktop/DH1/rooms_iconclasses.csv"
data = pd.read_csv(file_path)


def _rows_within_period(frame, first_year, last_year):
    """Return the rows of `frame` whose dating_start AND dating_end both lie in [first_year, last_year].

    Rows whose year is NaN (e.g. coerced from a non-numeric value) never match.
    """
    starts_inside = frame['dating_start'].between(first_year, last_year)
    ends_inside = frame['dating_end'].between(first_year, last_year)
    return frame[starts_inside & ends_inside]


# Drop rows missing any field the analysis needs, then turn the comma-separated
# room_functions string into a list. assign() returns a fresh frame, so nothing is
# written into the dropna() result (avoids SettingWithCopyWarning).
data_cleaned = (
    data
    .dropna(subset=['dating_start', 'dating_end', 'room_functions', 'iconclasses'])
    .assign(room_functions=lambda frame: frame['room_functions'].str.split(','))
)

# One row per (room, single function). Tokens are stripped so that "A, B" does not
# produce a separate " B" category, and the years are converted to numbers
# (unparseable values become NaN).
data_exploded = (
    data_cleaned
    .explode('room_functions')
    .assign(
        room_functions=lambda frame: frame['room_functions'].str.strip(),
        dating_start=lambda frame: pd.to_numeric(frame['dating_start'], errors='coerce'),
        dating_end=lambda frame: pd.to_numeric(frame['dating_end'], errors='coerce'),
    )
)

# Sanity check: overall year range before filtering.
print("数据验证:")
print("最早的开始年份:", data_exploded['dating_start'].min())
print("最晚的结束年份:", data_exploded['dating_end'].max())

# Period subsets: a row is kept only when both its start and end year fall inside the period.
data_1550_1800 = _rows_within_period(data_exploded, 1550, 1800)
data_1600_1800 = _rows_within_period(data_exploded, 1600, 1800)

# Sanity check: year range of the 1600-1800 subset.
print("\n1600-1800数据集验证:")
print("筛选后最早的开始年份:", data_1600_1800['dating_start'].min())
print("筛选后最晚的结束年份:", data_1600_1800['dating_end'].max())

# Row counts per dataset.
# NOTE(review): the first figure is the cleaned + exploded row count, not the raw file's row count.
print("\n数据量统计:")
print("原始数据条数:", len(data_exploded))
print("1550-1800时期数据条数:", len(data_1550_1800))
print("1600-1800时期数据条数:", len(data_1600_1800))

# Persist the results (relative paths, i.e. the current working directory).
data_1550_1800.to_csv('rooms_1550_1800.csv', index=False)
data_1600_1800.to_csv('rooms_1600_1800.csv', index=False)
data_exploded.to_csv('rooms_cleaned.csv', index=False)

数据验证:
最早的开始年份: 1167.0
最晚的结束年份: 1943.0

1600-1800数据集验证:
筛选后最早的开始年份: 1600.0
筛选后最晚的结束年份: 1796.0

数据量统计:
原始数据条数: 423
1550-1800时期数据条数: 414
1600-1800时期数据条数: 398


In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

def create_iconclass_heatmap(df, title_suffix="", save_path=None, save_svg=False, vmax=600):
    """
    Draw (and optionally save) a heatmap of Iconclass level-0 codes per room function.

    Parameters:
    df: DataFrame with a `room_functions` column and an `iconclassses_level_0`
        column holding comma-separated codes. The frame is not modified.
    title_suffix: text appended to the plot title; lower-cased and with spaces
        replaced by underscores it is also appended to the output file name.
    save_path: output path without extension; when falsy no file is written.
    save_svg: when True (and save_path is set) an SVG is written next to the PNG.
    vmax: upper limit of the colour scale (defaults to the previously
        hard-coded 600).

    Returns:
    The frequency matrix (pd.crosstab result): one row per room function, one
    column per code (as strings, always including '1'..'9'), values are counts.
    """
    # Work on a derived frame so the caller's DataFrame does not gain a helper column.
    # Codes are stripped so that "1, 2" and "1,2" count towards the same columns.
    exploded_df = (
        df
        .assign(iconclass_numbers=df['iconclassses_level_0'].str.split(','))
        .explode('iconclass_numbers')
        .assign(iconclass_numbers=lambda frame: frame['iconclass_numbers'].str.strip())
    )

    # Count occurrences of every code for every room function.
    freq_matrix = pd.crosstab(exploded_df['room_functions'],
                              exploded_df['iconclass_numbers'])

    # Make sure codes 1-9 are always present, even when a dataset never uses them.
    for digit in range(1, 10):
        code = str(digit)
        if code not in freq_matrix.columns:
            freq_matrix[code] = 0

    # Columns in (string) sorted order.
    freq_matrix = freq_matrix.reindex(sorted(freq_matrix.columns), axis=1)

    fig, ax = plt.subplots(figsize=(15, 12))
    try:
        sns.heatmap(freq_matrix,
                    cmap='YlGnBu',
                    annot=True,
                    fmt='d',
                    cbar_kws={'label': 'Frequency'},
                    vmax=vmax,
                    ax=ax)

        ax.set_title(f'Frequency of Iconclass Level 0 by Room Functions {title_suffix}')
        ax.set_xlabel('Iconclass Level 0')
        ax.set_ylabel('Room Functions')

        fig.tight_layout()

        if save_path:
            # The suffix keeps the files of different datasets apart.
            full_path = f'{save_path}_{title_suffix.lower().replace(" ", "_")}'
            fig.savefig(f'{full_path}.png', dpi=300, bbox_inches='tight')
            if save_svg:
                fig.savefig(f'{full_path}.svg', format='svg', bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    return freq_matrix

def process_multiple_datasets(dataframes, names, base_save_path=None, save_svg=False):
    """
    Create one heatmap per dataset and collect the resulting frequency matrices.

    Parameters:
    dataframes: iterable of DataFrames, each passed to create_iconclass_heatmap
    names: iterable of labels, one per DataFrame, in the same order; each label
        is wrapped in parentheses and used as the heatmap's title suffix
    base_save_path: base output path forwarded as `save_path`
    save_svg: forwarded as-is; whether an SVG is saved as well

    Returns:
    dict mapping each name to the value returned by create_iconclass_heatmap.
    If a name occurs more than once, the last dataset with that name wins.

    Raises:
    ValueError: if `dataframes` and `names` differ in length (previously the
        surplus entries were silently ignored).
    """
    frames = list(dataframes)
    labels = list(names)
    if len(frames) != len(labels):
        raise ValueError(
            f"dataframes and names must have the same length "
            f"(got {len(frames)} and {len(labels)})"
        )

    return {
        label: create_iconclass_heatmap(
            frame,
            title_suffix=f"({label})",
            save_path=base_save_path,
            save_svg=save_svg,
        )
        for frame, label in zip(frames, labels)
    }


# Read the three CSV inputs: the two period subsets and the full cleaned table.
df1 = pd.read_csv('C:/Users/Leo/Desktop/DH1/0126/rooms_1550_1800.csv')
df2 = pd.read_csv('C:/Users/Leo/Desktop/DH1/0126/rooms_1600_1800.csv')
df3 = pd.read_csv('C:/Users/Leo/Desktop/DH1/0126/rooms_cleaned.csv')

# Pair every frame with the label used for its plot title, file name and result key.
labelled_frames = {
    'level0_1550-1800': df1,
    'level0_1600-1800': df2,
    'level0_full': df3,
}

# Render one heatmap per dataset (PNG and SVG) and keep the frequency matrices by label.
results = process_multiple_datasets(
    list(labelled_frames.values()),
    list(labelled_frames),
    base_save_path='iconclass_heatmap',
    save_svg=True,
)




In [12]:
# After the heatmaps are generated: for every frequency matrix, report how many
# rows it has and list the room functions that label those rows.
for dataset_label, freq_table in results.items():
    room_labels = freq_table.index
    print(f"\n{dataset_label}:")
    print(f"Number of rows: {len(room_labels)}")
    print("\nRoom functions:")
    for room_label in room_labels:
        print(f"- {room_label}")


level0_1550-1800:
Number of rows: 35

Room functions:
- ANTEROOMS
- ANTE_HALLS
- BANQUET_HALLS
- BATHROOMS
- BEDROOMS
- BOARDROOMS
- CABINETS_ROOMS
- CHAPELS_ROOMS
- CORRIDORS
- COUNCIL_CHAMBERS
- COUNTING_ROOMS
- DINING_ROOMS
- GALLERIES_DISPLAY_SPACES
- GUARD_HALLS
- GUEST_ROOMS
- HALLS
- IMPERIAL_HALLS
- LIBRARIES_ROOMS
- LOGGIAS
- LONG_GALLERIES
- MAIN_HALLS
- MANSION_DINING_ROOMS
- OFFICES_WORK_SPACES
- PATRONAGE_LODGES
- PRESENCE_CHAMBERS
- PRIVATE_CHAPELS
- ROYAL_CHAPELS_ROOMS
- SALA_TERRENA
- SALONS_ROOMS_FOR_ENTERTAINING
- SITTING_ROOMS
- STAIRWELLS
- THEATERS_ROOMS
- TOILET_ROOMS
- VESTIBULES
- WARDROBES_ROOMS

level0_1600-1800:
Number of rows: 34

Room functions:
- ANTEROOMS
- ANTE_HALLS
- BANQUET_HALLS
- BATHROOMS
- BEDROOMS
- BOARDROOMS
- CABINETS_ROOMS
- CHAPELS_ROOMS
- CORRIDORS
- COUNCIL_CHAMBERS
- COUNTING_ROOMS
- DINING_ROOMS
- GALLERIES_DISPLAY_SPACES
- GUARD_HALLS
- GUEST_ROOMS
- HALLS
- IMPERIAL_HALLS
- LIBRARIES_ROOMS
- LONG_GALLERIES
- MAIN_HALLS
- MANSION_DININ