In [1]:
import pandas as pd

# Input files: the cross-reference table and the per-component feature table.
xref_file = "opamps-xref.csv"
features_file = "opamps-features.csv"

xref_data = pd.read_csv(xref_file)
features_data = pd.read_csv(features_file)


def find_missing_components(xref, features):
    """Collect the components referenced by ``xref`` that have no row in ``features``.

    Args:
        xref: DataFrame with the columns 'STMicro MPN', 'STMicro Name',
            'Competitor MPN' and 'Competitor Name'.
        features: DataFrame with the columns 'MPN' and 'MANUFACTURER'.

    Returns:
        A list of ``{'MPN': ..., 'MANUFACTURER': ...}`` dicts, one per missing
        reference, in xref row order (STMicro part first, then the competitor
        part). A pair referenced by several xref rows appears several times.
    """
    # Build the lookup once instead of scanning the whole feature table twice
    # per xref row. Pairs containing a missing value are left out so that, as
    # with an element-wise ``==`` comparison, a missing value never matches.
    feature_keys = features[['MPN', 'MANUFACTURER']].dropna()
    known_pairs = set(zip(feature_keys['MPN'], feature_keys['MANUFACTURER']))

    missing = []
    xref_rows = zip(
        xref['STMicro MPN'],
        xref['STMicro Name'],
        xref['Competitor MPN'],
        xref['Competitor Name'],
    )
    for st_mpn, st_name, competitor_mpn, competitor_name in xref_rows:
        # Check the STMicro part first, then the competitor part.
        for mpn, manufacturer in ((st_mpn, st_name), (competitor_mpn, competitor_name)):
            if (mpn, manufacturer) not in known_pairs:
                missing.append({'MPN': mpn, 'MANUFACTURER': manufacturer})
    return missing


# Every xref reference (STMicro or competitor) that has no feature row.
not_found = find_missing_components(xref_data, features_data)

# Report the components that could not be matched.
if not not_found:
    print("所有元件均已找到对应特征！")
else:
    print("以下元件未找到对应特征：")
    for item in not_found:
        print(f"MPN: {item['MPN']}, MANUFACTURER: {item['MANUFACTURER']}")


以下元件未找到对应特征：
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036
MPN: PN-103742, MANUFACTURER: MN-1036

In [4]:
# De-duplicate the not_found entries. Each dict is turned into a hashable tuple
# of its items; dict.fromkeys keeps only the first occurrence of every tuple and
# preserves insertion order, so the result is the same on every run (a plain
# set would yield the entries in an arbitrary, run-dependent order).
unique_not_found = [
    dict(entry) for entry in dict.fromkeys(tuple(d.items()) for d in not_found)
]

# Print the size of the de-duplicated list followed by its entries.
print(f"最终 not_found 中元素数量：{len(unique_not_found)}")
for item in unique_not_found:
    print(f"MPN: {item['MPN']}, MANUFACTURER: {item['MANUFACTURER']}")

最终 not_found 中元素数量：4
MPN: PN-1016310, MANUFACTURER: MN-1036
MPN: PN-1016177, MANUFACTURER: MN-1036
MPN: PN-1017517, MANUFACTURER: MN-100
MPN: PN-103742, MANUFACTURER: MN-1036


In [5]:
# Convert the de-duplicated entries into a DataFrame. The columns are named
# explicitly so that an empty unique_not_found still yields a frame (and a CSV
# header) with MPN and MANUFACTURER; without this, an empty list produces a
# column-less file that cannot be read back by column name.
not_found_df = pd.DataFrame(unique_not_found, columns=["MPN", "MANUFACTURER"])

# Write the table to disk without the positional index.
output_file = "not_found.csv"
not_found_df.to_csv(output_file, index=False)

In [6]:
import pandas as pd

# Paths: the cross-reference table to filter, the list of components that have
# no features, and the destination of the filtered cross-reference table.
xref_file = "opamps-xref.csv"
not_found_file = "not_found.csv"
output_file = "opamps-xref-cleaned.csv"

# Load both tables from disk.
xref_data = pd.read_csv(xref_file)
not_found_data = pd.read_csv(not_found_file)

# (MPN, MANUFACTURER) pairs kept in a set so each membership test is a hash lookup.
not_found_set = set(zip(not_found_data["MPN"], not_found_data["MANUFACTURER"]))


def _references_missing_part(row):
    """Return True when either component named in the xref row is in not_found_set."""
    st_key = (row["STMicro MPN"], row["STMicro Name"])
    if st_key in not_found_set:
        return True
    competitor_key = (row["Competitor MPN"], row["Competitor Name"])
    return competitor_key in not_found_set


# Flag the rows that mention a missing component, then keep all the others.
rows_to_drop = xref_data.apply(_references_missing_part, axis=1)
cleaned_data = xref_data[~rows_to_drop]

# Persist the filtered cross-reference table without the positional index.
cleaned_data.to_csv(output_file, index=False)