# 准备数据 from NIST https://www.nist.gov/pml/x-ray-mass-attenuation-coefficients

## data download

In [None]:
import requests
import re
import os

# Create the folder that receives one downloaded HTML page per element.
XrayMassCoef_html_lists = "Data/XrayMassCoef_html_lists"
os.makedirs(XrayMassCoef_html_lists, exist_ok=True)

# Source pages on the NIST site and the local files they are saved to.
content_url = "https://physics.nist.gov/PhysRefData/XrayMassCoef/tab3.html"
constant_url = "https://physics.nist.gov/PhysRefData/XrayMassCoef/tab1.html"
content_file_name = r"Data/X-Ray Mass Attenuation Coefficients content.html"
constant_file_name = r"Data/Element constant.html"

# Seconds to wait on the server before giving up, so that a stalled
# connection cannot block the notebook forever.
request_timeout = 30

# Download both pages. raise_for_status() aborts on an HTTP error so that an
# error page is never written to disk as if it were data.
content_response = requests.get(content_url, timeout=request_timeout)
content_response.raise_for_status()
constant_response = requests.get(constant_url, timeout=request_timeout)
constant_response.raise_for_status()

# Save the two pages locally.
with open(content_file_name, "w", encoding='utf-8') as file:
    file.write(content_response.text)
with open(constant_file_name, "w", encoding='utf-8') as file:
    file.write(constant_response.text)


# Read the saved index page back from disk.
with open(content_file_name, "r", encoding='utf-8') as file:
    content = file.read()

# Each match yields (text of the preceding cell, link target, link text);
# the loop below treats them as (symbol, href, name).
pattern = r'<TD>(.*?)</TD>\s*<TD><A href="(.*?)">(.*?)</A></TD>'
matches = re.findall(pattern, content)


# Download every linked page and store it as "<symbol>_<name>_XrayMassCoef.html".
for match in matches:
    symbol, href, name = match
    url = f"https://physics.nist.gov/PhysRefData/XrayMassCoef/{href}"
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    filename = XrayMassCoef_html_lists+f"/{symbol}_{name}_XrayMassCoef.html"
    with open(filename, "w", encoding='utf-8') as file:
        file.write(response.text)

## 整理数据格式

In [None]:
# 获取元素常数信息

from bs4 import BeautifulSoup
import pandas as pd

# Parse the element-constant page saved by the download cell. The `content`
# variable holds the tab3 index page, which is only used for its links; the
# Z/A and density columns read below are taken from the constants file.
# NOTE(review): assumes the constants page lays its rows out as
# Z, symbol, name, Z/A, I, density - confirm against the saved HTML.
constant_file_name = r"Data/Element constant.html"
with open(constant_file_name, "r", encoding='utf-8') as file:
    constant_html = file.read()

soup = BeautifulSoup(constant_html, 'html.parser')

# Every table row on the page.
rows = soup.find_all('tr')

# elementZA maps Z (as a string) to the (Z/A, density) strings of that row.
elementZA = {}
# Rows are collected first and turned into a DataFrame once, instead of
# concatenating a one-row frame per element.
records = []
for row in rows:
    cols = row.find_all('td')
    # Rows with fewer than six cells are skipped.
    if len(cols) < 6:
        continue
    Z = cols[0].text.strip()
    symbol = cols[1].text.strip()
    z_a = cols[3].text.strip()
    density = cols[5].text.strip()
    elementZA[Z] = (z_a, density)
    records.append({'Z': Z, 'Symbol': symbol, 'Z/A': z_a, 'Density': density})

df = pd.DataFrame(records, columns=['Z', 'Symbol', 'Z/A', 'Density'])

# Show the extracted table.
print(df)

# Special case: the values for Z = 1 are set by hand.
elementZA['1'] = ("0.99212","8.375E-05")

In [None]:
# 获取元素X-ray Mass Attenuation Coefficients信息

import os
from bs4 import BeautifulSoup
import pandas as pd

# NOTE: the carbon page is expected to be named "Carbon", not
# "Carbon (graphite)"; handling the latter would need extra logic, so the
# source HTML file was edited by hand.
# NOTE: extraction for Be has some formatting problems; inspect its output by
# hand after running this cell.

XrayMassCoef_html_lists = "Data/XrayMassCoef_html_lists"
XrayMassCoef_csv_lists = "Data/XrayMassCoef_csv_lists"
os.makedirs(XrayMassCoef_csv_lists, exist_ok=True)

# Every entry in the folder of downloaded pages is processed.
files = os.listdir(XrayMassCoef_html_lists)

Zlist = []
title = ['Energy (MeV)', 'μ/ρ (cm2/g)', 'μen/ρ (cm2/g)']
# Convert the <PRE> table of each page into a CSV file.
for file in files:
    with open(f"{XrayMassCoef_html_lists}/{file}", "r", encoding='utf-8') as html_file:
        content = html_file.read()
    soup = BeautifulSoup(content, 'html.parser')
    # Z is the fourth whitespace-separated token of the first <b> element.
    Z = soup.find('b').get_text().split()[3]
    pre_content = soup.find('pre').get_text()

    data = []
    for raw_line in pre_content.split('\n'):
        if not raw_line:
            continue
        tokens = raw_line.split()
        # Lines with more than four tokens are ignored.
        if len(tokens) > 4:
            continue
        # A four-token line loses its first token (presumably an
        # absorption-edge label - TODO confirm); shorter lines are kept as is.
        data.append(tokens[1:] if len(tokens) == 4 else tokens)

    # The first four collected rows are dropped (presumably header lines).
    df = pd.DataFrame(data[4:], columns=title)
    symbol, name, _ = file.split('_')
    Zlist.append(Z)
    # A is Z divided by the Z/A value stored in elementZA for this Z.
    A = float(Z) / float(elementZA[Z][0])
    csv_name = f"/{symbol}_{name}_{Z}_{A:.2f}_XrayMassCoef.csv"
    print(csv_name)
    df.to_csv(XrayMassCoef_csv_lists + csv_name, index=False)

# data download from https://physics.nist.gov

In [3]:
import requests
import os

# URL and form fields of the POST request.
# NOTE(review): the meaning of each field is defined by the remote script and
# is not visible here; the values are passed through unchanged.
url = "https://physics.nist.gov/cgi-bin/Xcom/data.pl"
payload = {
    "character": "space",
    "Method": "1",
    "ZNum": "1",
    "OutOpt": "PIC",
    "NumAdd": "1",
    "Energies": "",
    "Output": "on",
    "WindowXmin": "0.001",
    "WindowXmax": "100000",
    "photoelectric": "on",
    "coherent": "on",
    "incoherent": "on",
    "nuclear": "on",
    "electron": "on",
    "with": "on",
    "without": "on"
}

# Send a browser-like User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
}

# Send the POST request. The timeout keeps a stalled connection from hanging
# the notebook, and raise_for_status() prevents an HTTP error page from being
# saved as if it were the result.
response = requests.post(url, data=payload, headers=headers, timeout=30)
response.raise_for_status()

# Make sure the newData folder exists.
os.makedirs('newData', exist_ok=True)

# Save the response body. The encoding is given explicitly, as in the other
# cells, so the write does not depend on the platform's default encoding.
with open('newData/response.txt', 'w', encoding='utf-8') as f:
    f.write(response.text)

In [4]:
# 从 XRayTransmissionCalculator\Data\XrayMassCoef_csv_lists 获取所有文件的文件名，以Se_Selenium_34_78.96_XrayMassCoef.csv为例，使用_分隔，提取出来前4个分别是 symbol, name, Z, A
# 新建一个结构体，element，包含symbol, name, Z, A。
# 用一个列表保存所有的element

import os
from collections import namedtuple

# Record type describing one element; every field holds the string taken from
# the file name.
Element = namedtuple('Element', 'symbol name Z A')

XrayMassCoef_csv_lists = "Data/XrayMassCoef_csv_lists"
files = os.listdir(XrayMassCoef_csv_lists)

# Build one Element per file whose name has at least four "_"-separated parts;
# the first four parts are symbol, name, Z and A.
elements = []
for file in files:
    parts = file.split('_')
    if len(parts) < 4:
        continue
    symbol, name, Z, A = parts[:4]
    element = Element(symbol=symbol, name=name, Z=Z, A=A)
    elements.append(element)


[Element(symbol='Ac', name='Actinium', Z='89', A='227.03'),
 Element(symbol='Ag', name='Silver', Z='47', A='107.87'),
 Element(symbol='Al', name='Aluminum', Z='13', A='26.98'),
 Element(symbol='Ar', name='Argon', Z='18', A='39.95'),
 Element(symbol='As', name='Arsenic', Z='33', A='74.92'),
 Element(symbol='At', name='Astatine', Z='85', A='209.99'),
 Element(symbol='Au', name='Gold', Z='79', A='196.97'),
 Element(symbol='B ', name='Boron', Z='5', A='10.81'),
 Element(symbol='Ba', name='Barium', Z='56', A='137.33'),
 Element(symbol='Be', name='Beryllium', Z='4', A='9.01'),
 Element(symbol='Bi', name='Bismuth', Z='83', A='208.98'),
 Element(symbol='Br', name='Bromine', Z='35', A='79.90'),
 Element(symbol='C ', name='Carbon', Z='6', A='12.01'),
 Element(symbol='Ca', name='Calcium', Z='20', A='40.08'),
 Element(symbol='Cd', name='Cadmium', Z='48', A='112.41'),
 Element(symbol='Ce', name='Cerium', Z='58', A='140.11'),
 Element(symbol='Cl', name='Chlorine', Z='17', A='35.45'),
 Element(symbol