In [7]:
import pandas as pd
import re
import io

def parse_plasma_data(num,data_string):
    # 데이터를 줄 단위로 분리
    lines = [line.strip() for line in data_string.split('\n') if line.strip()]
    
    # 메타데이터 추출
    type_ = lines[0]
    reaction = lines[1]
    eloss = float(lines[2])  # ELoss 값 추출
    species = re.search(r'SPECIES: (.+)', lines[3]).group(1)
    formula = re.search(r'PROCESS: (.+?),', lines[4]).group(1)
    
    # 데이터 부분 추출
    try:
        data_start = lines.index('-----------------------------') + 1
        data_end = lines[data_start:].index('-----------------------------') + data_start
        data_lines = lines[data_start:data_end]
    except ValueError:
        print("Warning: Could not find data section. Using all lines after metadata.")
        data_lines = lines[8:]  # Assuming metadata ends at line 8
    
    # 데이터를 DataFrame으로 변환
    data_io = io.StringIO('\n'.join(data_lines))
    df = pd.read_csv(data_io, sep=r'\s+', names=['Energy(eV)', 'Cross_section(m2)'])
    
    # 메타데이터 추가
    df['Num'] = num
    df['Type'] = type_
    df['Reaction'] = reaction
    df['ELoss'] = eloss  # 새로운 ELoss 열 추가
    df['Species'] = species
    df['Formula'] = formula
    
    # 열 순서 재배열
    df = df[['Num', 'Type', 'Reaction', 'ELoss', 'Species', 'Formula', 'Energy(eV)', 'Cross_section(m2)']]
    
    return df

In [8]:
# 테스트 데이터
data_string = """
IONIZATION	
H2 -> H2^+	
1.54E+01	
SPECIES: e / H2	
PROCESS: E + H2 -> E + E + H2+, Ionization	
PARAM.:  E = 15.4 eV	
COMMENT: check energy loss ?.	
UPDATED: 2012-10-17 07:25:21	
COLUMNS: Energy (eV) | Cross section (m2)	
-----------------------------	
1.54E+01	0.00E+00
1.60E+01	2.99E-22
1.65E+01	6.07E-22
1.70E+01	9.24E-22
1.75E+01	1.23E-21
1.80E+01	1.56E-21
1.85E+01	1.87E-21
1.90E+01	2.20E-21
1.95E+01	2.49E-21
2.00E+01	2.80E-21
2.05E+01	3.10E-21
2.10E+01	3.36E-21
2.15E+01	3.62E-21
2.20E+01	3.90E-21
2.25E+01	4.14E-21
2.30E+01	4.39E-21
2.35E+01	4.61E-21
2.40E+01	4.84E-21
2.45E+01	5.05E-21
2.50E+01	5.24E-21
3.00E+01	6.51E-21
3.50E+01	7.60E-21
4.00E+01	8.40E-21
4.50E+01	8.80E-21
5.00E+01	9.08E-21
5.50E+01	9.31E-21
6.00E+01	9.44E-21
6.50E+01	9.48E-21
7.00E+01	9.49E-21
7.50E+01	9.44E-21
8.00E+01	9.37E-21
8.50E+01	9.28E-21
9.00E+01	9.19E-21
9.50E+01	9.11E-21
1.00E+02	9.00E-21
1.10E+02	8.72E-21
1.20E+02	8.52E-21
1.40E+02	8.06E-21
1.60E+02	7.63E-21
1.80E+02	7.14E-21
2.00E+02	6.76E-21
2.25E+02	6.36E-21
2.50E+02	5.96E-21
2.75E+02	5.56E-21
3.00E+02	5.29E-21
3.50E+02	4.77E-21
4.00E+02	4.36E-21
4.50E+02	3.98E-21
5.00E+02	3.73E-21
5.50E+02	3.39E-21
6.00E+02	3.17E-21
6.50E+02	3.02E-21
7.00E+02	2.83E-21
7.50E+02	2.72E-21
8.00E+02	2.57E-21
8.50E+02	2.48E-21
9.00E+02	2.36E-21
9.50E+02	2.22E-21
1.00E+03	2.11E-21
-----------------------------				
"""
# 함수 실행
df = parse_plasma_data(249,data_string)
# CSV 파일로 저장
df.to_csv('./ZDPlaskin/kinetic_DB/reaction_%d.csv'%df['Num'].loc[0], index=False)

In [9]:
df

Unnamed: 0,Num,Type,Reaction,ELoss,Species,Formula,Energy(eV),Cross_section(m2)
0,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,15.4,0.0
1,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,16.0,2.99e-22
2,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,16.5,6.0700000000000005e-22
3,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,17.0,9.24e-22
4,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,17.5,1.23e-21
5,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,18.0,1.56e-21
6,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,18.5,1.87e-21
7,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,19.0,2.2e-21
8,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,19.5,2.49e-21
9,249,IONIZATION,H2 -> H2^+,15.4,e / H2,E + H2 -> E + E + H2+,20.0,2.8e-21
