## Step 1: Fill Excel ("DoesCal.xlsx"):
    In Sheet "Initial", fill all cells in yellow with fixed units.

## Step 2: Save PACE4 results in Folder "PACE4":
    Make sure the beam energy used for PACE4 is the same as the one in the Excel sheet "Initial".

## Step 3: Run the code to copy info from PACE4 result to "DoesCal.xlsx"

In [1]:
import pdfplumber
import os
import re
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment  # Ensure Alignment is imported

#### Code: extract Compound Info from PDF

In [2]:
def Extract_CompoundInf(text):
    """Pull the compound-nucleus quantities out of one page of extracted text.

    The text is scanned line by line.  A line sets at most one quantity (the
    first rule in the table below that both contains its marker and matches
    its regex), and a later matching line overwrites an earlier value.
    Scanning stops at the first line that yields a recoil energy, so nothing
    after that line is read.

    Args:
        text: Page text with lines separated by '\\n'.

    Returns:
        Tuple ``(Beam_Energy, Coulomb_Barrier, Center_Mass, Recoil_Energy)``.
        Each entry is a float parsed from the text, or -1 when it was not
        found; a "... NOT Found" message is printed for every missing entry.
    """
    # One rule per returned value, in return order:
    # (marker substring, regex whose group 1 is the number,
    #  message printed when missing, stop scanning after a hit)
    rules = (
        ("Bass fusion xsection for E = ",
         r'Bass fusion xsection for E = (\d+(\.\d+)?)\s*MeV',
         "Beam energy NOT Found",
         False),
        ("Barrier height",
         r'Barrier height is (\d+(\.\d+)?)\s*MeV',
         "Barrier height NOT Found",
         False),
        ("Center of mass energy (MeV)",
         r'Center of mass energy \(MeV\)\s*(\d+(\.\d+)?)',
         "Center of mass energy NOT Found",
         False),
        ("Compound nucleus recoil energy (MeV)",
         r'Compound nucleus recoil energy \(MeV\)\s*(\d+(\.\d+)?)',
         "Compound nucleus recoil energy NOT Found",
         True),
    )

    # -1 marks "not found"; parsed values are never negative because the
    # regexes only accept unsigned numbers.
    found = [-1] * len(rules)

    finished = False
    for current_line in text.split('\n'):
        for slot, (marker, pattern, _, stops_scan) in enumerate(rules):
            if marker not in current_line:
                continue
            hit = re.search(pattern, current_line)
            if hit is None:
                # Marker present but no number: let the remaining rules try.
                continue
            found[slot] = float(hit.group(1))
            finished = stops_scan
            break  # this line is consumed; move on to the next one
        if finished:
            break

    for value, (_, _, missing_message, _) in zip(found, rules):
        if value < 0:
            print(missing_message)

    return tuple(found)

#### Code: extract Production Info from PDF

In [3]:
def Extract_ProductInf(pdf):
    """Collect the rows of the "Yields of residual nuclei" table from a PDF.

    Every page except the first (``pdf.pages[1:]``) is scanned.  Collection
    starts on the line after one containing "Yields of residual nuclei" and
    stops at the first subsequent line containing
    "Angular distribution results".  The table may span several pages; pages
    with no extractable text are skipped.

    An end marker that shows up *before* the start marker is ignored, so it
    can no longer suppress the table that follows it.

    Args:
        pdf: Object exposing ``pages``; each page must provide
            ``extract_text()`` returning a string (or a falsy value).

    Returns:
        List of rows, each row being the whitespace-split tokens of one line
        between the two markers.  This includes any non-table lines found
        there (e.g. blank lines or page numbers).  Empty if the table was not
        found, in which case a message is printed.
    """
    Production_Inf = []
    collecting = False
    finished = False

    for page in pdf.pages[1:]:
        text = page.extract_text()
        if not text:
            continue
        for line in text.split('\n'):
            if "Yields of residual nuclei" in line:
                collecting = True
                continue
            if not collecting:
                # Nothing before the start marker is relevant, including a
                # stray end marker.
                continue
            if "Angular distribution results" in line:
                finished = True
                break
            Production_Inf.append(line.split())
        if finished:
            break

    if not Production_Inf:
        print("Yields of residual nuclei NOT Found")
    return Production_Inf

#### Code: filter Production Info

In [4]:
def Filter_Inf(Production):
    """Split raw yield-table rows into the total line and typed records.

    Expected layout of ``Production`` after rows with fewer than two tokens
    are dropped (such single-token rows appear when the table crosses a page
    and the page number is picked up as a line):

    * first row: the column header, which has no name for the isotope column;
    * middle rows: one nuclide each, with Z, N, A, isotope symbol at
      positions 0-3 and the cross section at position 6;
    * last row: the total line of the reaction.

    The input is not modified: the header is copied before "isotope" is
    inserted, so calling this function twice on the same data gives the same
    result.

    Args:
        Production: List of token lists as returned by ``Extract_ProductInf``.

    Returns:
        Tuple ``(Total, Production_Record)``.  ``Total`` is the last row,
        unchanged.  ``Production_Record`` is a new list whose first entry is
        the header ``[Z, N, A, isotope, x-section]`` column names and whose
        remaining entries are ``[int Z, int N, int A, str isotope,
        float x-section]``.

    Raises:
        IndexError: If fewer than two usable rows (header + total) remain, or
            if a row is too short to contain the cross-section column.
        ValueError: If a Z/N/A or cross-section token is not numeric.
    """
    # Drop page-number artefacts (rows with a single token) and blank lines.
    rows = [row for row in Production if len(row) > 1]
    if len(rows) < 2:
        raise IndexError(
            "Production table needs at least a header row and a total row; "
            f"got {len(rows)} usable row(s)"
        )

    # The last line carries the totals for the reaction.
    *table, Total = rows

    # Work on a copy of the header so the caller's data stays untouched.
    header = list(table[0])
    header.insert(3, "isotope")

    # Only Z, N, A, isotope and x-section are recorded.
    Production_Record = [[header[0], header[1], header[2], header[3], header[6]]]
    for row in table[1:]:
        Production_Record.append(
            [int(row[0]), int(row[1]), int(row[2]), row[3], float(row[6])]
        )

    return Total, Production_Record

#### Code: write info to excel

In [5]:
def Write_Excel(Production_Record, Total, file_path, Beam_Energy=0, Recoil_Energy=0):
    """Store one PACE4 result in the workbook at ``file_path``.

    Two things are written:

    1. In sheet "Initial", the row whose "Beam Energy (MeV)" equals
       ``Beam_Energy`` gets ``Recoil_Energy`` in "Recoil Energy (MeV)" and
       the total cross section in "Total xsec (mb)".  Only these two cells
       are touched, so formulas and blank cells elsewhere in the sheet are
       left exactly as they are.  (pandas reads cached values rather than
       formulas, so copying the whole table back would replace formulas with
       values.)  If one of the two columns does not exist yet it is appended
       together with its header.
    2. A sheet named ``f'{Beam_Energy}MeV'`` (replaced if it already exists)
       holding the production table plus a final total row, with every
       column 16 wide and right-aligned.

    The sheet is assumed to have its header in row 1 and its first column in
    column A, i.e. the layout ``pd.read_excel`` maps to DataFrame positions.

    Args:
        Production_Record: Header row followed by data rows, as returned by
            ``Filter_Inf``.  The header must contain 'Z' and 'x-section(mb)'
            for the total row to line up with those columns.
        Total: Total line of the reaction; the first token goes into the 'Z'
            column and the last token is the total cross section.
        file_path: Path of an existing .xlsx workbook with a sheet "Initial".
        Beam_Energy: Beam energy used to select the row and name the sheet.
        Recoil_Energy: Compound-nucleus recoil energy to record.

    Raises:
        IndexError: If no row of "Initial" has the given beam energy.
        KeyError: If "Initial" has no "Beam Energy (MeV)" column.
        ValueError: If the last token of ``Total`` is not numeric.
    """
    total_xsec = float(Total[-1])

    #================== Locate the target row in "Initial" ================#
    initial_df = pd.read_excel(file_path, sheet_name="Initial", engine='openpyxl')
    matching_rows = initial_df[initial_df["Beam Energy (MeV)"] == Beam_Energy].index
    if len(matching_rows) == 0:
        raise IndexError(
            f'Beam energy {Beam_Energy} MeV not found in column '
            f'"Beam Energy (MeV)" of sheet "Initial" in {file_path}'
        )
    # +2: worksheet rows are 1-based and row 1 holds the header.
    sheet_row = int(matching_rows[0]) + 2
    headers = list(initial_df.columns)

    #================== Build the table for the new sheet ================#
    # First record is the header, the rest are data rows.
    df = pd.DataFrame(Production_Record[1:], columns=Production_Record[0])
    total_row = pd.DataFrame({
        'Z': [Total[0]],  # label of the total line goes into the 'Z' column
        'x-section(mb)': [total_xsec],
    })
    df = pd.concat([df, total_row], ignore_index=True)

    new_sheet_name = f'{Beam_Energy}MeV'

    #================== Write everything in one load/save cycle ================#
    with pd.ExcelWriter(file_path, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
        # Update only the two result cells of the matching "Initial" row.
        initial_sheet = writer.book["Initial"]
        updates = (
            ("Recoil Energy (MeV)", Recoil_Energy),
            ("Total xsec (mb)", total_xsec),
        )
        for header, value in updates:
            if header not in headers:
                # Column missing: append it, with its header in row 1.
                headers.append(header)
                initial_sheet.cell(row=1, column=len(headers), value=header)
            initial_sheet.cell(row=sheet_row, column=headers.index(header) + 1, value=value)

        # Create (or replace) the per-energy sheet.
        df.to_excel(writer, sheet_name=new_sheet_name, index=False)

        # Set column width and alignment for the new sheet.
        worksheet = writer.sheets[new_sheet_name]
        right_aligned = Alignment(horizontal='right')
        for column in worksheet.columns:
            worksheet.column_dimensions[column[0].column_letter].width = 16
            for cell in column:
                cell.alignment = right_aligned
    # Leaving the ``with`` block saves the workbook.

#### Code: Read PDF and Write EXCEL

In [6]:
def RWProcess(pdf_path, excel_path):
    """Read one PACE4 result PDF and record it in the Excel workbook.

    The first page supplies the reaction quantities (beam energy, recoil
    energy, ...); the remaining pages supply the table of residual-nucleus
    yields.  The filtered table and the recoil energy are then written to the
    workbook by ``Write_Excel``.

    Args:
        pdf_path: Path of the PACE4 result PDF.
        excel_path: Path of the workbook to update.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract basic information for this reaction from the first page.
        # Treat a page without extractable text as empty, mirroring the
        # empty-text guard in Extract_ProductInf, so the "NOT Found" messages
        # are printed instead of failing on a non-string value.
        # NOTE(review): whether extract_text() can return None depends on the
        # installed pdfplumber version; confirm.
        first_page_text = pdf.pages[0].extract_text() or ""
        Beam_Energy, _Coulomb_Barrier, _Center_Mass, Recoil_Energy = Extract_CompoundInf(first_page_text)

        # Extract info for productions from the reaction (must happen while
        # the PDF is still open).
        Production = Extract_ProductInf(pdf)

    # Filter Production info to write them into Spreadsheet
    Total, Production_Record = Filter_Inf(Production)

    # Write Production Info into Excel:
    Write_Excel(Production_Record, Total, excel_path, Beam_Energy, Recoil_Energy)

#### Code(need to call): loop PACE4 folder

In [7]:
def RWPACE4(folder_path, excel_path):
    """Process every PDF in ``folder_path`` and record it in ``excel_path``.

    Files are handled in sorted name order so that repeated runs add the
    per-energy sheets in the same order (``os.listdir`` itself returns
    entries in arbitrary order).  The ".pdf" suffix is matched without regard
    to case, so "RESULT.PDF" is processed as well.

    Args:
        folder_path: Directory containing the PACE4 result PDFs.
        excel_path: Path of the workbook to update.
    """
    for pdf_file in sorted(os.listdir(folder_path)):
        if not pdf_file.lower().endswith(".pdf"):
            continue
        RWProcess(os.path.join(folder_path, pdf_file), excel_path)

#### Test!!!

In [8]:
# Example run: point these at the workbook to update and at the folder that
# holds the PACE4 result PDFs, then process the whole folder.
excel_path = "/Users/yiyizhu/Packages/DoesCal/example/DoesCal.xlsx"
folder_path = "/Users/yiyizhu/Packages/DoesCal/example/PACE4"
RWPACE4(folder_path=folder_path, excel_path=excel_path)