In [14]:
#This script cleans up csv files from ENSDF "Adopted Levels, Gammas"
    #Author A.Rounds
    #Updated Aug 21, 2025

#INSTRUCTIONS:
#Save Full ENSDF File as .csv
#Prior to running code:
    #Clear column "XREF"
    #Clear column "T 1/2(level)"
#If these two columns are not cleared first, the script will raise an error or produce badly formatted output

In [16]:
import pandas as pd
import re

# Input: an ENSDF "Adopted Levels, Gammas" table saved as .csv.
# Point file_path at the file to be cleaned.
file_path = "/Users/roundsace/DataMine/HelperScripts/Messy173Hf-test.csv"
#file_path = "/Users/roundsace/DataMine/HelperScripts/adoptedLevels60Co.csv"

# Read the raw table and print its first rows for a before/after comparison
df = pd.read_csv(file_path)
print(df.head())

#########################################
# Energy level column
energy_col = "E(level)(keV)"

# Cast to string and trim, then drop a "?" only when it is the very last
# character; the final strip removes whitespace that sat before the "?"
df[energy_col] = (
    df[energy_col]
    .astype(str)
    .str.strip()
    .str.replace(r'\?$', '', regex=True)
    .str.strip()
)

#########################################
# Jπ column preparation
split_col = 'Jπ(level)'

# Cast to string and trim so each cell can be split on commas by process_jpi
# (an earlier split/explode approach was replaced by process_jpi)
df[split_col] = df[split_col].astype(str).str.strip()
def process_jpi(cell):
    """Reduce a comma-separated Jπ cell to a single value.

    Rules:
      * exactly three candidates -> keep the middle one; if it carries no
        trailing '+'/'-' sign, borrow the sign from the first candidate,
        or failing that from the last one;
      * any other number of candidates (1, 2, 4 or more) -> keep the first.

    Parameters:
        cell: the raw cell value. Non-string values (e.g. NaN or None for a
            missing entry) are returned unchanged instead of raising.

    Returns:
        The selected candidate as a whitespace-stripped string, or the
        original value when it is not a string.
    """
    # Missing / non-string cells have nothing to split; pass them through
    if not isinstance(cell, str):
        return cell

    parts = [p.strip() for p in cell.split(',')]

    # Only the three-candidate case picks something other than the first value
    if len(parts) != 3:
        return parts[0]

    first, middle, last = parts

    # Middle value already carries its own sign
    if middle.endswith(('+', '-')):
        return middle

    # Otherwise inherit the sign from the first neighbour that has one
    for neighbour in (first, last):
        if neighbour.endswith(('+', '-')):
            return middle + neighbour[-1]

    # No sign available anywhere: return the bare middle value
    return middle
        
# Reduce every Jπ cell to a single value
df[split_col] = df[split_col].apply(process_jpi)

#########################################
# Gamma energy & intensity columns

gamma_col = 'E(γ)(keV)'
intensity_col = 'I(γ)'

# Cast both columns to string and trim them so they can be split on commas
for col in (gamma_col, intensity_col):
    df[col] = df[col].astype(str).str.strip()

def pair_gamma_intensity(gammas, intensities):
    """Pair each gamma energy with an intensity.

    Both arguments are comma-separated strings; every item is stripped of
    surrounding whitespace. An empty string yields an empty list.

    Length reconciliation:
      * no intensities at all    -> every gamma is paired with ''
      * fewer intensities        -> the last intensity is repeated to fill
                                    the gap (a single intensity is therefore
                                    repeated for every gamma)
      * more intensities         -> the surplus intensities are dropped

    Parameters:
        gammas: comma-separated gamma energies.
        intensities: comma-separated intensities.

    Returns:
        A list of (gamma, intensity) string tuples, one per gamma.
    """
    gamma_list = [g.strip() for g in gammas.split(',')] if gammas else []
    intensity_list = [i.strip() for i in intensities.split(',')] if intensities else []

    g_len = len(gamma_list)
    i_len = len(intensity_list)

    if i_len == 0:
        # Missing intensities: blank placeholder for every gamma
        intensity_list = [''] * g_len
    elif i_len < g_len:
        # Too few intensities: extend with the last one given
        intensity_list += [intensity_list[-1]] * (g_len - i_len)
    elif i_len > g_len:
        # Too many intensities: truncate to the number of gammas.
        # (Must stay a flat list -- wrapping the slice in another list would
        # make zip emit a single pair and silently drop the other gammas.)
        intensity_list = intensity_list[:g_len]

    return list(zip(gamma_list, intensity_list))

#Build a helper column holding, for each row, the list of (gamma, intensity)
#pairs returned by pair_gamma_intensity for that row's two cells
df['Gamma-Intensity'] = df.apply(
    lambda row: pair_gamma_intensity(row[gamma_col], row[intensity_col]), axis=1
)

#Explode into separate rows: one output row per pair. The other columns are
#repeated and the original index label is duplicated for the extra rows.
df = df.explode('Gamma-Intensity')

#Split the tuple back into two columns. The temporary frame is given the same
#(possibly repeated) index as df so the values line up row for row.
#NOTE(review): this assumes every entry is a 2-tuple; a row whose pair list
#was empty would explode to a missing value instead -- confirm that cannot occur.
df[[gamma_col, intensity_col]] = pd.DataFrame(
    df['Gamma-Intensity'].tolist(), index=df.index
)

#Remove helper column
df = df.drop(columns=['Gamma-Intensity'])

#Replace an intensity that is exactly "100" (or "100.0", "100.00", ...) with
#"100 0". The pattern is anchored at both ends, so values such as "100 11"
#are left untouched.
df[intensity_col] = df[intensity_col].replace(
    to_replace=r'^100(?:\.0+)?$', 
    value='100 0', 
    regex=True
)

#########################################
#Multiplicity Column cleaning
def extract_E_values(cell):
    """Return every 'E<digits>' token found in a cell, in order of appearance.

    A missing cell (NaN/None) yields an empty list.
    """
    if not pd.isna(cell):
        return [hit.group(0) for hit in re.finditer(r'E\d+', cell)]
    return []
# Turn each multipolarity cell into its list of E-type tokens,
# then give every token its own row
df['M(γ)'] = df['M(γ)'].apply(extract_E_values)
df = df.explode('M(γ)')

# Cells without any E-type token come out of explode as missing values:
# default them to "E 2", then put a space between "E" and its number.
# WARNING: ANALYZE INDIVIDUAL ELEMENTS! The E2 default was right for Co60
# but might not be for other nuclides.
df['M(γ)'] = (
    df['M(γ)']
    .fillna('E 2')
    .apply(lambda token: re.sub(r'E(\d+)', r'E \1', token))
)

#########################################
# Final level column
final_levels_col = "Final Levels"

df[final_levels_col] = (
    df[final_levels_col]
    .astype(str)
    # Collapse every run of whitespace (non-breaking spaces included)
    # into one ordinary space
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
    # Drop a "?" only when it is the very last character, then trim
    # whatever whitespace preceded it
    .str.replace(r'\?$', '', regex=True)
    .str.strip()
)

#########################################
# JPi(final) column: left unsplit, only cast to string
jpi_final_col = "JPi(final)"
df[jpi_final_col] = df[jpi_final_col].astype(str)

#########################################
#Extra fixes
#Normalize an all-zero value (e.g. "0", "0.0", "000.00") stored under index
#label 0 to the plain string "0" in each listed column that exists.
#NOTE(review): this comment previously said the target was "0.0 0", but the
#code writes "0" -- confirm which format is intended.
#NOTE(review): .loc[0, col] is label-based. After the explode steps label 0
#may occur on several rows; the lookup would then return a Series whose
#string form never matches the pattern -- confirm label 0 is always unique.
columns_to_fix = ["E(level)(keV)", "E(γ)(keV)", "Final Levels"]
for col in columns_to_fix:
    if col in df.columns:
        first_val = str(df.loc[0, col]).strip()
        if re.fullmatch(r'0+(\.0+)?', first_val):
            df.loc[0, col] = "0"

#Fill missing values with 0.0 in the gamma-related columns for every index
#label from 1 onward; rows labelled 0 are deliberately left as they are.
#NOTE(review): these columns were cast to string or filled earlier in the
#script, so few (if any) true missing values may remain here -- verify this
#step has the intended effect.
numeric_cols = ['E(γ)(keV)', 'I(γ)', 'M(γ)', 'Final Levels']
df.loc[1:, numeric_cols] = df.loc[1:, numeric_cols].fillna(0.0)

#########################################
#Save to a new csv file (the index is not written)
df.to_csv("/Users/roundsace/DataMine/HelperScripts/Hf173_Messyfix-1.csv", index=False)

#Print the first five rows after cleaning;
#compare with the first five rows printed before cleaning
print(df.head())

  E(level)(keV) Jπ(level)                           E(γ)(keV)  \
0             0      1/2-                                 NaN   
1       69.73 4      3/2-                             69.70 5   
2       81.49 5      5/2-                 11.9 2 ,     81.5 1   
3      107.16 5      5/2-  25.70 5 ,     37.40 5 ,    107.2 2   
4     197.23 10      7/2-                              90.0 2   

                 I(γ)             M(γ)                      Final Levels  \
0                 NaN              NaN                               NaN   
1                 100            M1+E2                                 0   
2            ,    100             , E2                69.73,         0.0   
3  11 3 ,    100 11 ,  M1, M1+E2, (E2)  81.49,        69.73,         0.0   
4                 100            M1+E2                            107.16   

         JPi(final)  
0               NaN  
1              1/2-  
2        3/2-, 1/2-  
3  5/2-, 3/2-, 1/2-  
4              5/2-  
  E(level)(keV) Jπ(l