In [None]:
import pandas as pd
'''
Description: This script filters a dataset of SNF (Spent Nuclear Fuel) records
based on a list of SNF names provided in a text file. The filtered dataset
is saved to a new CSV file.
** TSC01 file
'''

def load_snf_list(txt_path):
    """
    Load SNF names from a text file.

    Names may be separated by commas, by line breaks, or by any mix of the
    two (e.g. ``LJ1099, LJ1109,...`` on one line, or one name per line).
    Whitespace around each name is stripped and empty entries are skipped.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        A list of SNF name strings, in the order they appear in the file.
    """
    # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also discards
    # a leading byte-order mark, which would otherwise stay glued to the
    # first name (str.strip() does not remove it).
    with open(txt_path, "r", encoding="utf-8-sig") as file:
        content = file.read()

    # Split on line boundaries first so that a file with one name per line
    # (and no commas) is not returned as a single multi-line entry.
    names = []
    for line in content.splitlines():
        for name in line.split(","):
            name = name.strip()
            if name:
                names.append(name)
    return names


def filter_dataset_by_snf(df, snf_list):
    """
    Select the rows of ``df`` whose 'Name' value appears in ``snf_list``.

    The selected rows are returned as a new DataFrame whose index is
    renumbered from zero.
    """
    is_selected = df["Name"].isin(snf_list)
    matching_rows = df.loc[is_selected]
    return matching_rows.reset_index(drop=True)


def main():
    """
    Filter the full dataset down to the SNFs named in the list file,
    print the row counts before and after, and save the result as CSV.
    """
    # Input locations
    csv_path = "data/all_stdh_dataset.csv"
    snf_txt_path = "data/TSC01_SNFs.txt"

    # Read the full dataset, then the names to keep
    df = pd.read_csv(csv_path)
    wanted_names = load_snf_list(snf_txt_path)

    # Keep only the rows whose 'Name' is in the list
    filtered_df = filter_dataset_by_snf(df, wanted_names)

    # Short before/after summary
    original_rows = len(df)
    filtered_rows = len(filtered_df)
    print(f"Original dataset size: {original_rows} rows")
    print(f"Filtered dataset size: {filtered_rows} rows")

    # Write the filtered rows (without the pandas index) to the working directory
    filtered_df.to_csv("all_stdh_dataset_tsc01.csv", index=False)


# Call main() only when this code runs as the top-level module.
if __name__ == "__main__":
    main()

Original dataset size: 6870 rows
Filtered dataset size: 56 rows


In [None]:
#===============================================================
### Look up the SNF_id of every SNF name listed in TSC01_SNFs.txt
### and save the IDs as a comma-separated text file and as a CSV
#===============================================================

import pandas as pd

df = pd.read_csv('data/all_stdh_dataset.csv')

# The list file may separate names with commas, line breaks, or both.
# It is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
# NOTE(review): this reads 'TSC01_SNFs.txt' from the working directory,
# whereas the dataset is read from 'data/' — confirm the intended location.
with open('TSC01_SNFs.txt', 'r', encoding='utf-8') as f:
    text = f.read()
names = [n.strip() for n in text.replace(',', '\n').splitlines() if n.strip()]

# Series indexed by 'Name' whose values are the corresponding 'SNF_id'.
mapping = df.set_index('Name')['SNF_id']
# .loc with a list raises KeyError if any listed name is absent from 'Name'.
snf_ids = mapping.loc[names].tolist()

print(snf_ids)
# Same IDs written twice: one comma-separated line, and one ID per CSV row.
with open('TSC01_SNF_id.txt', 'w', encoding='utf-8') as f:
    f.write(', '.join(snf_ids))
pd.DataFrame({'SNF_id': snf_ids}).to_csv('TSC01_SNF_id.csv', index=False, header=False)


['1A0016', '1A0026', '1A0009', '1A0003', '1A0066', '1A0098', '1A0319', '1A0103', '1A0198', '1A0329', '1A0407', '1A0028', '1A0272', '1A0119', '1B0548', '1B0506', '1B0513', '1A0238', '1A0248', '1A0007', '1A0406', '1A0089', '1B0541', '1B0501', '1B0547', '1A0311', '1A0273', '1A0012', '1A0013', '1A0338', '1A0088', '1B0430', '1B0475', '1B0516', '1A0306', '1A0190', '1A0015', '1A0014', '1A0072', '1B0500', '1B0514', '1B0542', '1A0297', '1A0361', '1A0360', '1A0138', '1A0149', '1A0373', '1A0385', '1A0218', '1A0129', '1A0010', '1A0068', '1A0109', '1A0011', '1A0018']


In [None]:
#===============================================================
### Convert different Type -> Code to map 'Type' values in a dataset
#===============================================================

import pandas as pd
from pathlib import Path

# The data file is located relative to the current working directory.
project_root = Path.cwd().resolve().parents[1]
data_file = project_root / "data" / "test_files" / "all_stdh_dataset_tsc01.csv"
df = pd.read_csv(data_file)

# Original 'Type' value -> replacement code.
type_map = {
    "GE88-1": "TypeA",
    "GE88-2": "TypeB",
    "Atrium10": "TypeC",
    "SPC88": "TypeD",
    "GE9B": "TypeE"
}

print("Original 'Type' values:")
print(df["Type"].unique(), "\n")

# This cell overwrites data_file, so on a second run the column already
# holds the codes. Accept those as known values; otherwise re-running the
# cell would stop with "Unmapped Type values".
known = set(type_map.keys()) | set(type_map.values())
unknown = set(df["Type"].unique()) - known
if unknown:
    print(f"❗ Found unmapped Type values: {unknown}\n")
    print("Rows with unmapped Type values:")
    print(df[df["Type"].isin(unknown)])
    raise ValueError(f"Unmapped Type values found: {unknown}")

# replace() leaves values that are not keys of type_map (i.e. values that
# are already codes) untouched.
df["Type"] = df["Type"].replace(type_map)

# Write the result back over the input file.
df.to_csv(data_file, index=False)


Original 'Type' values:
['GE88-1' 'GE88-2'] 



In [None]:
#===============================================================
###  Export the SNF_id column of the master dataset to its own CSV
#===============================================================

import pandas as pd
from pathlib import Path

# Input and output locations, anchored at parents[2] of the working directory.
# (The dataset was alternatively read from pySNF/data/test_files/.)
project_root = Path.cwd().resolve().parents[2]
input_path = project_root / "pySNF/data/DataBase_SNFs/all_stdh_dataset.csv"
output_path = project_root / "pySNF/data/TEST_all_snfs/All_SNFs_Id.csv"

# Read the full dataset.
df = pd.read_csv(input_path)

# Keep only the identifier column (still a one-column DataFrame).
selected_id = ['SNF_id']
df_filtered = df.loc[:, selected_id]

# Write the single-column table without the pandas index.
df_filtered.to_csv(output_path, index=False)

print(f"Filtered dataset saved to: {output_path.resolve()}")

In [None]:
"""
Rename all SNF detail CSV files in DataBase_SNFs/ according to the SNF_id mapping 
provided in all_stdh_dataset.csv, then delete the original files.

Example:
    LJ1084_CipMTU.csv   --> 1A0001_CipMTU.csv (original LJ1084_CipMTU.csv removed)
    LJ1084_gpMTU.csv    --> 1A0001_gpMTU.csv (original LJ1084_gpMTU.csv removed)
"""

import sys
from pathlib import Path
import pandas as pd

def main():
    """
    Rename every '<Name>_<suffix>.csv' file in DataBase_SNFs/ to
    '<SNF_id>_<suffix>.csv', using the Name -> SNF_id mapping stored in
    DataBase_SNFs/all_stdh_dataset.csv.

    Files are moved on disk instead of being re-written through pandas, so
    their contents stay byte-for-byte identical. A file that already exists
    under the target name is overwritten. Files whose name has no '_' or
    whose prefix is not a known 'Name' are reported and left alone.

    Exits with status 1 if the index file cannot be read or lacks the
    'Name' / 'SNF_id' columns.
    """
    # 1. Set up paths
    base_dir = Path("DataBase_SNFs")
    index_path = base_dir / "all_stdh_dataset.csv"

    # 2. Load the index dataset
    try:
        df_index = pd.read_csv(index_path)
    except Exception as e:
        print(f"Error: Could not read index file '{index_path}': {e}")
        sys.exit(1)

    # 3. Ensure required columns exist
    required_cols = {"Name", "SNF_id"}
    if not required_cols.issubset(df_index.columns):
        print(f"Error: Index file must contain columns: {required_cols}")
        sys.exit(1)

    # 4. Build mapping from Name to SNF_id (both compared as strings)
    name_to_id = dict(
        zip(df_index["Name"].astype(str), df_index["SNF_id"].astype(str))
    )

    # 5. Process each CSV in the folder, skipping the index file.
    #    The listing is materialised first: renaming files inside the
    #    directory while a lazy glob is still being consumed could make it
    #    yield the freshly renamed files as well.
    for file_path in sorted(base_dir.glob("*.csv")):
        if file_path.name == index_path.name:
            continue  # skip the master index

        stem = file_path.stem  # e.g. "LJ1084_CipMTU"
        parts = stem.split("_", 1)

        # 6. Validate filename format
        if len(parts) != 2:
            print(f"Warning: Skipping file with unexpected name format: {file_path.name}")
            continue

        name_prefix, suffix = parts

        # 7. Lookup SNF_id
        if name_prefix not in name_to_id:
            print(f"Error: No matching 'Name' entry for file '{file_path.name}'")
            continue

        new_filename = f"{name_to_id[name_prefix]}_{suffix}.csv"
        new_path = base_dir / new_filename

        # Nothing to do when the file already carries its SNF_id. Writing
        # the copy and then removing the "original" would delete the file.
        if new_path == file_path:
            print(f"Skipping '{file_path.name}': already named after its SNF_id")
            continue

        # 8. Move the file to its new name. Path.replace overwrites an
        #    existing target on every platform.
        try:
            file_path.replace(new_path)
            print(f"Renamed and deleted '{file_path.name}' → '{new_filename}'")
        except OSError as e:
            print(f"Error processing '{file_path.name}': {e}")

# Run the renaming only when this code executes as the top-level module.
if __name__ == "__main__":
    main()


In [1]:

"""
Load 'all_stdh_dataset.csv', drop the 'Name' column if present,
and overwrite the original file with the updated DataFrame.
"""

import sys
from pathlib import Path

import pandas as pd

def remove_name_column(csv_path: Path) -> None:
    """
    Strip the 'Name' column from the CSV stored at csv_path.

    The file is loaded with pandas and written back to the same path
    without the 'Name' column. If the column is absent, a warning is
    printed and the file is not rewritten. A read or write failure is
    reported on stderr and ends the process with exit status 1.
    """
    try:
        table = pd.read_csv(csv_path)
    except Exception as exc:
        print(f"Error: Cannot read '{csv_path}': {exc}", file=sys.stderr)
        sys.exit(1)

    has_name = "Name" in table.columns
    if not has_name:
        print(f"Warning: 'Name' column not found in '{csv_path}'. No changes made.")
        return

    # Everything except 'Name' is kept, in its original order
    trimmed = table.drop("Name", axis=1)

    try:
        # Replace the file on disk with the trimmed table
        trimmed.to_csv(csv_path, index=False)
        print(f"Success: 'Name' column removed and '{csv_path.name}' updated.")
    except Exception as exc:
        print(f"Error: Cannot write to '{csv_path}': {exc}", file=sys.stderr)
        sys.exit(1)

def main():
    """
    Locate the master dataset CSV under parents[2] of the working
    directory and remove its 'Name' column; exit with status 1 if the
    file does not exist.
    """
    project_root = Path.cwd().resolve().parents[2]
    csv_file = project_root / "pySNF/data/DataBase_SNFs/all_stdh_dataset.csv"

    if csv_file.exists():
        remove_name_column(csv_file)
    else:
        print(f"Error: File not found: {csv_file}", file=sys.stderr)
        sys.exit(1)

# Invoke main() only when this code is the top-level module being executed.
if __name__ == "__main__":
    main()


Success: 'Name' column removed and 'all_stdh_dataset.csv' updated.


In [7]:
#!/usr/bin/env python3
"""
Read 'DataBase_SNFs/Prediction_all_batch.csv', insert a serial-number column 'S/n'
as the first column (1, 2, 3, …), and overwrite the original CSV file.
"""

import sys
from pathlib import Path

import pandas as pd

def add_serial_column(csv_path: Path) -> None:
    """
    Load the CSV at csv_path, put a 1-based 'S/n' column at the front,
    and save back to the same path.

    If the file already has an 'S/n' column (for example from an earlier
    run), that column is replaced by a fresh 1..N numbering instead of
    failing, so the function can be applied to the same file repeatedly.

    A read or write failure is reported on stderr and ends the process
    with exit status 1.
    """
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        print(f"Error: Failed to read '{csv_path}': {e}", file=sys.stderr)
        sys.exit(1)

    # DataFrame.insert raises ValueError when the column already exists,
    # so drop a previously added 'S/n' before inserting the new one.
    if "S/n" in df.columns:
        df = df.drop(columns=["S/n"])

    # Insert serial numbers 1,2,... as the first column named "S/n"
    df.insert(0, "S/n", range(1, len(df) + 1))

    try:
        # Overwrite the original CSV without the index
        df.to_csv(csv_path, index=False)
        print(f"Success: Added 'S/n' column and updated '{csv_path.name}'.")
    except Exception as e:
        print(f"Error: Failed to write '{csv_path}': {e}", file=sys.stderr)
        sys.exit(1)

def reorder_columns(csv_path: Path) -> None:
    """
    Rewrite the CSV at csv_path so that it holds the columns
    Burnup, Cool, Enrich, SP, in that order.

    Columns other than these four are not written back. If any of the
    four is missing, or the file cannot be read or written, the problem
    is reported on stderr and the process ends with exit status 1.
    """
    try:
        table = pd.read_csv(csv_path)
    except Exception as exc:
        print(f"Error: Failed to read '{csv_path}': {exc}", file=sys.stderr)
        sys.exit(1)

    # Target layout of the file
    new_order = ["Burnup", "Cool", "Enrich", "SP"]

    # Collect, in target order, every required column the file lacks
    missing = []
    for col in new_order:
        if col not in table.columns:
            missing.append(col)
    if len(missing) > 0:
        print(f"Error: The following required columns are missing: {missing}", file=sys.stderr)
        sys.exit(1)

    # Selecting by label both orders the columns and leaves out the rest
    ordered = table.loc[:, new_order]

    try:
        # Replace the file on disk; the pandas index is not written
        ordered.to_csv(csv_path, index=False)
        print(f"Success: Columns reordered and '{csv_path.name}' updated.")
    except Exception as exc:
        print(f"Error: Failed to write '{csv_path}': {exc}", file=sys.stderr)
        sys.exit(1)
def main():
    """
    Reorder the columns of Prediction_all_batch.csv and then add the
    'S/n' serial-number column; exit with status 1 if the file is missing.
    """
    # The predictions file lives under parents[2] of the working directory
    project_root = Path.cwd().resolve().parents[2]
    input_path = project_root / "pySNF/data/TEST_prediction"
    csv_file = input_path / "Prediction_all_batch.csv"

    if not csv_file.exists():
        print(f"Error: File not found: {csv_file}", file=sys.stderr)
        sys.exit(1)

    # Reordering runs first, then the serial numbers are added
    for step in (reorder_columns, add_serial_column):
        step(csv_file)

# Start processing only when this code is executed as the top-level module.
if __name__ == "__main__":
    main()


Success: Columns reordered and 'Prediction_all_batch.csv' updated.
Success: Added 'S/n' column and updated 'Prediction_all_batch.csv'.
