In [None]:
# Update: 19 April 2025 

# Post Processing AlphaFold3 Predictions
- Goal: Convert every predictions to DSSP format and store the results in a specified directory.
- Input: AlphaFold3 predictions in mmCIF format 
- Output: DSSP format files

In [17]:
import os
import shutil

# Define the path to the predictions directory
prediction_path = '../Data/Predictions'
output_path = '../Data/Predictions/highest_confident'

cb513_path = os.path.join(prediction_path, 'fold_cb513')
ts115_path = os.path.join(prediction_path, 'fold_ts115')
casp10_path = os.path.join(prediction_path, 'fold_casp10')

# Create a new directory for the highest confident predictions
os.makedirs(os.path.join(prediction_path, 'highest_confident'), exist_ok=True)
os.makedirs(os.path.join(prediction_path, 'highest_confident', 'cb513'), exist_ok=True)
os.makedirs(os.path.join(prediction_path, 'highest_confident', 'ts115'), exist_ok=True)
os.makedirs(os.path.join(prediction_path, 'highest_confident', 'casp10'), exist_ok=True)

In [19]:
def get_highest_confident_predictions(dataset_name):
    """
    This function processes the prediction files in the specified dataset and copies the highest confident predictions to a new directory.
    
    Args:
        dataset_name (str): The name of the dataset (e.g., 'cb513', 'ts115', 'casp10').
    """
    # Map dataset_name to its corresponding path
    dataset_path = {
        'cb513': cb513_path,
        'ts115': ts115_path,
        'casp10': casp10_path
    }.get(dataset_name)

    if not dataset_path:
        print(f"Invalid dataset name: {dataset_name}")
        return

    # Iterate through each folder in the dataset path
    for index, folder in enumerate(os.listdir(dataset_path)):
        # Check if the folder name starts with the dataset name and is a directory
        if folder.startswith(dataset_name) and os.path.isdir(os.path.join(dataset_path, folder)):
            print(f'Processing folder {folder}_{index}')
            folder_path = os.path.join(dataset_path, folder)
            for file in os.listdir(folder_path):
                # Check if the file is a .cif file
                if file.endswith('.cif'):
                    # Check if the file is the highest confident one
                    # Name format: fold_{dataset_name}_{index}_model_0 -> 0 is the highest confident
                    if file.startswith(f'fold_{dataset_name}') and file.endswith('_model_0.cif'):
                        # Copy the file to the new directory
                        src = os.path.join(folder_path, file)
                        dst = os.path.join(output_path, dataset_name, file)
                        shutil.copy(src, dst)
                        # Check if the file was copied successfully
                        if os.path.exists(dst):
                            print(f'File successfully copied to {dst}')
                        else:
                            print(f'Failed to copy file to {dst}')

In [20]:
get_highest_confident_predictions('cb513')

Processing folder cb513_0_0
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_0_model_0.cif
Processing folder cb513_1_1
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_1_model_0.cif
Processing folder cb513_10_2
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_10_model_0.cif
Processing folder cb513_100_3
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_100_model_0.cif
Processing folder cb513_101_4
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_101_model_0.cif
Processing folder cb513_102_5
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_102_model_0.cif
Processing folder cb513_103_6
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_cb513_103_model_0.cif
Processing folder cb513_104_7
File successfully copied to ../Data/Predictions/highest_confident\cb513\fold_

In [21]:
get_highest_confident_predictions('ts115')

Processing folder ts115_0_1
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_0_model_0.cif
Processing folder ts115_1_2
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_1_model_0.cif
Processing folder ts115_10_3
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_10_model_0.cif
Processing folder ts115_11_4
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_11_model_0.cif
Processing folder ts115_12_5
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_12_model_0.cif
Processing folder ts115_13_6
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_13_model_0.cif
Processing folder ts115_14_7
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_14_model_0.cif
Processing folder ts115_15_8
File successfully copied to ../Data/Predictions/highest_confident\ts115\fold_ts115_15_

In [22]:
get_highest_confident_predictions('casp10')

Processing folder casp10_0_0
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_0_model_0.cif
Processing folder casp10_1_1
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_1_model_0.cif
Processing folder casp10_10_2
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_10_model_0.cif
Processing folder casp10_100_3
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_100_model_0.cif
Processing folder casp10_101_4
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_101_model_0.cif
Processing folder casp10_102_5
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_102_model_0.cif
Processing folder casp10_103_6
File successfully copied to ../Data/Predictions/highest_confident\casp10\fold_casp10_103_model_0.cif
Processing folder casp10_104_7
File successfully copied to ../Data/Predictions/highest