In [None]:
# Helpers for fetching UniProt sequences and compiling AlphaFold 3 (AF3) JSON input files
import pandas as pd
import json
import os
import math
import string
import numpy as np
import requests
import shutil
import random

In [None]:
def fetch_sequence(uniprot_id, timeout=30):
    """Download the amino-acid sequence of a UniProt entry as a plain string.

    Parameters
    ----------
    uniprot_id : str
        UniProt accession. Surrounding whitespace is ignored.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block forever.

    Returns
    -------
    str or None
        The sequence with the FASTA header and line breaks removed.
        ``None`` is returned when the accession is empty, the request fails,
        the server does not answer with HTTP 200, or the response contains
        no sequence lines.
    """
    accession = str(uniprot_id).strip()
    if not accession:
        return None

    # Current UniProt REST endpoint; the older www.uniprot.org/uniprot/ URL
    # is only reachable through a redirect.
    url = f"https://rest.uniprot.org/uniprotkb/{accession}.fasta"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Keep the "sequence or None" contract for network-level failures
        # (DNS errors, refused connections, timeouts).
        return None

    if response.status_code != 200:
        return None

    # First line is the FASTA header; the remaining lines hold the residues.
    fasta_lines = response.text.splitlines()
    sequence = "".join(line.strip() for line in fasta_lines[1:])

    # A 200 response with an empty body is reported as "not found" rather
    # than as an empty sequence.
    return sequence or None

def _af3_chain_id(index):
    """Return the chain ID for the ``index``-th entity (0 -> 'A', 25 -> 'Z', 26 -> 'AA')."""
    letters = string.ascii_uppercase
    label = ""
    remaining = index + 1
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, len(letters))
        label = letters[offset] + label
    return label


def _af3_as_list(items):
    """Normalise ``items`` to a list: ``None`` -> ``[]``, a bare string -> ``[string]``."""
    if items is None:
        return []
    if isinstance(items, str):
        # A single string would otherwise be iterated character by character.
        return [items]
    return list(items)


def _af3_write_input(job_id, seqs, smiles_list, output_dir, model_seeds):
    """Assemble the AF3 input dictionary and write it to ``<output_dir>/<job_id>.json``."""
    seeds = list(model_seeds)
    if not seeds:
        raise ValueError("AF3 input needs at least one model seed")

    os.makedirs(output_dir, exist_ok=True)

    # Proteins come first, then ligands; IDs are assigned in that order.
    entities = []
    for sequence in _af3_as_list(seqs):
        entities.append({
            "protein": {
                "id": _af3_chain_id(len(entities)),
                "sequence": sequence.strip(),
            }
        })
    for smile in _af3_as_list(smiles_list):
        entities.append({
            "ligand": {
                "id": _af3_chain_id(len(entities)),
                "smiles": smile.strip(),
            }
        })

    af3_input = {
        "name": job_id,
        "modelSeeds": seeds,  # At least one seed required.
        "sequences": entities,
        "dialect": "alphafold3",  # Required
        "version": 1,  # Required
    }

    input_path = os.path.join(output_dir, f"{job_id}.json")
    with open(input_path, "w") as json_file:
        json.dump(af3_input, json_file, indent=4)


def AF3_input_compile(job_id, seqs, smiles_list, output_dir):
    """Write an AF3 input JSON with the fixed model seeds 0-4.

    Parameters
    ----------
    job_id : str
        Job name; also used as the file name (``<job_id>.json``).
    seqs : iterable of str or str
        Protein sequences, one chain per entry. A bare string is treated as
        a single sequence. Surrounding whitespace is stripped.
    smiles_list : iterable of str, str or None
        Ligand SMILES strings, one ligand per entry. ``None`` means no ligands.
    output_dir : str
        Directory for the JSON file; created if it does not exist.

    Notes
    -----
    Entities get the IDs A, B, ..., Z, AA, AB, ... in order (proteins first),
    so more than 26 entities no longer exhaust the ID supply.
    """
    _af3_write_input(job_id, seqs, smiles_list, output_dir, range(5))


def AF3_input_compile_seed(job_id, seqs, smiles_list, output_dir, seed):
    """Write an AF3 input JSON with the model seeds ``0 .. seed-1``.

    Despite its name, ``seed`` is the *number* of seeds to request, not a
    seed value. The other parameters behave as in ``AF3_input_compile``.

    Raises
    ------
    ValueError
        If ``seed`` is smaller than 1, since the file would contain no seeds.
    """
    if seed < 1:
        raise ValueError(f"seed must be at least 1 (number of model seeds), got {seed}")
    _af3_write_input(job_id, seqs, smiles_list, output_dir, range(seed))


def AF3_input_compile_ran_seed(job_id, seqs, smiles_list, output_dir, ran_seed_num):
    """Write an AF3 input JSON with ``ran_seed_num`` distinct random model seeds.

    Seeds are drawn without replacement from 1..300, so the file never lists
    the same seed twice. If more than 300 seeds are requested, the upper bound
    grows to ``ran_seed_num`` so that distinct seeds remain possible. The draw
    uses the global ``random`` state; call ``random.seed(...)`` beforehand for
    reproducible files. The other parameters behave as in ``AF3_input_compile``.

    Raises
    ------
    ValueError
        If ``ran_seed_num`` is smaller than 1.
    """
    if ran_seed_num < 1:
        raise ValueError(f"ran_seed_num must be at least 1, got {ran_seed_num}")
    upper_bound = max(300, ran_seed_num)
    model_seeds = random.sample(range(1, upper_bound + 1), ran_seed_num)
    _af3_write_input(job_id, seqs, smiles_list, output_dir, model_seeds)

In [None]:
# Example use: one protein chain plus one ligand (given as SMILES), written to
# <output_dir>/3ANT.json with 10 randomly drawn model seeds.
seqs = [
    "MKKGHHHHHHTSCNPSDMSHGYVTVKPRVRLHFVELGSGPAVCLCHGFPESWYSWRYQIPALAQAGYRVLAMDMKGYGESSAPPEIEEYCMEVLCKEMVTFLDKLGLSQAVFIGHDWGGMLVWYMALFYPERVRAVASLNTPFIPANPNMSPLESIKANPVFDYQLYFQEPGVAEAELEQNLSRTFKSLFRASDESVLSMHKVCEAGGLFVNSPEEPSLSRMVTEEEIQFYVQQFKKSGFRGPLNWYRNMERNWKWACKSLGRKILIPALMVTAEKDFVLVPQMSQHMEDWIPHLKRGHIEDCGHWTQMDKPTEVNQILIKWLDSDARNPPVVSKM",
]
AF3_input_compile_ran_seed(
    job_id="3ANT",
    seqs=seqs,
    smiles_list=["O=C(NC2CC2c1ccccc1)N4CCC(c3nc(no3)C(C)C)CC4"],
    output_dir="/path/to/output/dir",  # placeholder: replace with a writable directory
    ran_seed_num=10,
)