# Create AF3 inputs

To generate the `.json` job files needed to run on the AlphaFold 3 (AF3) server, run the cells below in order.

In [6]:
import json
from pathlib import Path

# Directory one level above the current working directory (../).
# The input data path used later in this notebook is built from it, so the
# notebook must be started from a direct subdirectory of that directory.
parent_dir = Path.cwd().parent
parent_dir

PosixPath('/home/sascha/data/Projects/affinity_project/affinity')

In [32]:

# === CONFIGURATION ===
# Input file: one DNA sequence per line.
dna_file = parent_dir.joinpath("data", "01_raw", "seqs.dat")

# Protein chain that is paired with every DNA sequence.
protein_sequence = "DELCPVCGDKVSGYHYGLLTCESCKGFFKRTVQNNKHYTCTESQSCKIDKTQRKRCPFCRFQKCLTVGMRLEAVRADRMRGGRNKFGPMYKRDRALKQQKKA"

# Output directory for JSON files (relative path, so it is resolved against
# the current working directory).
output_dir = "af3_json_batches"

# Max entries per JSON file.
batch_size = 30

# === TEMPLATE FUNCTION ===
def make_query(dna_seq, protein_seq):
    """Build a single-query request dict for one protein/DNA pair.

    NOTE(review): main() in the visible cells builds its job dicts inline and
    does not call this function.

    Args:
        dna_seq: DNA sequence; used both as the query name and as the second
            half of the combined sequence string.
        protein_seq: Protein sequence; first half of the combined sequence
            string.

    Returns:
        A dict with a "model" entry and a one-element "queries" list. The
        query holds "name", "sequence" (protein and DNA joined by ":") and
        "description".
    """
    entry = dict(
        name=dna_seq,
        sequence="{}:{}".format(protein_seq, dna_seq),
        description="protein:DNA complex",
    )
    return {"model": "alphafold3_multimer", "queries": [entry]}

# === DNA REVERSE COMPLEMENT ===
def reverse_complement(seq):
    """Return the reverse complement of a DNA string.

    A/T and C/G are swapped with case preserved; every other character is
    passed through unchanged. The result is in reversed order.

    Args:
        seq: DNA sequence as a string.

    Returns:
        The complemented sequence, reversed.
    """
    partner = dict(zip("ACGTacgt", "TGCAtgca"))
    # Walk the input back to front so complementing and reversing happen in one pass.
    return "".join(partner.get(base, base) for base in reversed(seq))

def _build_job(dna_seq, protein_seq):
    """Assemble one job dict in the "alphafoldserver" dialect.

    The job contains one protein chain, the given DNA strand, its reverse
    complement as a second DNA strand, and two ZN ions.

    Args:
        dna_seq: DNA strand; also used as the job name.
        protein_seq: Protein chain sequence.

    Returns:
        A JSON-serialisable dict describing the job.
    """
    return {
        "name": dna_seq,
        "modelSeeds": [42],
        "sequences": [
            {
                "proteinChain": {
                    "sequence": protein_seq,
                    "count": 1,
                    "maxTemplateDate": "2025-02-03",
                }
            },
            {
                "dnaSequence": {
                    "sequence": dna_seq,
                    "count": 1,
                }
            },
            {
                # Complementary strand, so the DNA is modelled as a duplex.
                "dnaSequence": {
                    "sequence": reverse_complement(dna_seq),
                    "count": 1,
                }
            },
            {
                "ion": {
                    "ion": "ZN",
                    "count": 2,
                }
            },
        ],
        "dialect": "alphafoldserver",
        "version": 1,
    }


def main():
    """Write the DNA sequences in ``dna_file`` as batched JSON job files.

    Reads one DNA sequence per non-blank line from ``dna_file``, builds one
    job per sequence against ``protein_sequence``, and writes the jobs to
    ``output_dir`` as ``batch_001.json``, ``batch_002.json``, ... with at most
    ``batch_size`` jobs per file. Each file holds a JSON list of job dicts.
    One status line is printed per file written; nothing is written when the
    input contains no sequences.

    Uses the module-level configuration values ``dna_file``,
    ``protein_sequence``, ``output_dir`` and ``batch_size``.

    Raises:
        OSError: if the input file cannot be read or an output file cannot be
            written.
    """
    # Explicit encoding so the read does not depend on the platform locale.
    with open(dna_file, "r", encoding="utf-8") as f:
        dna_seqs = [line.strip() for line in f if line.strip()]

    out_dir = Path(output_dir)
    # parents=True keeps this working if output_dir is ever a nested path.
    out_dir.mkdir(parents=True, exist_ok=True)

    batch = []
    batch_index = 1
    last_index = len(dna_seqs) - 1

    for i, dna_seq in enumerate(dna_seqs):
        batch.append(_build_job(dna_seq, protein_sequence))

        # Flush when the batch is full, or when the final sequence has been
        # added so the trailing partial batch is not lost.
        if len(batch) == batch_size or i == last_index:
            out_path = out_dir / f"batch_{batch_index:03}.json"
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(batch, f, indent=2)
            print(f"✅ Wrote {len(batch)} queries to {out_path}")
            batch = []
            batch_index += 1

In [33]:
# Generate the batched JSON job files; one status line is printed per file written.
main()

✅ Wrote 30 queries to af3_json_batches/batch_001.json
✅ Wrote 30 queries to af3_json_batches/batch_002.json
✅ Wrote 30 queries to af3_json_batches/batch_003.json
✅ Wrote 30 queries to af3_json_batches/batch_004.json
✅ Wrote 30 queries to af3_json_batches/batch_005.json
✅ Wrote 30 queries to af3_json_batches/batch_006.json
✅ Wrote 30 queries to af3_json_batches/batch_007.json
✅ Wrote 30 queries to af3_json_batches/batch_008.json
✅ Wrote 30 queries to af3_json_batches/batch_009.json
✅ Wrote 30 queries to af3_json_batches/batch_010.json
✅ Wrote 30 queries to af3_json_batches/batch_011.json
✅ Wrote 30 queries to af3_json_batches/batch_012.json
✅ Wrote 30 queries to af3_json_batches/batch_013.json
✅ Wrote 30 queries to af3_json_batches/batch_014.json
✅ Wrote 30 queries to af3_json_batches/batch_015.json
✅ Wrote 30 queries to af3_json_batches/batch_016.json
✅ Wrote 30 queries to af3_json_batches/batch_017.json
✅ Wrote 30 queries to af3_json_batches/batch_018.json
✅ Wrote 30 queries to af3_js