In [1]:
import pandas as pd
import json

# Base name of the JSON file written at the end of the script
output_name = "SP_alpha_tt_receptors_116:216"

# Load the protein-pair table and discard its "Unnamed: 0" column
# (presumably an index column saved along with the CSV -- confirm)
df = pd.read_csv(
    '../alphafold//to_json/SP_alpha_tt_receptors_116:216.csv'
).drop(columns=["Unnamed: 0"])

def parse_fasta(fasta_file):
    """Read a FASTA file into a dict mapping record ID to sequence.

    A record's ID is the second ``|``-separated field of its header line
    (``P12345`` for ``>sp|P12345|NAME``). Headers that contain no ``|``
    use everything after the leading ``>`` as the ID.

    Sequence lines are stripped of surrounding whitespace and concatenated.
    When the same ID appears more than once the last record wins. Records
    whose ID is empty are skipped, and lines that precede the first header
    are ignored.

    Args:
        fasta_file: Path of the FASTA file to read.

    Returns:
        dict: ID -> concatenated sequence string.
    """
    records = {}
    active_id = None
    chunks = []

    with open(fasta_file, 'r') as handle:
        for raw in handle:
            text = raw.strip()

            # Anything that is not a header belongs to the record in progress.
            if not text.startswith(">"):
                chunks.append(text)
                continue

            # A new header closes the previous record (if it had a usable ID).
            if active_id:
                records[active_id] = ''.join(chunks)

            fields = text.split('|')
            active_id = fields[1] if len(fields) > 1 else text[1:]
            chunks = []

    # The last record has no following header to close it.
    if active_id:
        records[active_id] = ''.join(chunks)

    return records

# Location of the FASTA database (relative to the current working directory)
fasta_file = "../data/database.fasta"

# Load every record into a dict of sequences keyed by the ID that
# parse_fasta extracts from each header line
sequences = parse_fasta(fasta_file)

def df_to_json(df, sequences):
    """Build one job entry per protein pair in *df*.

    Each row becomes a dict with a ``"name"`` made of the two IDs separated
    by a space, an empty ``"modelSeeds"`` list, and a ``"sequences"`` list
    holding two ``"proteinChain"`` entries (``prot1`` first, then ``prot2``),
    each with ``"count": 1``.

    Args:
        df: DataFrame with ``prot1`` and ``prot2`` columns holding the IDs
            of the two proteins in each pair.
        sequences: Mapping from protein ID to its sequence string.

    Returns:
        list[dict]: One job entry per row, in row order.

    Warns:
        UserWarning: Emitted once if any ID is absent from *sequences*.
            The affected chains still get an empty ``"sequence"`` string,
            so the returned structure is unchanged; the warning only makes
            the gap visible instead of silently writing empty sequences.
    """
    # Imported here so the function stays self-contained within the file.
    import warnings

    missing_ids = []

    def _protein_chain(protein_id):
        """Return the chain entry for *protein_id*, recording unknown IDs."""
        if protein_id not in sequences:
            missing_ids.append(protein_id)
        return {
            "proteinChain": {
                "sequence": sequences.get(protein_id, ""),
                "count": 1,
            }
        }

    json_list = []
    for _, row in df.iterrows():
        first_id = row['prot1']
        second_id = row['prot2']
        json_list.append(
            {
                "name": f"{first_id} {second_id}",
                "modelSeeds": [],
                "sequences": [
                    _protein_chain(first_id),
                    _protein_chain(second_id),
                ],
            }
        )

    if missing_ids:
        # Report each unknown ID once, in order of first appearance.
        unique_missing = list(dict.fromkeys(missing_ids))
        warnings.warn(
            f"No sequence found for {len(unique_missing)} protein ID(s); "
            f"their chains have an empty sequence: "
            f"{', '.join(str(pid) for pid in unique_missing)}",
            stacklevel=2,
        )

    return json_list

# Turn each protein pair in the table into a job entry
json_output = df_to_json(df, sequences)

# Serialise the job list as JSON text, indented by two spaces
json_string = json.dumps(json_output, indent=2)

# Save the JSON text under the configured output name
json_path = f'../alphafold/json/{output_name}.json'
with open(json_path, 'w') as json_file:
    json_file.write(json_string)

# NOTE(review): the message names ~/PPI/alphafold/json/, while the file is
# opened at a path relative to the working directory -- confirm they match.
print(f"JSON data has been written to ~/PPI/alphafold/json/{output_name}.json")

JSON data has been written to ~/PPI/alphafold/json/SP_alpha_tt_receptors_116:216.json
