In [1]:
import os
from Bio import SeqIO

def create_individual_fasta_files(fasta_file, output_directory):
    """Split a multi-record FASTA file into one FASTA file per record.

    Every record parsed from ``fasta_file`` is written to
    ``<output_directory>/<record.id>.fasta``. Files are opened in write mode,
    so records that share an ID overwrite one another (the last one wins).

    Args:
        fasta_file: FASTA input, passed straight to ``SeqIO.parse``.
        output_directory: Folder that receives the per-record files; it is
            created (including missing parents) when it does not exist.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(output_directory, exist_ok=True)

    for record in SeqIO.parse(fasta_file, "fasta"):
        # Record IDs come from the input file. A path separator inside an ID
        # would make the target resolve to a sub-path that does not exist,
        # so separators are replaced before the ID is used as a file name.
        safe_id = record.id.replace(os.sep, "_")
        if os.altsep:
            safe_id = safe_id.replace(os.altsep, "_")

        file_name = os.path.join(output_directory, f"{safe_id}.fasta")
        with open(file_name, "w") as output_handle:
            SeqIO.write(record, output_handle, "fasta")

# Source: the multi-record FASTA file that gets split into per-record files.
fasta_file = "/home/samith/Downloads/peptides_lengthLT60_pdbAvailable.fasta"
# Destination folder for the per-record files (replace with your own location).
output_directory = "/media/samith/My Passport/Bench/fasta"

create_individual_fasta_files(
    fasta_file=fasta_file,
    output_directory=output_directory,
)

In [None]:
import csv
import requests
import os
import shutil

# Paths read by main(); edit them to match the local machine.
# Folder searched by copy_fasta_file for "<peptide id>.fasta".
fasta_directory = "/media/samith/My Passport/Bench/fasta"  # Update this path
# CSV parsed by read_metadata; it must provide "Peptide" and "Metadata" columns.
metadata_file = "/home/samith/Downloads/peptides_lt60_metadata.csv"  # Update this path if necessary
# Root folder under which main() creates one sub-folder per peptide.
base_output_directory = "/media/samith/My Passport/Bench/output"  # Replace with your desired output directory

def read_metadata(csv_path):
    """Map each peptide ID to the set of PDB IDs named in its metadata.

    The CSV needs a header row with ``Peptide`` and ``Metadata`` columns.
    ``Metadata`` is a ``;``-separated list of entries; every entry containing
    ``PDB:`` contributes the text after its last ``:`` (whitespace-stripped)
    as a PDB ID. Rows for the same peptide are merged.

    Args:
        csv_path: Path of the metadata CSV file.

    Returns:
        dict mapping the stripped ``Peptide`` value to a ``set`` of PDB ID
        strings. A peptide without any PDB entry maps to an empty set.

    Raises:
        KeyError: If the ``Peptide`` or ``Metadata`` column is absent.
    """
    starpep_to_pdb = {}
    # utf-8-sig reads plain UTF-8 unchanged but also drops a leading BOM, which
    # would otherwise be glued to the first header ("\ufeffPeptide") and make
    # the row['Peptide'] lookup fail.
    with open(csv_path, mode='r', newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            peptide = row['Peptide'].strip()
            # DictReader fills columns missing from a short row with None.
            metadata_entries = (row['Metadata'] or '').split(';')
            pdb_ids = {
                entry.split(':')[-1].strip()
                for entry in metadata_entries
                if 'PDB:' in entry
            }
            # "PDB:" with nothing after it is not an ID; keeping the empty
            # string would later turn into a download request for ".pdb".
            pdb_ids.discard('')

            starpep_to_pdb.setdefault(peptide, set()).update(pdb_ids)

    return starpep_to_pdb

def download_pdb_files(pdb_ids, output_path, timeout=30):
    """Download PDB entries from RCSB into ``output_path``.

    For each ID, ``https://files.rcsb.org/download/<id>.pdb`` is requested and,
    on HTTP 200, the response body is saved as ``<output_path>/<id>.pdb``.
    Any failure (non-200 status or a network error) is reported on stdout and
    the loop moves on to the next ID.

    Args:
        pdb_ids: Iterable of PDB identifiers.
        output_path: Directory the files are written to; it is not created
            here, so it must already exist.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``, so a
            stalled connection cannot block the run forever.
    """
    for pdb_id in pdb_ids:
        url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as error:
            # One unreachable or timed-out entry must not abort the batch.
            print(f"Failed to download {pdb_id}. Error: {error}")
            continue

        if response.status_code == 200:
            file_path = os.path.join(output_path, f'{pdb_id}.pdb')
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f"Downloaded {pdb_id}.pdb")
        else:
            print(f"Failed to download {pdb_id}. Status code: {response.status_code}")

def copy_fasta_file(starpep_id, fasta_directory, output_path):
    """Copy ``<starpep_id>.fasta`` from ``fasta_directory`` to ``output_path``.

    Prints a confirmation after copying; when the source file does not exist,
    prints a "not found" message and copies nothing.
    """
    source = os.path.join(fasta_directory, "{}.fasta".format(starpep_id))

    # Guard clause: nothing to copy when the per-peptide FASTA file is missing.
    if not os.path.exists(source):
        print(f"FASTA file for {starpep_id} not found in {fasta_directory}")
        return

    shutil.copy(source, output_path)
    print(f"Copied FASTA file for {starpep_id}")

def main():
    """Assemble one folder per peptide holding its FASTA file and PDB files.

    Reads the peptide -> PDB IDs mapping from ``metadata_file``; for each
    peptide, creates ``<base_output_directory>/<peptide id>``, copies the
    matching FASTA file from ``fasta_directory`` into it, then downloads the
    peptide's PDB files into the same folder.
    """
    for starpep_id, pdb_ids in read_metadata(metadata_file).items():
        target_dir = os.path.join(base_output_directory, starpep_id)
        os.makedirs(target_dir, exist_ok=True)

        # FASTA first, then structures, so the console log groups the
        # downloads under the peptide they belong to.
        copy_fasta_file(starpep_id, fasta_directory, target_dir)
        download_pdb_files(pdb_ids, target_dir)

# Start the pipeline only when this code runs as the main program; importing
# it as a module leaves main() uncalled.
if __name__ == "__main__":
    main()


Copied FASTA file for starPep_00000
Downloaded 1BH1.pdb
Downloaded 3QRX.pdb
Downloaded 2MLT.pdb
Downloaded 2MW6.pdb
Copied FASTA file for starPep_00001
Downloaded 1F0D.pdb
Downloaded 5CGO.pdb
Downloaded 1D9O.pdb
Downloaded 5CGN.pdb
Downloaded 1F0H.pdb
Downloaded 1F0G.pdb
Downloaded 1D9M.pdb
Downloaded 1DUM.pdb
Downloaded 2LSA.pdb
Downloaded 1D9J.pdb
Downloaded 4MGP.pdb
Downloaded 1D9P.pdb
Downloaded 1D9L.pdb
Downloaded 2MAG.pdb
Downloaded 1F0E.pdb
Downloaded 1F0F.pdb
Copied FASTA file for starPep_00002
Downloaded 1G8C.pdb
Downloaded 1HR1.pdb
Downloaded 1G89.pdb
Downloaded 1QXQ.pdb
Downloaded 1QX9.pdb
Copied FASTA file for starPep_00003
Downloaded 2RTV.pdb
Downloaded 1MA5.pdb
Downloaded 1WO0.pdb
Downloaded 2MDB.pdb
Downloaded 1MA4.pdb
Downloaded 1MA2.pdb
Downloaded 1WO1.pdb
Downloaded 1MA6.pdb
Copied FASTA file for starPep_00004
Downloaded 2K6O.pdb
Downloaded 2FCG.pdb
Downloaded 2FBU.pdb
Downloaded 2LMF.pdb
Downloaded 2FBS.pdb
Copied FASTA file for starPep_00005
Downloaded 2MCV.pdb
Down

Downloaded 1L4V.pdb
Copied FASTA file for starPep_00167
Downloaded 1L4V.pdb
Downloaded 2LLD.pdb
Copied FASTA file for starPep_00168
Downloaded 4RBW.pdb
Downloaded 2MIT.pdb
Downloaded 4E82.pdb
Downloaded 4E83.pdb
Downloaded 4RBX.pdb
Downloaded 4E86.pdb
Downloaded 1ZMP.pdb
Downloaded 2LXZ.pdb
Downloaded 3I5W.pdb
Copied FASTA file for starPep_00170
Downloaded 1Q3J.pdb
Copied FASTA file for starPep_00180
Downloaded 2KHG.pdb
Downloaded 2KHF.pdb
Downloaded 2KEG.pdb
Copied FASTA file for starPep_00181
Downloaded 1R1F.pdb
Copied FASTA file for starPep_00182
Downloaded 2KNS.pdb
Downloaded 1XC0.pdb
Copied FASTA file for starPep_00184
Downloaded 2K38.pdb
Copied FASTA file for starPep_00193
Downloaded 1DUM.pdb
Downloaded 2MAG.pdb
Copied FASTA file for starPep_00208
Downloaded 1BNB.pdb
Copied FASTA file for starPep_00210
Downloaded 2K7G.pdb
Downloaded 3E4H.pdb
Copied FASTA file for starPep_00212
Downloaded 1T55.pdb
Downloaded 1T51.pdb
Downloaded 1T52.pdb
Downloaded 1T54.pdb
Copied FASTA file for st

Downloaded 1FJN.pdb
Copied FASTA file for starPep_00665
Downloaded 2LZI.pdb
Copied FASTA file for starPep_00681
Downloaded 2K10.pdb
Copied FASTA file for starPep_00683
Downloaded 1VB8.pdb
Copied FASTA file for starPep_00684
Downloaded 2L3I.pdb
Copied FASTA file for starPep_00687
Downloaded 1X22.pdb
Copied FASTA file for starPep_00699
Downloaded 2KCN.pdb
Copied FASTA file for starPep_00701
Downloaded 1MIJ.pdb
Downloaded 2MIJ.pdb
Copied FASTA file for starPep_00702
Downloaded 5X3L.pdb
Copied FASTA file for starPep_00726
Downloaded 2LAM.pdb
Copied FASTA file for starPep_00727
Downloaded 2KCH.pdb
Downloaded 1KAL.pdb
Downloaded 1PT4.pdb
Copied FASTA file for starPep_00728
Downloaded 1WN4.pdb
Downloaded 2KHB.pdb
Copied FASTA file for starPep_00730
Downloaded 1LYP.pdb
Copied FASTA file for starPep_00736
Downloaded 2L9X.pdb
Copied FASTA file for starPep_00737
Downloaded 2KET.pdb
Copied FASTA file for starPep_00739
Downloaded 1EH3.pdb
Downloaded 1Z6W.pdb
Downloaded 1Z6V.pdb
Copied FASTA file fo

Downloaded 2FCG.pdb
Downloaded 2FBU.pdb
Downloaded 2LMF.pdb
Downloaded 2FBS.pdb
Copied FASTA file for starPep_01279
Downloaded 1LGB.pdb
Copied FASTA file for starPep_01314
Downloaded 1LFG.pdb
Copied FASTA file for starPep_01325
Downloaded 1LU8.pdb
Downloaded 1TYK.pdb
Copied FASTA file for starPep_01341
Downloaded 2NDC.pdb
Copied FASTA file for starPep_01347
Downloaded 2MHW.pdb
Copied FASTA file for starPep_01352
Downloaded 2MHW.pdb
Copied FASTA file for starPep_01353
Downloaded 2MHW.pdb
Copied FASTA file for starPep_01369
Downloaded 1VB8.pdb
Copied FASTA file for starPep_01370
Downloaded 2KCG.pdb
Copied FASTA file for starPep_01375
Downloaded 2G9P.pdb
Copied FASTA file for starPep_01404
Downloaded 1NB1.pdb
Copied FASTA file for starPep_01405
Downloaded 1KAL.pdb
Copied FASTA file for starPep_01406
Downloaded 1KAL.pdb
Copied FASTA file for starPep_01407
Downloaded 1KAL.pdb
Copied FASTA file for starPep_01408
Downloaded 1KAL.pdb
Copied FASTA file for starPep_01409
Downloaded 2KHB.pdb
Copi

Downloaded 1P00.pdb
Copied FASTA file for starPep_02106
Downloaded 2BWK.pdb
Copied FASTA file for starPep_02109
Downloaded 1XKM.pdb
Copied FASTA file for starPep_02114
Downloaded 2OJM.pdb
Downloaded 2JOS.pdb
Downloaded 2MCU.pdb
Copied FASTA file for starPep_02144
Downloaded 2JQ0.pdb
Copied FASTA file for starPep_02148
Downloaded 2JUI.pdb
Copied FASTA file for starPep_02149
Downloaded 2JUI.pdb
Copied FASTA file for starPep_02164
Downloaded 2LT8.pdb
Copied FASTA file for starPep_02166
Downloaded 1FJN.pdb
Copied FASTA file for starPep_02200
Downloaded 2KCG.pdb
Copied FASTA file for starPep_02205
Downloaded 1VM4.pdb
Copied FASTA file for starPep_02207
Downloaded 1VM2.pdb
Copied FASTA file for starPep_02223
Downloaded 2KHB.pdb
Copied FASTA file for starPep_02225
Downloaded 1KAL.pdb
Copied FASTA file for starPep_02228
Downloaded 1KAL.pdb
Copied FASTA file for starPep_02229
Downloaded 1KAL.pdb
Copied FASTA file for starPep_02234
Downloaded 1LGB.pdb
Copied FASTA file for starPep_02236
Download

Downloaded 3NJW.pdb
Copied FASTA file for starPep_03194
Downloaded 1LYP.pdb
Copied FASTA file for starPep_03211
Downloaded 2JX6.pdb
Copied FASTA file for starPep_03212
Downloaded 2JX6.pdb
Copied FASTA file for starPep_03213
Downloaded 2JX6.pdb
Copied FASTA file for starPep_03214
Downloaded 2JX6.pdb
Copied FASTA file for starPep_03221
Downloaded 2KET.pdb
Copied FASTA file for starPep_03249
Downloaded 5U9X.pdb
Downloaded 5U9Y.pdb
Copied FASTA file for starPep_03264
Downloaded 2LA0.pdb
Copied FASTA file for starPep_03271
Downloaded 1XV3.pdb
Copied FASTA file for starPep_03273
Downloaded 1QX9.pdb
Copied FASTA file for starPep_03290
Downloaded 1HR1.pdb
Copied FASTA file for starPep_03299
Downloaded 1ZUF.pdb
Downloaded 1D6B.pdb
Downloaded 1ZUE.pdb
Copied FASTA file for starPep_03330
Downloaded 2L9A.pdb
Copied FASTA file for starPep_03331
Downloaded 2L99.pdb
Copied FASTA file for starPep_03332
Downloaded 2L96.pdb
Copied FASTA file for starPep_03334
Downloaded 1SKK.pdb
Copied FASTA file for st

Downloaded 1GOE.pdb
Copied FASTA file for starPep_04752
Downloaded 2KUS.pdb
Copied FASTA file for starPep_04755
Downloaded 2L2R.pdb
Copied FASTA file for starPep_04763
Downloaded 2JX6.pdb
Copied FASTA file for starPep_04774
Downloaded 1ZMQ.pdb
Copied FASTA file for starPep_04817
Downloaded 2P5H.pdb
Copied FASTA file for starPep_04820
Downloaded 1GMK.pdb
Downloaded 1BDW.pdb
Downloaded 2Y6N.pdb
Downloaded 1NRU.pdb
Downloaded 2IZQ.pdb
Downloaded 1GRM.pdb
Downloaded 1AV2.pdb
Downloaded 2Y5M.pdb
Downloaded 1MAG.pdb
Downloaded 1W5U.pdb
Downloaded 1NRM.pdb
Downloaded 1MIC.pdb
Downloaded 1C4D.pdb
Downloaded 3L8L.pdb
Downloaded 3ZQ8.pdb
Downloaded 1JNO.pdb
Downloaded 2XDC.pdb
Downloaded 1AL4.pdb
Copied FASTA file for starPep_04882
Downloaded 2MJK.pdb
Copied FASTA file for starPep_04888
Downloaded 1CIX.pdb
Copied FASTA file for starPep_04906
Downloaded 1V91.pdb
Copied FASTA file for starPep_04918
Downloaded 3GO0.pdb
Copied FASTA file for starPep_04943
Downloaded 2MI1.pdb
Copied FASTA file for st