In [1]:
# Show the kernel's working directory; the relative path "Accession_ID_List.txt"
# opened by main() is resolved against it.
!pwd

/home/osheakes/Research_Project_MMM


# 1.0 Create FASTA Files from Accession ID Inputs

In [5]:
import os
import requests

# Function to retrieve FASTA files from UniProt
def retrieve_fasta(accession_id, timeout=30):
    """Download the FASTA record for one UniProt accession.

    Parameters
    ----------
    accession_id : str
        Accession interpolated into the UniProt download URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` may block indefinitely on a
        stalled connection.

    Returns
    -------
    str or None
        The response body when the server answers with HTTP 200; ``None``
        when the request raises or any other status code comes back. A
        failure message is printed in both failure cases.
    """
    url = f"https://www.uniprot.org/uniprot/{accession_id}.fasta"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts must not abort a batch download:
        # report the failure and let the caller continue with the next ID.
        print(f"Failed to retrieve FASTA for {accession_id}: {exc}")
        return None

    if response.status_code == 200:
        return response.text

    print(f"Failed to retrieve FASTA for {accession_id}")
    return None

# Function to write FASTA content to a file in the Fasta folder
def write_fasta_to_file(accession_id, fasta_content,
                        folder="/home/osheakes/Research_Project_MMM/Fasta"):
    """Write FASTA text to ``<folder>/<accession_id>.fasta``.

    Parameters
    ----------
    accession_id : str
        Used as the file stem of the output file.
    fasta_content : str
        Text written to the file verbatim.
    folder : str, optional
        Destination directory; created (including parents) when missing.
        Defaults to the project's Fasta folder.

    Returns
    -------
    None
        The path of the saved file is printed.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(folder, exist_ok=True)
    filename = os.path.join(folder, f"{accession_id}.fasta")
    with open(filename, "w") as f:
        f.write(fasta_content)
    print(f"FASTA file saved: {filename}")

# Main function
def main():
    """Download and save a FASTA file for every unique accession ID.

    Accession IDs are read from ``Accession_ID_List.txt`` (one per line,
    surrounding whitespace stripped, blank lines ignored). Each ID is fetched
    once; repeated IDs are reported and skipped. Two summary counts are
    printed at the end: how many distinct IDs were attempted and how many
    FASTA files were actually saved.
    """
    accession_file = "Accession_ID_List.txt"
    with open(accession_file, "r") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        accession_ids = [entry for entry in stripped_lines if entry]

    seen = set()   # every distinct ID attempted so far
    saved = 0      # IDs for which content came back and was written

    for accession_id in accession_ids:
        if accession_id not in seen:
            seen.add(accession_id)
            fasta_content = retrieve_fasta(accession_id)
            if fasta_content:
                write_fasta_to_file(accession_id, fasta_content)
                saved += 1
        else:
            print(f"Duplicate accession ID found and skipped: {accession_id}")

    # Each attempted ID is added to `seen` exactly once, so its size is the
    # number of IDs processed.
    print(f"Number of files processed: {len(seen)}")
    print(f"Number of successful FASTA files saved: {saved}")

# Run the download only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

# Additional script to count the number of FASTA files in the directory
directory_path = "/home/osheakes/Research_Project_MMM/Fasta/"

# Count the number of FASTA files in the directory.
# os.listdir raises FileNotFoundError when the directory is missing (e.g. no
# download succeeded, so it was never created); report zero files instead.
if os.path.isdir(directory_path):
    fasta_files = [
        entry for entry in os.listdir(directory_path) if entry.endswith(".fasta")
    ]
else:
    fasta_files = []
num_fasta_files = len(fasta_files)

print(f"Number of FASTA files in directory '{directory_path}': {num_fasta_files}")


FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P01225.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P01241.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P10082.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P12872.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P35318.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P01282.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P18509.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P20366.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P46663.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P01160.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P16860.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P01258.fasta
FASTA file saved: /home/osheakes/Research_Project_MMM/Fasta/P06881.fasta
FASTA file saved: /home/osheakes/Research_Project_M