In [None]:
# 

import pandas as pd
import requests
import os

def fetch_fasta(uniprot_id):
    """Fetches the canonical FASTA sequence from UniProt."""
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta"
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to fetch {uniprot_id}. Status code: {response.status_code}")
        return None

def read_fasta_from_file(file_path):
    """Reads a FASTA file from a local file path."""
    try:
        with open(file_path, 'r') as f:
            return f.read()
    except FileNotFoundError:
        print(f"FASTA file not found: {file_path}")
        return None

def save_fasta_combination(protein_of_interest_fasta, interactor_fasta, output_dir, poi_id, interactor_id):
    """Saves a combined FASTA file."""
    combined_fasta = protein_of_interest_fasta + "\n" + interactor_fasta
    file_path = os.path.join(output_dir, f"{poi_id}_{interactor_id}.fasta")
    with open(file_path, 'w') as f:
        f.write(combined_fasta)
    print(f"Saved {file_path}")

def generate_fasta_combinations(df, poi_fasta_path, accession_column='accession', output_dir='fasta_combinations'):
    """
    Generates and saves combined FASTA files for each interactor.

    Parameters:
    - df (pd.DataFrame): A DataFrame containing protein interactors, with at least an accession column.
    - poi_fasta_path (str): Path to the FASTA file for the protein of interest (e.g. LMCD1 module).
    - accession_column (str): Name of the column in `df` that contains UniProt accession IDs. Default is 'accession'.
    - output_dir (str): Directory where the combined FASTA files will be saved. Default is 'fasta_combinations'.
    """
    
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read protein of interest FASTA from file
    protein_of_interest_fasta = read_fasta_from_file(poi_fasta_path)
    if protein_of_interest_fasta is None:
        print("Failed to read protein of interest FASTA file. Exiting...")
        return
    
    # Loop over interactors
    for interactor_id in df[accession_column].dropna().unique():
        interactor_fasta = fetch_fasta(interactor_id)
        if interactor_fasta:
            save_fasta_combination(protein_of_interest_fasta, interactor_fasta, output_dir, "lmcd1_pet", interactor_id)



In [None]:
file_path = r"G:\My Drive\Uni\Thesis\Data\datasets_cutoff\final_results\Stefano_EXT983_LMCD1_PET_processed_results.xlsx"
sheet_name = "below_not_in_above"

df = pd.read_excel(file_path, sheet_name=sheet_name)

In [15]:
poi_fasta_path = r"G:\My Drive\Uni\Thesis\Data\alphafold\Fastas\lmcd1_pet.fasta"
generate_fasta_combinations(df, poi_fasta_path, output_dir=r"G:\My Drive\Uni\Thesis\Data\alphafold\Fastas\070225")


Failed to fetch P46940 . Status code: 400
Saved G:\My Drive\Uni\Thesis\Data\alphafold\Fastas\070225\lmcd1_pet_P07437.fasta
Failed to fetch Q9BUF5 . Status code: 400
Failed to fetch P53621 . Status code: 400
Failed to fetch Q01082 . Status code: 400
Failed to fetch Q13813 . Status code: 400
Failed to fetch P60660 . Status code: 400
Failed to fetch Q9NR30 . Status code: 400
Failed to fetch P08670 . Status code: 400
Failed to fetch Q13347 . Status code: 400
Failed to fetch P12814 . Status code: 400
Failed to fetch Q13509 . Status code: 400
Failed to fetch P55072 . Status code: 400
Failed to fetch P21333 . Status code: 400
Failed to fetch Q00610 . Status code: 400
Failed to fetch Q99832 . Status code: 400


Version that stores all fastas in 1 file (for AlphaPulldown):

In [11]:

file_path = r"G:\My Drive\Uni\Thesis\Data\datasets_cutoff\final_results\Stefano_EXT983_LMCD1_PET_processed_results.xlsx"
sheet_name = "below_not_in_above"

df = pd.read_excel(file_path, sheet_name=sheet_name)

In [None]:
import pandas as pd
import requests
import os

def generate_combined_fasta(df, accession_column='accession', output_file='combined_interactors.fasta'):
    """Fetches FASTA sequences for all interactors and saves them into a single file."""
    fasta_sequences = []

    # Loop over unique interactors
    for interactor_id in df[accession_column].dropna().unique():
        interactor_fasta = fetch_fasta(interactor_id)
        if interactor_fasta:
            fasta_sequences.append(interactor_fasta)

    # Write all sequences to a single file
    if fasta_sequences:
        with open(output_file, 'w') as f:
            f.write("\n".join(fasta_sequences))
        print(f"Saved combined FASTA file: {output_file}")
    else:
        print("No valid FASTA sequences retrieved.")



In [18]:
protlist = ['P46940', #16 protiens interacting with TES, DUX4 and LMCD1
'P07437',
'Q9BUF5',
'P53621',
'Q01082',
'Q13813',
'P60660',
'Q9NR30',
'P08670',
'Q13347',
'P12814',
'Q13509',
'P55072',
'P21333',
'Q00610',
'Q99832']
df = pd.DataFrame({"accession": protlist})

In [None]:
generate_combined_fasta(df,  output_file=r"D:\Fastas\070225\070225.fasta")

Saved combined FASTA file: D:\Fastas\070225\070225.fasta


In [None]:
# Save fasta files for each interactor in a separate file
import pandas as pd
import requests
import os

def fetch_fasta(uniprot_id):
    """Fetches the canonical FASTA sequence from UniProt."""
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.fasta"
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to fetch {uniprot_id}. Status code: {response.status_code}")
        return None

def save_fasta_files(df, accession_column='accession', output_dir='fasta_sequences'):
    """Fetches FASTA sequences for all interactors and saves each to an individual file."""
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    for interactor_id in df[accession_column].dropna().unique():
        interactor_fasta = fetch_fasta(interactor_id)
        if interactor_fasta:
            file_path = os.path.join(output_dir, f"{interactor_id}.fasta")
            with open(file_path, 'w') as f:
                f.write(interactor_fasta)
            print(f"Saved FASTA file: {file_path}")



In [7]:
df = pd.read_excel(r"G:\My Drive\Uni\Thesis\Data\control_proteins.xlsx")

In [8]:
save_fasta_files(df, output_dir=r"D:\Fastas\controls")

Saved FASTA file: D:\Fastas\controls\Q86V25.fasta
Saved FASTA file: D:\Fastas\controls\P23435.fasta
Saved FASTA file: D:\Fastas\controls\Q86Y78.fasta
Saved FASTA file: D:\Fastas\controls\Q99616.fasta
Saved FASTA file: D:\Fastas\controls\Q8N8Q3.fasta
Saved FASTA file: D:\Fastas\controls\Q9NWW9.fasta
Saved FASTA file: D:\Fastas\controls\P0C7P0.fasta
Saved FASTA file: D:\Fastas\controls\Q8WXC3.fasta
Saved FASTA file: D:\Fastas\controls\Q8IVL8.fasta
Saved FASTA file: D:\Fastas\controls\Q6UXV0.fasta
Saved FASTA file: D:\Fastas\controls\Q8N300.fasta
Saved FASTA file: D:\Fastas\controls\P62945.fasta
Saved FASTA file: D:\Fastas\controls\Q9BXJ8.fasta
Saved FASTA file: D:\Fastas\controls\P01850.fasta
Saved FASTA file: D:\Fastas\controls\P0DN84.fasta
Saved FASTA file: D:\Fastas\controls\Q9BW66.fasta
Saved FASTA file: D:\Fastas\controls\Q6ZNX1.fasta
Saved FASTA file: D:\Fastas\controls\P01861.fasta
Saved FASTA file: D:\Fastas\controls\Q9BVK8.fasta
Saved FASTA file: D:\Fastas\controls\Q8IXS2.fasta


Combine fasta files from a certain directory with a specified fasta file.

In [None]:
import os

def read_fasta(file_path):
    """Reads a FASTA file from a local file path."""
    try:
        with open(file_path, 'r') as f:
            return f.read()
    except FileNotFoundError:
        print(f"FASTA file not found: {file_path}")
        return None

def combine_fasta_files(poi_fasta_path, fasta_dir, output_dir):
    """Combines each FASTA file in the directory with the protein of interest FASTA."""
    # Get POI name from its file name (excluding extension)
    poi_name = os.path.splitext(os.path.basename(poi_fasta_path))[0]

    # Read the protein of interest (POI) FASTA file
    poi_fasta = read_fasta(poi_fasta_path)
    if poi_fasta is None:
        print("Failed to read POI FASTA file. Exiting...")
        return

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Iterate through all FASTA files in the directory
    for fasta_file in os.listdir(fasta_dir):
        if fasta_file.endswith(".fasta") or fasta_file.endswith(".fa"):
            fasta_path = os.path.join(fasta_dir, fasta_file)
            interactor_fasta = read_fasta(fasta_path)
            
            if interactor_fasta:
                # Create a combined FASTA content
                combined_fasta = poi_fasta + "\n" + interactor_fasta
                
                # Construct output file name: [POI]_[Interactor].fasta
                interactor_name = os.path.splitext(fasta_file)[0]  # Remove extension
                output_file_name = f"{poi_name}_{interactor_name}.fasta"
                output_file_path = os.path.join(output_dir, output_file_name)
                
                # Save the combined FASTA
                with open(output_file_path, 'w') as f:
                    f.write(combined_fasta)
                
                print(f"Saved combined FASTA: {output_file_path}")

In [19]:
combine_fasta_files(r"D:\Fastas\dux4_notail.fasta", r"D:\Fastas\controls", r"D:\Fastas\120525_dux4_notail_controls")

Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_A6NGQ2.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_A6NI73.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_O75818.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_O95154.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01721.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01834.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01848.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01850.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01861.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01876.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_notail_P01877.fasta
Saved combined FASTA: D:\Fastas\120525_dux4_notail_controls\dux4_