In [None]:
import subprocess
import os

# Quick diagnostic run of Clustal Omega on the temporary FASTA file; both
# output streams are captured so they can be inspected below.
output_dir_p = "./data/processed/"
temp_file = os.path.join(output_dir_p, "temp.fasta")

clustalo_cmd = (
    f"~/bin/clustalo -i {temp_file} "
    f"-o {os.path.join(output_dir_p, 'output.stockholm')} "
    "--outfmt stockholm --force"
)
result = subprocess.run(
    clustalo_cmd,
    shell=True,  # the shell is what expands the leading "~"
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

# Show what the command wrote: standard output first, then standard error
for _captured in (result.stdout, result.stderr):
    print(_captured.decode())

In [None]:
import os

# Add the directory containing hmmbuild to the PATH environment variable.
# os.pathsep keeps the separator correct on every platform, a missing PATH is
# tolerated, and the directory is only appended once so that re-running this
# cell does not keep growing PATH with duplicates.
_hmmbuild_dir = '/usr/local/bin'
_current_path = os.environ.get('PATH', '')
if _hmmbuild_dir not in _current_path.split(os.pathsep):
    os.environ['PATH'] = (
        f"{_current_path}{os.pathsep}{_hmmbuild_dir}" if _current_path else _hmmbuild_dir
    )

# Now try running your hmmbuild command again

In [None]:
import subprocess
import os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

def read_fasta(fasta_file):
    """
    Loads all records of a FASTA file into a dictionary.

    Parameters:
    fasta_file (str): Path to the input FASTA file.

    Returns:
    dict: Mapping built by SeqIO.to_dict from the parsed records
          (sequence IDs as keys, sequence records as values).
    """
    parsed_records = SeqIO.parse(fasta_file, "fasta")
    return SeqIO.to_dict(parsed_records)

def write_alignment(sequences, output_file):
    """
    Writes the given (still unaligned) sequences to a FASTA file that serves
    as input for the alignment step.

    Every record is reduced to its ID and sequence. The description is set to
    an empty string explicitly: a SeqRecord created without one carries the
    placeholder "<unknown description>", which the FASTA writer would append
    to every header line.

    Parameters:
    sequences (dict): Dictionary of sequences to be written
                      (sequence ID -> sequence record).
    output_file (str): Path to the output FASTA file.
    """
    records = [
        SeqRecord(seq_record.seq, id=seq_id, description="")
        for seq_id, seq_record in sequences.items()
    ]
    SeqIO.write(records, output_file, "fasta")

def run_clustalo(input_fasta, output_stockholm, clustalo_exe="~/bin/clustalo"):
    """
    Runs Clustal Omega to perform multiple sequence alignment and save it in Stockholm format.

    The command is passed to subprocess as an argument list (no shell), so
    paths containing spaces or shell metacharacters are handed to Clustal Omega
    unchanged instead of being split or interpreted.

    Parameters:
    input_fasta (str): Path to the input FASTA file.
    output_stockholm (str): Path to the output Stockholm file.
    clustalo_exe (str): Path to the Clustal Omega executable; a leading '~' is
                        expanded. Default is '~/bin/clustalo'.

    Returns:
    tuple: (standard output, standard error) of the Clustal Omega command, both decoded to str.

    Raises:
    RuntimeError: If the executable cannot be started or exits with a non-zero status.
    """
    import shlex  # local import: only used to render the command for the log line

    command = [
        os.path.expanduser(clustalo_exe),  # no shell involved, so expand '~' ourselves
        "-i", input_fasta,
        "-o", output_stockholm,
        "--outfmt", "stockholm",
        "--force",
    ]
    printable_command = " ".join(shlex.quote(str(arg)) for arg in command)
    print(f"Running command: {printable_command}")

    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # A missing or non-executable binary used to surface as a failing shell
        # command; keep reporting it as RuntimeError so callers' handling still applies.
        raise RuntimeError(f"Clustal Omega failed with error: {exc}") from exc

    stdout_text = result.stdout.decode()
    stderr_text = result.stderr.decode()

    # Print standard output and error for debugging
    print("Standard Output:", stdout_text)
    print("Standard Error:", stderr_text)

    if result.returncode != 0:
        raise RuntimeError(f"Clustal Omega failed with error: {stderr_text}")

    return stdout_text, stderr_text



In [None]:
def build_hmm(alignment_file, output_hmm, hmmbuild_exe="hmmbuild"):
    """
    Builds an HMM model from a Stockholm alignment file using hmmbuild.

    Success is decided by the exit status of the command. Text written to
    standard error alone is not treated as a failure; it is returned to the
    caller together with the standard output.

    Parameters:
    alignment_file (str): Path to the input Stockholm alignment file.
    output_hmm (str): Path to the output HMM file.
    hmmbuild_exe (str): Name or path of the hmmbuild executable. Default is
                        'hmmbuild' (looked up on PATH).

    Returns:
    tuple: (standard output, standard error) of the command, both decoded to str.

    Raises:
    RuntimeError: If the executable cannot be started or exits with a non-zero status.
    """
    command = [hmmbuild_exe, output_hmm, alignment_file]

    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # Only launch failures are translated here, so the RuntimeError raised
        # below for a failing run is never wrapped a second time.
        raise RuntimeError(f"An error occurred while building HMM: {exc}") from exc

    stdout_text = result.stdout.decode()
    stderr_text = result.stderr.decode()

    if result.returncode != 0:
        raise RuntimeError(
            f"hmmbuild error (exit code {result.returncode}): {stderr_text}"
        )

    return stdout_text, stderr_text

def main(fasta_file, output_dir, clustal_exe_path="~/bin/clustalo"):
    """
    Runs the whole pipeline: read the FASTA sequences, write them to a temporary
    FASTA file, align them with Clustal Omega and build an HMM from the alignment.

    All files are created inside output_dir as 'temp.fasta', 'output.stockholm'
    and 'output.hmm'. A RuntimeError raised by the alignment or the HMM step is
    reported on standard output and ends the run early.

    Parameters:
    fasta_file (str): Path to the input FASTA file.
    output_dir (str): Directory where output files will be saved (created if missing).
    clustal_exe_path (str): Path to the Clustal Omega executable. Default is '~/bin/clustalo'.
                            NOTE(review): this value is not referenced anywhere in the
                            function body, so it currently has no effect.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Locations of the intermediate and final files
    unaligned_path, alignment_path, model_path = (
        os.path.join(output_dir, file_name)
        for file_name in ("temp.fasta", "output.stockholm", "output.hmm")
    )

    # Steps 1 and 2: load the input records and dump them to the temporary FASTA file
    write_alignment(read_fasta(fasta_file), unaligned_path)

    # Step 3: multiple sequence alignment
    try:
        align_out, align_err = run_clustalo(unaligned_path, alignment_path)
    except RuntimeError as exc:
        print(f"Error in Clustal Omega: {exc}")
        return
    else:
        print(align_out)
        print(align_err)

    # Step 4: HMM construction from the alignment
    try:
        build_out, build_err = build_hmm(alignment_path, model_path)
    except RuntimeError as exc:
        print(f"Error in hmmbuild: {exc}")
        return
    else:
        print(build_out)
        print(build_err)

    print("HMM model created successfully!")



In [None]:
if __name__ == "__main__":
    # Input sequences and the folder that receives the alignment and HMM files
    fasta_file, output_dir = (
        "./data/raw/Archived/IntAmidases.fasta",
        "./data/processed/",
    )

    # Kick off the pipeline with the settings above
    main(fasta_file=fasta_file, output_dir=output_dir)

In [None]:
# Making a Stockholm alignment using Clustal Omega (clustalo)
  
from Bio import SeqIO
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord
import subprocess
import os

# Directory used by the cells below for the temporary FASTA file, the Stockholm
# alignment and the resulting HMM.
output_dir_p = "./data/processed/"


In [None]:
# Optional - the Stockholm alignment could also be created in a more visual
# environment such as Jalview.

# Path to input FASTA file
fasta_file = "./data/raw/Archived/IntAmidases.fasta"

# Read sequences from the FASTA file into a dictionary
sequences = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta"))

# Reduce every record to ID + sequence. No alignment happens at this point; the
# records are only prepared as input for Clustal Omega. The description is
# blanked explicitly, otherwise SeqRecord's "<unknown description>" placeholder
# would be written into every FASTA header.
alignments = [
    SeqRecord(seq_record.seq, id=seq_id, description="")
    for seq_id, seq_record in sequences.items()
]

# Write the unaligned sequences to a temporary file (create the folder if needed)
os.makedirs(output_dir_p, exist_ok=True)
temp_file = os.path.join(output_dir_p, "temp.fasta")
SeqIO.write(alignments, temp_file, "fasta")

# Perform multiple sequence alignment using Clustal Omega. The command is an
# argument list (no shell), so paths with spaces or special characters are
# passed through unchanged; check=True raises if clustalo exits with an error.
clustalo_cmd = [
    "clustalo",
    "-i", temp_file,
    "-o", os.path.join(output_dir_p, "output.stockholm"),
    "--outfmt", "stockholm",
    "--force",
]
subprocess.run(clustalo_cmd, check=True)

print("Stockholm alignment generated successfully!")


In [None]:
# Making an HMM model based on an existing Stockholm alignment
# Usage: hmmbuild [-options] <hmmfile_out> <msafile>

alignment = os.path.join(output_dir_p, '240220_LauraLipasesAlign.sto')
#alignment = os.path.join(output_dir_p, 'output.stockholm')

# Defining the HMM file to create
outputhmm = os.path.join(output_dir_p, 'Lipase.hmm')
#Target = outputFA_file

commandalign = ['hmmbuild', outputhmm, alignment]

try:
    # run the command and capture the output
    processalign = subprocess.run(commandalign, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError as e:
    # hmmbuild could not be started at all (e.g. not found on PATH)
    print("An error occurred:", e)
else:
    stdout, stderr = processalign.stdout, processalign.stderr
    if processalign.returncode != 0:
        # The exit status, not the mere presence of stderr text, signals failure
        print("Error:", stderr.decode())
    elif stderr:
        # Successful run that still wrote something to standard error
        print("Warning:", stderr.decode())