# In [1]:
import os
import statistics
import subprocess
import tempfile
import time

from Bio import SeqIO, AlignIO
from Bio.Align import PairwiseAligner
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

class SequenceAligner:
    def __init__(self, input_fasta, output_dir="/Users/sarawut/Desktop/oneclick/"):
        self.input_fasta = input_fasta
        self.output_dir = output_dir
        self.sequences = []
        self.filtered_sequences = []
        self.target_species = []
        
        # ‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡πÄ‡∏Å‡∏ì‡∏ë‡πå‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á
        self.length_tolerance = 0.20  # 20%
        self.min_alignment_score = 0.6  # ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô alignment ‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡πà‡∏≥
        self.max_gap_percentage = 0.3   # % gaps ‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î‡∏ó‡∏µ‡πà‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö‡πÑ‡∏î‡πâ
        
        # ‡∏≠‡πà‡∏≤‡∏ô‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå
        self._load_sequences()
    
    def _load_sequences(self):
        """Parse ``self.input_fasta`` as FASTA and print basic length statistics.

        The parsed records are stored in ``self.sequences``. When the input
        yields no records only the count is printed: ``statistics.mean``,
        ``min`` and ``max`` all raise on empty input, which would otherwise
        abort construction of the object.
        """
        print("üîç ‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏≠‡πà‡∏≤‡∏ô‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå...")
        self.sequences = list(SeqIO.parse(self.input_fasta, "fasta"))
        print(f"  ‡∏û‡∏ö‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î: {len(self.sequences)}")

        if not self.sequences:
            # Nothing to summarise; leave it to the caller to notice the empty list.
            return

        # Preliminary length statistics.
        lengths = [len(seq.seq) for seq in self.sequences]
        print(f"  ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢: {statistics.mean(lengths):,.0f} bp")
        print(f"  ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß‡∏Å‡∏•‡∏≤‡∏á: {statistics.median(lengths):,.0f} bp")
        print(f"  ‡∏™‡∏±‡πâ‡∏ô‡∏™‡∏∏‡∏î: {min(lengths):,.0f} bp")
        print(f"  ‡∏¢‡∏≤‡∏ß‡∏™‡∏∏‡∏î: {max(lengths):,.0f} bp")
    
    def set_target_species(self, species_keywords):
        """‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏´‡∏•‡∏±‡∏Å‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£ alignment"""
        self.target_species = species_keywords
        print(f"üéØ ‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏´‡∏•‡∏±‡∏Å: {species_keywords}")
    
    def check_sequence_quality(self, sequence):
        """‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏Ç‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå"""
        seq_str = str(sequence.seq).upper()
        
        # ‡∏ô‡∏±‡∏ö‡πÄ‡∏ö‡∏™‡∏ó‡∏µ‡πà‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà A,T,C,G
        valid_bases = set('ATCG')
        invalid_count = sum(1 for base in seq_str if base not in valid_bases)
        total_bases = len(seq_str)
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡πÄ‡∏õ‡∏≠‡∏£‡πå‡πÄ‡∏ã‡πá‡∏ô‡∏ï‡πå‡πÄ‡∏ö‡∏™‡∏ó‡∏µ‡πà‡πÑ‡∏°‡πà‡∏ñ‡∏π‡∏Å‡∏ï‡πâ‡∏≠‡∏á
        invalid_percentage = (invalid_count / total_bases) * 100 if total_bases > 0 else 100
        
        # ‡πÄ‡∏Å‡∏ì‡∏ë‡πå‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û: ‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö‡πÄ‡∏ö‡∏™‡∏ó‡∏µ‡πà‡πÑ‡∏°‡πà‡∏ñ‡∏π‡∏Å‡∏ï‡πâ‡∏≠‡∏á‡πÑ‡∏°‡πà‡πÄ‡∏Å‡∏¥‡∏ô 5%
        is_high_quality = invalid_percentage <= 5.0
        
        return {
            'is_high_quality': is_high_quality,
            'invalid_percentage': invalid_percentage,
            'invalid_count': invalid_count,
            'total_bases': total_bases
        }
    
    def remove_duplicates(self, sequences):
        """‡∏ï‡∏±‡∏î duplicate sequences ‡∏≠‡∏≠‡∏Å"""
        print("\nüîç ‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö duplicate sequences...")
        
        seen_sequences = {}
        unique_sequences = []
        duplicates = []
        
        for seq in sequences:
            seq_hash = str(seq.seq).upper()
            
            if seq_hash in seen_sequences:
                # ‡∏û‡∏ö duplicate
                original_id = seen_sequences[seq_hash]
                duplicates.append((seq.id, original_id))
            else:
                # ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÉ‡∏´‡∏°‡πà
                seen_sequences[seq_hash] = seq.id
                unique_sequences.append(seq)
        
        print(f"  ‚úÖ ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡πÑ‡∏°‡πà‡∏ã‡πâ‡∏≥: {len(unique_sequences)}")
        print(f"  üîÑ Duplicates ‡∏ó‡∏µ‡πà‡∏ï‡∏±‡∏î‡∏≠‡∏≠‡∏Å: {len(duplicates)}")
        
        if duplicates:
            print("  ‡∏£‡∏≤‡∏¢‡∏Å‡∏≤‡∏£ duplicates ‡∏ó‡∏µ‡πà‡∏ï‡∏±‡∏î‡∏≠‡∏≠‡∏Å:")
            for dup_id, original_id in duplicates[:5]:
                print(f"    - {dup_id} (‡∏ã‡πâ‡∏≥‡∏Å‡∏±‡∏ö {original_id})")
            if len(duplicates) > 5:
                print(f"    ... ‡πÅ‡∏•‡∏∞‡∏≠‡∏µ‡∏Å {len(duplicates)-5} ‡∏£‡∏≤‡∏¢‡∏Å‡∏≤‡∏£")
        
        return unique_sequences
    
    def filter_by_quality(self, sequences):
        """‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ï‡∏≤‡∏°‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û"""
        print("\nüß¨ ‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ï‡∏≤‡∏°‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û...")
        
        high_quality = []
        low_quality = []
        
        for seq in sequences:
            quality = self.check_sequence_quality(seq)
            
            if quality['is_high_quality']:
                high_quality.append(seq)
            else:
                low_quality.append((seq.id, quality))
        
        print(f"  ‚úÖ ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏î‡∏µ: {len(high_quality)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"  ‚ùå ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏ï‡πà‡∏≥: {len(low_quality)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        
        if low_quality:
            print("  ‡∏£‡∏≤‡∏¢‡∏Å‡∏≤‡∏£‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏ï‡πà‡∏≥‡∏ó‡∏µ‡πà‡∏ï‡∏±‡∏î‡∏≠‡∏≠‡∏Å:")
            for seq_id, quality in low_quality[:5]:
                print(f"    - {seq_id}: {quality['invalid_percentage']:.1f}% ‡πÄ‡∏ö‡∏™‡πÑ‡∏°‡πà‡∏ñ‡∏π‡∏Å‡∏ï‡πâ‡∏≠‡∏á")
            if len(low_quality) > 5:
                print(f"    ... ‡πÅ‡∏•‡∏∞‡∏≠‡∏µ‡∏Å {len(low_quality)-5} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        
        return high_quality
    
    def filter_by_length(self):
        """‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ï‡∏≤‡∏°‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß ‡∏û‡∏£‡πâ‡∏≠‡∏°‡∏ï‡∏±‡∏î duplicates ‡πÅ‡∏•‡∏∞‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏ï‡πà‡∏≥"""
        print("\n" + "="*50)
        print("üìè ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏Å‡∏£‡∏∞‡∏ö‡∏ß‡∏ô‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print("="*50)
        
        # ‡πÅ‡∏™‡∏î‡∏á‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô
        lengths = [len(seq.seq) for seq in self.sequences]
        print(f"üìä ‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô:")
        print(f"  ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå: {len(self.sequences)}")
        print(f"  ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢: {statistics.mean(lengths):,.0f} bp")
        print(f"  ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß‡∏Å‡∏•‡∏≤‡∏á: {statistics.median(lengths):,.0f} bp")
        print(f"  ‡∏ä‡πà‡∏ß‡∏á‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß: {min(lengths):,.0f} - {max(lengths):,.0f} bp")
        
        # ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 1: ‡∏ï‡∏±‡∏î duplicates
        step1_sequences = self.remove_duplicates(self.sequences)
        
        # ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 2: ‡∏Å‡∏£‡∏≠‡∏á‡∏ï‡∏≤‡∏°‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û
        step2_sequences = self.filter_by_quality(step1_sequences)
        
        # ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 3: ‡∏Å‡∏£‡∏≠‡∏á‡∏ï‡∏≤‡∏°‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß
        print("\nüìè ‡∏Å‡∏£‡∏≠‡∏á‡∏ï‡∏≤‡∏°‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß...")
        lengths = [len(seq.seq) for seq in step2_sequences]
        median_length = statistics.median(lengths)
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡∏ä‡πà‡∏ß‡∏á‡∏ó‡∏µ‡πà‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö‡πÑ‡∏î‡πâ
        min_acceptable = median_length * (1 - self.length_tolerance)
        max_acceptable = median_length * (1 + self.length_tolerance)
        
        print(f"  ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß‡∏Å‡∏•‡∏≤‡∏á: {median_length:,.0f} bp")
        print(f"  ‡∏ä‡πà‡∏ß‡∏á‡∏ó‡∏µ‡πà‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö: {min_acceptable:,.0f} - {max_acceptable:,.0f} bp")
        
        # ‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå
        final_sequences = []
        length_removed = []
        
        for seq in step2_sequences:
            seq_length = len(seq.seq)
            if min_acceptable <= seq_length <= max_acceptable:
                final_sequences.append(seq)
            else:
                length_removed.append((seq.id, seq_length))
        
        self.filtered_sequences = final_sequences
        
        print(f"  ‚úÖ ‡∏ú‡πà‡∏≤‡∏ô‡∏Å‡∏≤‡∏£‡∏Ñ‡∏±‡∏î‡πÄ‡∏•‡∏∑‡∏≠‡∏Å: {len(final_sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"  ‚ùå ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß‡πÑ‡∏°‡πà‡πÄ‡∏´‡∏°‡∏≤‡∏∞‡∏™‡∏°: {len(length_removed)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        
        if length_removed:
            print("  ‡∏£‡∏≤‡∏¢‡∏Å‡∏≤‡∏£‡∏ó‡∏µ‡πà‡∏ï‡∏±‡∏î‡∏≠‡∏≠‡∏Å‡πÄ‡∏ô‡∏∑‡πà‡∏≠‡∏á‡∏à‡∏≤‡∏Å‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß:")
            for seq_id, length in length_removed[:5]:
                print(f"    - {seq_id}: {length:,} bp")
            if len(length_removed) > 5:
                print(f"    ... ‡πÅ‡∏•‡∏∞‡∏≠‡∏µ‡∏Å {len(length_removed)-5} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        
        # ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á
        total_removed = len(self.sequences) - len(final_sequences)
        print(f"\nüìã ‡∏™‡∏£‡∏∏‡∏õ‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á:")
        print(f"  ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô: {len(self.sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"  ‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡∏´‡∏•‡∏±‡∏á duplicate removal: {len(step1_sequences)}")
        print(f"  ‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡∏´‡∏•‡∏±‡∏á quality filter: {len(step2_sequences)}")
        print(f"  ‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏¢: {len(final_sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"  ‡∏ï‡∏±‡∏î‡∏≠‡∏≠‡∏Å‡∏£‡∏ß‡∏°: {total_removed} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå ({(total_removed/len(self.sequences)*100):.1f}%)")
    
    def _get_species_from_description(self, description):
        """‡πÅ‡∏¢‡∏Å‡∏ä‡∏∑‡πà‡∏≠‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏à‡∏≤‡∏Å description"""
        # ‡∏û‡∏¢‡∏≤‡∏¢‡∏≤‡∏°‡πÅ‡∏¢‡∏Å‡∏ä‡∏∑‡πà‡∏≠‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏à‡∏≤‡∏Å description
        parts = description.split()
        if len(parts) >= 2:
            return f"{parts[0]} {parts[1]}"
        return description
    
    def organize_sequences_by_priority(self):
        """‡∏à‡∏±‡∏î‡πÄ‡∏£‡∏µ‡∏¢‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ï‡∏≤‡∏°‡∏Ñ‡∏ß‡∏≤‡∏°‡∏™‡∏≥‡∏Ñ‡∏±‡∏ç (species ‡∏´‡∏•‡∏±‡∏Å‡∏Å‡πà‡∏≠‡∏ô)"""
        print("\nüéØ ‡∏à‡∏±‡∏î‡πÄ‡∏£‡∏µ‡∏¢‡∏á‡∏•‡∏≥‡∏î‡∏±‡∏ö‡∏Ñ‡∏ß‡∏≤‡∏°‡∏™‡∏≥‡∏Ñ‡∏±‡∏ç...")
        
        priority_sequences = []
        other_sequences = []
        
        for seq in self.filtered_sequences:
            species_name = self._get_species_from_description(seq.description)
            is_target = any(target in species_name.lower() for target in 
                          [t.lower() for t in self.target_species])
            
            if is_target:
                priority_sequences.append(seq)
            else:
                other_sequences.append(seq)
        
        # ‡∏£‡∏ß‡∏°‡∏Å‡∏±‡∏ô (species ‡∏´‡∏•‡∏±‡∏Å‡∏Å‡πà‡∏≠‡∏ô)
        organized_sequences = priority_sequences + other_sequences
        
        print(f"  üéØ ‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏´‡∏•‡∏±‡∏Å: {len(priority_sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"  üìä ‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏≠‡∏∑‡πà‡∏ô: {len(other_sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        
        return organized_sequences
    
    def _calculate_alignment_quality(self, seq1, seq2):
        """‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏Ç‡∏≠‡∏á‡∏Å‡∏≤‡∏£ alignment ‡∏£‡∏∞‡∏´‡∏ß‡πà‡∏≤‡∏á 2 ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå"""
        try:
            aligner = PairwiseAligner()
            aligner.match_score = 2
            aligner.mismatch_score = -1
            aligner.open_gap_score = -2
            aligner.extend_gap_score = -0.5
            
            alignments = aligner.align(str(seq1.seq), str(seq2.seq))
            best_alignment = alignments[0]
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏±‡∏ö‡πÅ‡∏•‡πâ‡∏ß (0-1)
            max_possible_score = min(len(seq1.seq), len(seq2.seq)) * 2
            normalized_score = best_alignment.score / max_possible_score
            
            return max(0, min(1, normalized_score))
            
        except Exception as e:
            print(f"    ‚ö†Ô∏è ‡πÑ‡∏°‡πà‡∏™‡∏≤‡∏°‡∏≤‡∏£‡∏ñ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡πÑ‡∏î‡πâ: {e}")
            return 0.0
    
    def progressive_alignment(self):
        """‡∏ó‡∏≥ alignment ‡πÅ‡∏ö‡∏ö‡∏Ñ‡πà‡∏≠‡∏¢‡πÄ‡∏õ‡πá‡∏ô‡∏Ñ‡πà‡∏≠‡∏¢‡πÑ‡∏õ ‡∏û‡∏£‡πâ‡∏≠‡∏°‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏•‡∏∞‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡∏ó‡∏∏‡∏Å‡∏ä‡πà‡∏ß‡∏á"""
        print("\n" + "="*50)
        print("üîÑ ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏Å‡∏≤‡∏£ Progressive Alignment")
        print("="*50)
        
        organized_sequences = self.organize_sequences_by_priority()
        
        if len(organized_sequences) < 2:
            print("‚ùå ‡∏ï‡πâ‡∏≠‡∏á‡∏°‡∏µ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏ô‡πâ‡∏≠‡∏¢ 2 ‡∏ï‡∏±‡∏ß‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£ alignment")
            return None
        
        accepted_sequences = []
        rejected_sequences = []
        alignment_stats = []
        
        # ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏î‡πâ‡∏ß‡∏¢‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÅ‡∏£‡∏Å (‡∏Ñ‡∏ß‡∏£‡πÄ‡∏õ‡πá‡∏ô target species)
        accepted_sequences.append(organized_sequences[0])
        print(f"üéØ ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏î‡πâ‡∏ß‡∏¢‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå reference: {organized_sequences[0].id}")
        print(f"   Species: {self._get_species_from_description(organized_sequences[0].description)}")
        print(f"   ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß: {len(organized_sequences[0].seq):,} bp")
        
        # ‡∏ï‡∏±‡∏ß‡πÅ‡∏õ‡∏£‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô
        report_interval = max(1, len(organized_sequences) // 10)  # ‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡∏ó‡∏∏‡∏Å 10%
        total_sequences = len(organized_sequences) - 1
        
        # ‡∏ó‡∏î‡∏™‡∏≠‡∏ö‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡∏•‡∏∞‡∏ï‡∏±‡∏ß
        for i, candidate_seq in enumerate(organized_sequences[1:], 1):
            progress_percent = (i / total_sequences) * 100
            
            print(f"\nüìù [{i:3d}/{total_sequences}] ({progress_percent:5.1f}%) ‡∏ó‡∏î‡∏™‡∏≠‡∏ö: {candidate_seq.id}")
            print(f"    Species: {self._get_species_from_description(candidate_seq.description)}")
            print(f"    ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß: {len(candidate_seq.seq):,} bp")
            
            # ‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö‡∏Å‡∏±‡∏ö‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö‡πÅ‡∏•‡πâ‡∏ß
            quality_scores = []
            comparison_details = []
            
            # ‡πÄ‡∏•‡∏∑‡∏≠‡∏Å‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏à‡∏∞‡πÉ‡∏ä‡πâ‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö (‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î 5 ‡∏ï‡∏±‡∏ß‡∏•‡πà‡∏≤‡∏™‡∏∏‡∏î)
            comparison_seqs = accepted_sequences[-5:] if len(accepted_sequences) > 5 else accepted_sequences
            
            for j, accepted_seq in enumerate(comparison_seqs):
                score = self._calculate_alignment_quality(candidate_seq, accepted_seq)
                quality_scores.append(score)
                comparison_details.append({
                    'reference': accepted_seq.id[:30] + "..." if len(accepted_seq.id) > 30 else accepted_seq.id,
                    'score': score
                })
                print(f"    vs {accepted_seq.id[:25]:<25}...: {score:.3f}")
            
            # ‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö‡∏´‡∏£‡∏∑‡∏≠‡∏õ‡∏è‡∏¥‡πÄ‡∏™‡∏ò
            avg_score = statistics.mean(quality_scores) if quality_scores else 0
            max_score = max(quality_scores) if quality_scores else 0
            min_score = min(quality_scores) if quality_scores else 0
            
            # ‡πÄ‡∏Å‡∏ì‡∏ë‡πå‡∏Å‡∏≤‡∏£‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à (‡πÉ‡∏ä‡πâ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢)
            decision_reason = ""
            if avg_score >= self.min_alignment_score:
                accepted_sequences.append(candidate_seq)
                decision = "‚úÖ ‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö"
                decision_reason = f"‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢ {avg_score:.3f} ‚â• {self.min_alignment_score}"
            else:
                rejected_sequences.append((candidate_seq, avg_score, comparison_details))
                decision = "‚ùå ‡∏õ‡∏è‡∏¥‡πÄ‡∏™‡∏ò"
                decision_reason = f"‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢ {avg_score:.3f} < {self.min_alignment_score}"
            
            print(f"    üìä ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥: ‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢={avg_score:.3f}, ‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î={max_score:.3f}, ‡∏ï‡πà‡∏≥‡∏™‡∏∏‡∏î={min_score:.3f}")
            print(f"    {decision}: {decision_reason}")
            
            # ‡πÄ‡∏Å‡πá‡∏ö‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô
            alignment_stats.append({
                'sequence_id': candidate_seq.id,
                'avg_score': avg_score,
                'max_score': max_score,
                'min_score': min_score,
                'accepted': avg_score >= self.min_alignment_score,
                'comparisons': len(quality_scores)
            })
            
            # ‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡∏Ñ‡∏ß‡∏≤‡∏°‡∏Ñ‡∏∑‡∏ö‡∏´‡∏ô‡πâ‡∏≤‡∏ó‡∏∏‡∏Å‡∏ä‡πà‡∏ß‡∏á
            if i % report_interval == 0 or i == total_sequences:
                self._print_progress_report(i, total_sequences, len(accepted_sequences), len(rejected_sequences))
        
        # ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏Ñ‡∏±‡∏î‡πÄ‡∏•‡∏∑‡∏≠‡∏Å
        print(f"\n" + "="*50)
        print("üìä ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏• Progressive Alignment")
        print("="*50)
        print(f"‚úÖ ‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö: {len(accepted_sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"‚ùå ‡∏õ‡∏è‡∏¥‡πÄ‡∏™‡∏ò: {len(rejected_sequences)} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"üìà ‡∏≠‡∏±‡∏ï‡∏£‡∏≤‡∏Å‡∏≤‡∏£‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö: {(len(accepted_sequences)/len(organized_sequences)*100):.1f}%")
        
        # ‡πÅ‡∏™‡∏î‡∏á‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô
        if alignment_stats:
            scores = [stat['avg_score'] for stat in alignment_stats]
            print(f"\nüìä ‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô alignment:")
            print(f"  ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢: {statistics.mean(scores):.3f}")
            print(f"  ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏Å‡∏•‡∏≤‡∏á: {statistics.median(scores):.3f}")
            print(f"  ‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î: {max(scores):.3f}")
            print(f"  ‡∏ï‡πà‡∏≥‡∏™‡∏∏‡∏î: {min(scores):.3f}")
        
        # ‡πÅ‡∏™‡∏î‡∏á‡∏£‡∏≤‡∏¢‡∏•‡∏∞‡πÄ‡∏≠‡∏µ‡∏¢‡∏î‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏õ‡∏è‡∏¥‡πÄ‡∏™‡∏ò
        if rejected_sequences:
            print(f"\n‚ùå ‡∏£‡∏≤‡∏¢‡∏•‡∏∞‡πÄ‡∏≠‡∏µ‡∏¢‡∏î‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏õ‡∏è‡∏¥‡πÄ‡∏™‡∏ò:")
            for seq, score, details in rejected_sequences[:10]:  # ‡πÅ‡∏™‡∏î‡∏á‡πÅ‡∏Ñ‡πà 10 ‡∏≠‡∏±‡∏ô‡πÅ‡∏£‡∏Å
                species = self._get_species_from_description(seq.description)
                print(f"  - {seq.id[:40]:<40} (Score: {score:.3f}) - {species}")
            if len(rejected_sequences) > 10:
                print(f"  ... ‡πÅ‡∏•‡∏∞‡∏≠‡∏µ‡∏Å {len(rejected_sequences)-10} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        
        # ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏ú‡πà‡∏≤‡∏ô‡∏Å‡∏≤‡∏£‡∏Ñ‡∏±‡∏î‡πÄ‡∏•‡∏∑‡∏≠‡∏Å
        filtered_file = os.path.join(self.output_dir, "filtered_sequences.fasta")
        SeqIO.write(accepted_sequences, filtered_file, "fasta")
        print(f"\nüíæ ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏Ñ‡∏±‡∏î‡πÄ‡∏•‡∏∑‡∏≠‡∏Å‡πÅ‡∏•‡πâ‡∏ß: {filtered_file}")
        
        return accepted_sequences, filtered_file
    
    def _print_progress_report(self, current, total, accepted, rejected):
        """‡∏û‡∏¥‡∏°‡∏û‡πå‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡∏Ñ‡∏ß‡∏≤‡∏°‡∏Ñ‡∏∑‡∏ö‡∏´‡∏ô‡πâ‡∏≤"""
        progress = (current / total) * 100
        print(f"\nüìà ‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡∏Ñ‡∏ß‡∏≤‡∏°‡∏Ñ‡∏∑‡∏ö‡∏´‡∏ô‡πâ‡∏≤ ({progress:.1f}%):")
        print(f"    ‡∏ó‡∏î‡∏™‡∏≠‡∏ö‡πÅ‡∏•‡πâ‡∏ß: {current}/{total} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"    ‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö: {accepted} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"    ‡∏õ‡∏è‡∏¥‡πÄ‡∏™‡∏ò: {rejected} ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå")
        print(f"    ‡∏≠‡∏±‡∏ï‡∏£‡∏≤‡∏Å‡∏≤‡∏£‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö: {(accepted/(accepted+rejected)*100):.1f}%")
    
    def run_mafft_alignment(self, input_file, output_file):
        """‡∏£‡∏±‡∏ô MAFFT alignment"""
        print(f"\nüîß ‡∏£‡∏±‡∏ô MAFFT alignment...")
        try:
            # ‡πÉ‡∏ä‡πâ‡∏û‡∏≤‡∏£‡∏≤‡∏°‡∏¥‡πÄ‡∏ï‡∏≠‡∏£‡πå‡∏ó‡∏µ‡πà‡πÄ‡∏´‡∏°‡∏≤‡∏∞‡∏™‡∏°‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏Ñ‡∏•‡πâ‡∏≤‡∏¢‡∏Å‡∏±‡∏ô
            result = subprocess.run([
                'mafft',
                '--adjustdirection',    # ‡∏õ‡∏£‡∏±‡∏ö‡∏ó‡∏¥‡∏®‡∏ó‡∏≤‡∏á
                '--maxiterate', '1000', # ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏£‡∏≠‡∏ö‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î
                '--globalpair',         # global pairwise alignment
                '--thread', '4',        # ‡πÉ‡∏ä‡πâ 4 threads
                input_file
            ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
            
            # ‡πÄ‡∏Ç‡∏µ‡∏¢‡∏ô‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
            with open(output_file, 'w') as f:
                f.write(result.stdout)
            
            print(f"  ‚úÖ MAFFT alignment ‡πÄ‡∏™‡∏£‡πá‡∏à‡∏™‡∏¥‡πâ‡∏ô: {output_file}")
            return output_file
            
        except subprocess.CalledProcessError as e:
            print(f"  ‚ùå MAFFT ‡∏•‡πâ‡∏°‡πÄ‡∏´‡∏•‡∏ß: {e.stderr}")
            return None
    
    def analyze_alignment_quality(self, alignment_file):
        """‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏Ç‡∏≠‡∏á alignment ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏•‡∏∞‡πÄ‡∏≠‡∏µ‡∏¢‡∏î"""
        print(f"\n" + "="*50)
        print("üìà ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û Alignment")
        print("="*50)
        
        try:
            alignment = AlignIO.read(alignment_file, 'fasta')
            
            # ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏û‡∏∑‡πâ‡∏ô‡∏ê‡∏≤‡∏ô
            alignment_length = alignment.get_alignment_length()
            num_sequences = len(alignment)
            
            print(f"üìä ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏û‡∏∑‡πâ‡∏ô‡∏ê‡∏≤‡∏ô:")
            print(f"  üìè ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß alignment: {alignment_length:,} ‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á")
            print(f"  üß¨ ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå: {num_sequences}")
            
            # ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå gaps
            print(f"\nüï≥Ô∏è  ‡∏Å‡∏≤‡∏£‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå Gaps:")
            gap_stats = self._analyze_gaps(alignment)
            
            print(f"  ‡∏£‡∏ß‡∏° gaps ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î: {gap_stats['total_gaps']:,} ({gap_stats['gap_percentage']:.1f}%)")
            print(f"  ‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢‡∏ï‡πà‡∏≠‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå: {gap_stats['avg_gaps_per_seq']:.1f} gaps")
            print(f"  ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏°‡∏µ gaps ‡∏°‡∏≤‡∏Å‡∏™‡∏∏‡∏î: {gap_stats['max_gaps_seq']:.1f}%")
            print(f"  ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏°‡∏µ gaps ‡∏ô‡πâ‡∏≠‡∏¢‡∏™‡∏∏‡∏î: {gap_stats['min_gaps_seq']:.1f}%")
            
            # ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå conservation
            print(f"\nüî¨ ‡∏Å‡∏≤‡∏£‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå Conservation:")
            conservation_stats = self._analyze_conservation(alignment)
            
            print(f"  ‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á‡∏ó‡∏µ‡πà conserved 100%: {conservation_stats['fully_conserved']:,} ({conservation_stats['fully_conserved_percent']:.1f}%)")
            print(f"  ‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á‡∏ó‡∏µ‡πà conserved ‚â•90%: {conservation_stats['highly_conserved']:,} ({conservation_stats['highly_conserved_percent']:.1f}%)")
            print(f"  ‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á‡∏ó‡∏µ‡πà conserved ‚â•70%: {conservation_stats['moderately_conserved']:,} ({conservation_stats['moderately_conserved_percent']:.1f}%)")
            print(f"  ‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á‡∏ó‡∏µ‡πà variable: {conservation_stats['variable']:,} ({conservation_stats['variable_percent']:.1f}%)")
            
            # ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÅ‡∏ï‡πà‡∏•‡∏∞‡∏ï‡∏±‡∏ß
            print(f"\nüß¨ ‡∏Å‡∏≤‡∏£‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÅ‡∏ï‡πà‡∏•‡∏∞‡∏ï‡∏±‡∏ß:")
            seq_stats = self._analyze_individual_sequences(alignment)
            
            # ‡πÅ‡∏™‡∏î‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏î‡∏µ‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î‡πÅ‡∏•‡∏∞‡πÅ‡∏¢‡πà‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î
            best_seq = min(seq_stats, key=lambda x: x['gap_percent'])
            worst_seq = max(seq_stats, key=lambda x: x['gap_percent'])
            
            print(f"  ‚úÖ ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏î‡∏µ‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î: {best_seq['id'][:50]} ({best_seq['gap_percent']:.1f}% gaps)")
            print(f"  ‚ö†Ô∏è  ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏¢‡πà‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î: {worst_seq['id'][:50]} ({worst_seq['gap_percent']:.1f}% gaps)")
            
            # ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÇ‡∏î‡∏¢‡∏£‡∏ß‡∏°
            print(f"\nüéØ ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÇ‡∏î‡∏¢‡∏£‡∏ß‡∏°:")
            overall_quality = self._evaluate_overall_quality(gap_stats, conservation_stats)
            
            print(f"  ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û: {overall_quality['score']:.2f}/10")
            print(f"  ‡∏£‡∏∞‡∏î‡∏±‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û: {overall_quality['grade']}")
            print(f"  ‡∏Ñ‡∏≥‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥: {overall_quality['recommendation']}")
            
            # ‡∏™‡∏£‡∏∏‡∏õ‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡∏™‡∏≥‡∏Ñ‡∏±‡∏ç
            summary_stats = {
                'length': alignment_length,
                'sequences': num_sequences,
                'gap_percentage': gap_stats['gap_percentage'],
                'conservation_score': conservation_stats['conservation_score'],
                'quality_score': overall_quality['score'],
                'quality_grade': overall_quality['grade']
            }
            
            return summary_stats
            
        except Exception as e:
            print(f"  ‚ùå ‡πÑ‡∏°‡πà‡∏™‡∏≤‡∏°‡∏≤‡∏£‡∏ñ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡πÑ‡∏î‡πâ: {e}")
            return None
    
    def _analyze_gaps(self, alignment):
        """‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå gaps ‡πÉ‡∏ô‡∏£‡∏≤‡∏¢‡∏•‡∏∞‡πÄ‡∏≠‡∏µ‡∏¢‡∏î"""
        total_gaps = 0
        total_bases = 0
        seq_gap_percents = []
        
        for record in alignment:
            sequence = str(record.seq)
            gaps = sequence.count('-')
            total_gaps += gaps
            total_bases += len(sequence)
            seq_gap_percent = (gaps / len(sequence)) * 100
            seq_gap_percents.append(seq_gap_percent)
        
        return {
            'total_gaps': total_gaps,
            'total_bases': total_bases,
            'gap_percentage': (total_gaps / total_bases) * 100,
            'avg_gaps_per_seq': statistics.mean(seq_gap_percents),
            'max_gaps_seq': max(seq_gap_percents),
            'min_gaps_seq': min(seq_gap_percents)
        }
    
    def _analyze_conservation(self, alignment):
        """‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå conservation ‡πÉ‡∏ô‡πÅ‡∏ï‡πà‡∏•‡∏∞‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á"""
        alignment_length = alignment.get_alignment_length()
        num_sequences = len(alignment)
        
        fully_conserved = 0
        highly_conserved = 0
        moderately_conserved = 0
        variable = 0
        conservation_scores = []
        
        for pos in range(alignment_length):
            # ‡∏î‡∏∂‡∏á‡πÄ‡∏ö‡∏™‡πÉ‡∏ô‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á‡∏ô‡∏µ‡πâ‡∏à‡∏≤‡∏Å‡∏ó‡∏∏‡∏Å‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå
            bases = [alignment[i].seq[pos].upper() for i in range(num_sequences)]
            
            # ‡∏ô‡∏±‡∏ö‡πÄ‡∏ö‡∏™‡∏ó‡∏µ‡πà‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà gap
            non_gap_bases = [base for base in bases if base != '-']
            
            if len(non_gap_bases) == 0:
                conservation_scores.append(0)
                variable += 1
                continue
            
            # ‡∏ô‡∏±‡∏ö‡πÄ‡∏ö‡∏™‡∏ó‡∏µ‡πà‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡∏Å‡∏±‡∏ô‡∏°‡∏≤‡∏Å‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î
            from collections import Counter
            base_counts = Counter(non_gap_bases)
            most_common_count = base_counts.most_common(1)[0][1]
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡πÄ‡∏õ‡∏≠‡∏£‡πå‡πÄ‡∏ã‡πá‡∏ô‡∏ï‡πå conservation
            conservation_percent = (most_common_count / len(non_gap_bases)) * 100
            conservation_scores.append(conservation_percent)
            
            # ‡∏à‡∏±‡∏î‡∏´‡∏°‡∏ß‡∏î‡∏´‡∏°‡∏π‡πà
            if conservation_percent == 100:
                fully_conserved += 1
            elif conservation_percent >= 90:
                highly_conserved += 1
            elif conservation_percent >= 70:
                moderately_conserved += 1
            else:
                variable += 1
        
        return {
            'fully_conserved': fully_conserved,
            'fully_conserved_percent': (fully_conserved / alignment_length) * 100,
            'highly_conserved': highly_conserved,
            'highly_conserved_percent': (highly_conserved / alignment_length) * 100,
            'moderately_conserved': moderately_conserved,
            'moderately_conserved_percent': (moderately_conserved / alignment_length) * 100,
            'variable': variable,
            'variable_percent': (variable / alignment_length) * 100,
            'conservation_score': statistics.mean(conservation_scores)
        }
    
    def _analyze_individual_sequences(self, alignment):
        """‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÅ‡∏ï‡πà‡∏•‡∏∞‡∏ï‡∏±‡∏ß"""
        seq_stats = []
        
        for record in alignment:
            sequence = str(record.seq)
            gaps = sequence.count('-')
            gap_percent = (gaps / len(sequence)) * 100
            
            seq_stats.append({
                'id': record.id,
                'length': len(sequence),
                'gaps': gaps,
                'gap_percent': gap_percent,
                'effective_length': len(sequence) - gaps
            })
        
        return seq_stats
    
    def _evaluate_overall_quality(self, gap_stats, conservation_stats):
        """‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÇ‡∏î‡∏¢‡∏£‡∏ß‡∏°"""
        score = 0
        
        # ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏à‡∏≤‡∏Å gap percentage (‡∏ô‡πâ‡∏≠‡∏¢‡∏Å‡∏ß‡πà‡∏≤ = ‡∏î‡∏µ‡∏Å‡∏ß‡πà‡∏≤)
        if gap_stats['gap_percentage'] < 10:
            score += 3
        elif gap_stats['gap_percentage'] < 20:
            score += 2
        elif gap_stats['gap_percentage'] < 40:
            score += 1
        
        # ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏à‡∏≤‡∏Å conservation
        if conservation_stats['conservation_score'] > 80:
            score += 3
        elif conservation_stats['conservation_score'] > 60:
            score += 2
        elif conservation_stats['conservation_score'] > 40:
            score += 1
        
        # ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏à‡∏≤‡∏Å fully conserved positions
        if conservation_stats['fully_conserved_percent'] > 30:
            score += 2
        elif conservation_stats['fully_conserved_percent'] > 15:
            score += 1
        
        # ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏à‡∏≤‡∏Å‡∏Ñ‡∏ß‡∏≤‡∏°‡∏™‡∏°‡πà‡∏≥‡πÄ‡∏™‡∏°‡∏≠‡∏Ç‡∏≠‡∏á gaps
        gap_range = gap_stats['max_gaps_seq'] - gap_stats['min_gaps_seq']
        if gap_range < 10:
            score += 2
        elif gap_range < 25:
            score += 1
        
        # ‡∏Å‡∏≥‡∏´‡∏ô‡∏î‡πÄ‡∏Å‡∏£‡∏î‡πÅ‡∏•‡∏∞‡∏Ñ‡∏≥‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥
        if score >= 8:
            grade = "‡πÄ‡∏¢‡∏µ‡πà‡∏¢‡∏° (A)"
            recommendation = "alignment ‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏™‡∏π‡∏á‡∏°‡∏≤‡∏Å ‡πÄ‡∏´‡∏°‡∏≤‡∏∞‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå"
        elif score >= 6:
            grade = "‡∏î‡∏µ (B)"
            recommendation = "alignment ‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏î‡∏µ ‡∏™‡∏≤‡∏°‡∏≤‡∏£‡∏ñ‡πÉ‡∏ä‡πâ‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå‡πÑ‡∏î‡πâ"
        elif score >= 4:
            grade = "‡∏û‡∏≠‡πÉ‡∏ä‡πâ (C)"
            recommendation = "alignment ‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏õ‡∏≤‡∏ô‡∏Å‡∏•‡∏≤‡∏á ‡∏Ñ‡∏ß‡∏£‡∏û‡∏¥‡∏à‡∏≤‡∏£‡∏ì‡∏≤‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÄ‡∏û‡∏¥‡πà‡∏°‡πÄ‡∏ï‡∏¥‡∏°"
        else:
            grade = "‡∏ï‡πâ‡∏≠‡∏á‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á (D)"
            recommendation = "alignment ‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏ï‡πà‡∏≥ ‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥‡πÉ‡∏´‡πâ‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÉ‡∏´‡∏°‡πà‡∏´‡∏£‡∏∑‡∏≠‡∏õ‡∏£‡∏±‡∏ö‡∏û‡∏≤‡∏£‡∏≤‡∏°‡∏¥‡πÄ‡∏ï‡∏≠‡∏£‡πå"
        
        return {
            'score': score,
            'grade': grade,
            'recommendation': recommendation
        }
    
    def run_complete_pipeline(self, target_species=None,
                              output_name="final_alignment_ON_COI.fasta"):
        """Run the whole pipeline: filter, progressive alignment, MAFFT, quality report.

        Progress and a final summary are printed to stdout.

        Args:
            target_species: Optional species keywords. When truthy they are
                forwarded to ``set_target_species`` before filtering starts.
            output_name: File name of the final MAFFT alignment, created
                inside ``self.output_dir``. Defaults to the name that was
                previously hard-coded, so existing callers are unaffected.

        Returns:
            ``None`` when any stage fails or leaves fewer than two sequences;
            otherwise the tuple ``(alignment_file, quality_stats)``.
            ``quality_stats`` is whatever ``analyze_alignment_quality``
            returned and may be falsy when the quality analysis failed even
            though the alignment itself succeeded.
        """
        print("üöÄ ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏Å‡∏£‡∏∞‡∏ö‡∏ß‡∏ô‡∏Å‡∏≤‡∏£ High-Quality Alignment Pipeline")
        print("="*60)
        print(f"üìÖ ‡πÄ‡∏ß‡∏•‡∏≤‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô: {time.strftime('%Y-%m-%d %H:%M:%S')}")

        # perf_counter() is monotonic, so the reported duration cannot be
        # distorted by wall-clock adjustments made while the pipeline runs.
        start_time = time.perf_counter()

        # Register the target species, if the caller supplied any.
        if target_species:
            self.set_target_species(target_species)

        # Step 1: sequence filtering.
        print("\nüî• ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 1: ‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÅ‡∏ö‡∏ö‡∏Ñ‡∏£‡∏≠‡∏ö‡∏Ñ‡∏•‡∏∏‡∏°")
        self.filter_by_length()

        # An alignment needs at least two sequences.
        if len(self.filtered_sequences) < 2:
            print("‚ùå ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÑ‡∏°‡πà‡πÄ‡∏û‡∏µ‡∏¢‡∏á‡∏û‡∏≠‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö alignment ‡∏´‡∏•‡∏±‡∏á‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á")
            return None

        # Step 2: progressive alignment with quality control.
        print("\nüî• ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 2: Progressive Alignment")
        result = self.progressive_alignment()
        if not result:
            print("‚ùå Progressive alignment ‡∏•‡πâ‡∏°‡πÄ‡∏´‡∏•‡∏ß")
            return None

        accepted_sequences, filtered_file = result

        if len(accepted_sequences) < 2:
            print("‚ùå ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÑ‡∏°‡πà‡πÄ‡∏û‡∏µ‡∏¢‡∏á‡∏û‡∏≠‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö MAFFT alignment")
            return None

        # Step 3: run the MAFFT alignment on the accepted sequences.
        print("\nüî• ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 3: MAFFT Alignment")
        final_alignment = os.path.join(self.output_dir, output_name)
        alignment_file = self.run_mafft_alignment(filtered_file, final_alignment)

        if not alignment_file:
            print("‚ùå MAFFT alignment ‡∏•‡πâ‡∏°‡πÄ‡∏´‡∏•‡∏ß")
            return None

        # Step 4: detailed quality analysis. A failure here is only a warning
        # because the alignment file has already been produced.
        print("\nüî• ‡∏Ç‡∏±‡πâ‡∏ô‡∏ó‡∏µ‡πà 4: ‡∏Å‡∏≤‡∏£‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û")
        quality_stats = self.analyze_alignment_quality(alignment_file)

        if not quality_stats:
            print("‚ö†Ô∏è ‡πÑ‡∏°‡πà‡∏™‡∏≤‡∏°‡∏≤‡∏£‡∏ñ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÑ‡∏î‡πâ ‡πÅ‡∏ï‡πà alignment ‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à‡πÅ‡∏•‡πâ‡∏ß")

        # Time spent in the pipeline.
        elapsed_time = time.perf_counter() - start_time

        # Final summary.
        print("\n" + "="*60)
        print("üéâ ‡∏Å‡∏£‡∏∞‡∏ö‡∏ß‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏™‡∏£‡πá‡∏à‡∏™‡∏¥‡πâ‡∏ô‡∏™‡∏°‡∏ö‡∏π‡∏£‡∏ì‡πå!")
        print("="*60)

        print("üìÅ ‡πÑ‡∏ü‡∏•‡πå‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå:")
        print(f"  üß¨ Alignment ‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏¢: {alignment_file}")
        print(f"  üìä ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏ó‡∏µ‡πà‡∏Ñ‡∏±‡∏î‡πÄ‡∏•‡∏∑‡∏≠‡∏Å: {filtered_file}")

        print("\nüìä ‡∏™‡∏ñ‡∏¥‡∏ï‡∏¥‡∏™‡∏£‡∏∏‡∏õ:")
        print(f"  ‚è±Ô∏è  ‡πÄ‡∏ß‡∏•‡∏≤‡∏ó‡∏µ‡πà‡πÉ‡∏ä‡πâ: {elapsed_time:.1f} ‡∏ß‡∏¥‡∏ô‡∏≤‡∏ó‡∏µ")
        print(f"  üìù ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô: {len(self.sequences)}")
        print(f"  ‚úÖ ‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏¢: {len(accepted_sequences)}")
        print(f"  üéØ ‡∏≠‡∏±‡∏ï‡∏£‡∏≤‡∏Å‡∏≤‡∏£‡∏Ñ‡∏±‡∏î‡πÄ‡∏•‡∏∑‡∏≠‡∏Å: {(len(accepted_sequences)/len(self.sequences)*100):.1f}%")

        if quality_stats:
            print(f"  üìè ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß alignment: {quality_stats['length']:,} bp")
            print(f"  üï≥Ô∏è  Gaps: {quality_stats['gap_percentage']:.1f}%")
            print(f"  üèÜ ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û: {quality_stats['quality_score']:.1f}/10 ({quality_stats['quality_grade']})")

        print("\nüí° ‡∏™‡∏£‡∏∏‡∏õ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏û‡∏£‡πâ‡∏≠‡∏°‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå:")

        # Readiness verdict: score >= 6 is good, >= 4 is moderate, and anything
        # else (including missing stats) is reported as needing improvement.
        if quality_stats and quality_stats['quality_score'] >= 6:
            print("  ‚úÖ Alignment ‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏î‡∏µ ‡∏û‡∏£‡πâ‡∏≠‡∏°‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå")
            print("  üéØ ‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥‡πÉ‡∏´‡πâ‡∏°‡∏≠‡∏á‡∏´‡∏≤‡∏ö‡∏£‡∏¥‡πÄ‡∏ß‡∏ì‡∏ó‡∏µ‡πà conserved ‡∏™‡∏π‡∏á‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå")
            print("  üî¨ ‡πÉ‡∏ä‡πâ‡∏ö‡∏£‡∏¥‡πÄ‡∏ß‡∏ì variable ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡πÅ‡∏¢‡∏Å‡πÅ‡∏¢‡∏∞‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå")
        elif quality_stats and quality_stats['quality_score'] >= 4:
            print("  ‚ö†Ô∏è Alignment ‡∏°‡∏µ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏õ‡∏≤‡∏ô‡∏Å‡∏•‡∏≤‡∏á")
            print("  üí° ‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥‡πÉ‡∏´‡πâ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏ö‡∏£‡∏¥‡πÄ‡∏ß‡∏ì‡∏ó‡∏µ‡πà‡∏°‡∏µ gaps ‡∏°‡∏≤‡∏Å‡∏Å‡πà‡∏≠‡∏ô‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå")
        else:
            print("  ‚ùå Alignment ‡∏≠‡∏≤‡∏à‡∏ï‡πâ‡∏≠‡∏á‡∏Å‡∏≤‡∏£‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á")
            print("  üîß ‡∏û‡∏¥‡∏à‡∏≤‡∏£‡∏ì‡∏≤‡∏õ‡∏£‡∏±‡∏ö‡πÄ‡∏Å‡∏ì‡∏ë‡πå‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡πÉ‡∏ä‡πâ‡∏û‡∏≤‡∏£‡∏≤‡∏°‡∏¥‡πÄ‡∏ï‡∏≠‡∏£‡πå MAFFT ‡∏≠‡∏∑‡πà‡∏ô")

        return alignment_file, quality_stats

# ===== Helper functions =====
import time

def print_welcome_banner():
    """Print the welcome banner: a title and three info lines framed by rules."""
    rule = "=" * 70
    banner_lines = (
        rule,
        "üß¨ HIGH-QUALITY SEQUENCE ALIGNMENT PIPELINE üß¨",
        rule,
        "üéØ ‡∏à‡∏∏‡∏î‡∏õ‡∏£‡∏∞‡∏™‡∏á‡∏Ñ‡πå: ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå‡πÅ‡∏•‡∏∞‡πÇ‡∏û‡∏£‡∏ö",
        "‚ö° ‡∏Ñ‡∏∏‡∏ì‡∏™‡∏°‡∏ö‡∏±‡∏ï‡∏¥: ‡∏Å‡∏£‡∏≠‡∏á‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û, ‡∏ï‡∏±‡∏î duplicates, progressive alignment",
        "üî¨ ‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå: Alignment ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏™‡∏π‡∏á‡∏û‡∏£‡πâ‡∏≠‡∏°‡∏Å‡∏≤‡∏£‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå‡∏•‡∏∞‡πÄ‡∏≠‡∏µ‡∏¢‡∏î",
        rule,
    )
    # One print call per line, exactly as before, so stdout is unchanged.
    for banner_line in banner_lines:
        print(banner_line)

# ===== Usage =====
if __name__ == "__main__":
    # Script entry point: build a SequenceAligner for a fixed input FASTA,
    # echo the active settings, run the complete pipeline and report the
    # outcome. Exits with status 1 when the input file is missing or the
    # aligner cannot be constructed.

    # Show the welcome banner.
    print_welcome_banner()

    # Input file.
    input_fasta = "/Users/sarawut/Desktop/oneclick/thai_fish_sequences_ON_COI.fasta"

    # Make sure the input file exists before doing any work.
    if not os.path.exists(input_fasta):
        print(f"‚ùå ‡πÑ‡∏°‡πà‡∏û‡∏ö‡πÑ‡∏ü‡∏•‡πå‡∏≠‡∏¥‡∏ô‡∏û‡∏∏‡∏ï: {input_fasta}")
        print("üí° ‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡πÄ‡∏™‡πâ‡∏ô‡∏ó‡∏≤‡∏á‡πÑ‡∏ü‡∏•‡πå‡πÅ‡∏•‡∏∞‡∏•‡∏≠‡∏á‡πÉ‡∏´‡∏°‡πà")
        # SystemExit instead of the site-provided exit() helper, which is
        # intended for the interactive shell and is not always defined.
        raise SystemExit(1)

    # Create the aligner object (this reads the FASTA file).
    try:
        aligner = SequenceAligner(input_fasta)
    except Exception as e:
        print(f"‚ùå ‡πÑ‡∏°‡πà‡∏™‡∏≤‡∏°‡∏≤‡∏£‡∏ñ‡∏™‡∏£‡πâ‡∏≤‡∏á aligner ‡πÑ‡∏î‡πâ: {e}")
        raise SystemExit(1)

    # Target species (adjust as needed).
    target_species = [
        "oreochromis niloticus",  # the main species of interest
        # or use keywords that appear in the species name
        # add further target species here
    ]

    print("\nüéØ ‡∏Å‡∏≤‡∏£‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏£‡∏±‡∏ô:")
    print(f"  üìÅ ‡πÑ‡∏ü‡∏•‡πå‡∏≠‡∏¥‡∏ô‡∏û‡∏∏‡∏ï: {input_fasta}")
    print(f"  üéØ ‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏´‡∏•‡∏±‡∏Å: {target_species}")
    print(f"  üìä ‡πÄ‡∏Å‡∏ì‡∏ë‡πå‡∏Ñ‡∏ß‡∏≤‡∏°‡∏¢‡∏≤‡∏ß: ¬±{aligner.length_tolerance*100:.0f}% ‡∏à‡∏≤‡∏Å‡∏Ñ‡πà‡∏≤‡∏Å‡∏•‡∏≤‡∏á")
    print(f"  üî¨ ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô alignment ‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡πà‡∏≥: {aligner.min_alignment_score}")
    print(f"  üï≥Ô∏è  % gaps ‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î‡∏ó‡∏µ‡πà‡∏¢‡∏≠‡∏°‡∏£‡∏±‡∏ö: {aligner.max_gap_percentage*100:.0f}%")

    # Ask the user whether to continue.
    print("\n‚ùì ‡∏ï‡πâ‡∏≠‡∏á‡∏Å‡∏≤‡∏£‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£‡∏ï‡πà‡∏≠‡∏´‡∏£‡∏∑‡∏≠‡πÑ‡∏°‡πà? (y/n): ", end="")

    # The interactive confirmation is disabled for unattended runs;
    # uncomment the lines below to require a real answer.
    # user_confirm = input().lower().strip()
    # if user_confirm not in ['y', 'yes', '‡πÉ‡∏ä‡πà']:
    #     print("üö´ ‡∏¢‡∏Å‡πÄ‡∏•‡∏¥‡∏Å‡∏Å‡∏≤‡∏£‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£")
    #     exit(0)

    print("y")  # auto-answer for unattended runs

    # Run the whole pipeline.
    try:
        result = aligner.run_complete_pipeline(target_species)

        if result:
            alignment_file, stats = result
            print("\n‚ú® ‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à!")
            print(f"üìÅ ‡πÑ‡∏ü‡∏•‡πå alignment: {alignment_file}")

            if stats:
                print(f"üèÜ ‡∏Ñ‡∏∞‡πÅ‡∏ô‡∏ô‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û: {stats['quality_score']:.1f}/10")

                # Suggested next steps.
                print("\nüöÄ ‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô‡∏ï‡πà‡∏≠‡πÑ‡∏õ:")
                print("  1. ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡πÑ‡∏ü‡∏•‡πå alignment ‡∏î‡πâ‡∏ß‡∏¢ sequence viewer")
                print("  2. ‡∏£‡∏∞‡∏ö‡∏∏‡∏ö‡∏£‡∏¥‡πÄ‡∏ß‡∏ì conserved ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå")
                print("  3. ‡∏´‡∏≤‡∏ö‡∏£‡∏¥‡πÄ‡∏ß‡∏ì variable ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡πÅ‡∏¢‡∏Å‡πÅ‡∏¢‡∏∞‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå")
                print("  4. ‡πÉ‡∏ä‡πâ‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏°‡∏∑‡∏≠‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡πÑ‡∏û‡∏£‡πÄ‡∏°‡∏≠‡∏£‡πå‡πÄ‡∏ä‡πà‡∏ô Primer3 ‡∏´‡∏£‡∏∑‡∏≠ NCBI Primer-BLAST")

        else:
            print("\n‚ùå ‡∏Å‡∏£‡∏∞‡∏ö‡∏ß‡∏ô‡∏Å‡∏≤‡∏£‡∏•‡πâ‡∏°‡πÄ‡∏´‡∏•‡∏ß")
            print("üí° ‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥:")
            print("  - ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡∏Ç‡∏≠‡∏á‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏≠‡∏¥‡∏ô‡∏û‡∏∏‡∏ï")
            print("  - ‡∏•‡∏≠‡∏á‡∏õ‡∏£‡∏±‡∏ö‡πÄ‡∏Å‡∏ì‡∏ë‡πå‡∏Å‡∏≤‡∏£‡∏Å‡∏£‡∏≠‡∏á‡πÉ‡∏´‡πâ‡∏´‡∏•‡∏ß‡∏°‡∏Ç‡∏∂‡πâ‡∏ô")
            print("  - ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏ß‡πà‡∏≤‡∏ã‡∏µ‡πÄ‡∏Ñ‡∏ß‡∏ô‡∏ã‡πå‡∏°‡∏≤‡∏à‡∏≤‡∏Å‡∏™‡∏õ‡∏µ‡∏ä‡∏µ‡∏™‡πå‡∏ó‡∏µ‡πà‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Ç‡πâ‡∏≠‡∏á‡∏Å‡∏±‡∏ô")

    except KeyboardInterrupt:
        print("\n‚ö†Ô∏è ‡∏ú‡∏π‡πâ‡πÉ‡∏ä‡πâ‡∏´‡∏¢‡∏∏‡∏î‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô")
    except Exception as e:
        # Top-level boundary: report the error and fall through to the footer.
        print(f"\n‚ùå ‡πÄ‡∏Å‡∏¥‡∏î‡∏Ç‡πâ‡∏≠‡∏ú‡∏¥‡∏î‡∏û‡∏•‡∏≤‡∏î‡πÑ‡∏°‡πà‡∏Ñ‡∏≤‡∏î‡∏Ñ‡∏¥‡∏î: {e}")
        print("üí° ‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡πÑ‡∏ü‡∏•‡πå‡∏≠‡∏¥‡∏ô‡∏û‡∏∏‡∏ï‡πÅ‡∏•‡∏∞‡∏•‡∏≠‡∏á‡πÉ‡∏´‡∏°‡πà")

    print(f"\nüìÖ ‡πÄ‡∏ß‡∏•‡∏≤‡∏™‡∏¥‡πâ‡∏ô‡∏™‡∏∏‡∏î: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print("üôè ‡∏Ç‡∏≠‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏ó‡∏µ‡πà‡πÉ‡∏ä‡πâ High-Quality Sequence Alignment Pipeline!")

🧬 HIGH-QUALITY SEQUENCE ALIGNMENT PIPELINE 🧬
🎯 จุดประสงค์: เตรียมซีเควนซ์สำหรับการออกแบบไพรเมอร์และโพรบ
⚡ คุณสมบัติ: กรองคุณภาพ, ตัด duplicates, progressive alignment
🔬 ผลลัพธ์: Alignment คุณภาพสูงพร้อมการวิเคราะห์ละเอียด
🔍 กำลังอ่านซีเควนซ์...
  พบซีเควนซ์ทั้งหมด: 1642
  ความยาวเฉลี่ย: 2,693 bp
  ความยาวกลาง: 651 bp
  สั้นสุด: 140 bp
  ยาวสุด: 17,388 bp

🎯 การตั้งค่าสำหรับการรัน:
  📁 ไฟล์อินพุต: /Users/sarawut/Desktop/oneclick/thai_fish_sequences_ON_COI.fasta
  🎯 สปีชีส์หลัก: ['oreochromis niloticus']
  üìä