In [1]:
def analyze_sequence(sequence, pattern):
    """
    Analyzes a sequence to find occurrences of a given pattern and determines orientations.
    
    Args:
        sequence (str): The input sequence (e.g., "Aalpha Bbeta gammaC ...")
        pattern (str): The pattern to search for (e.g., "AalphaBbeta")
    
    Returns:
        list: A list of dictionaries containing match information
    """
    
    # First, split the pattern into its component pairs
    pattern_pairs = []
    i = 0
    while i < len(pattern):
        # Find the next uppercase letter which starts a new pair
        if i+1 < len(pattern) and pattern[i+1].islower():
            # Standard orientation (Aalpha)
            pair = pattern[i] + pattern[i+1]
            pattern_pairs.append((pair, 'standard'))
            i += 2
        elif pattern[i].islower() and i > 0 and pattern[i-1].isupper():
            # Reverse orientation (alphaA)
            pair = pattern[i] + pattern[i-1]
            pattern_pairs.append((pair, 'reverse'))
            # Don't increment i here as we've already processed the uppercase
        else:
            i += 1
    
    if not pattern_pairs:
        return []
    
    # Convert pattern pairs to regex pattern
    regex_patterns = []
    for pair, orientation in pattern_pairs:
        if orientation == 'standard':
            # Matches either "Aalpha" or "A alpha"
            regex_patterns.append(f"{pair[0]}\\s*{pair[1]}")
        else:
            # Matches either "alphaA" or "alpha A"
            regex_patterns.append(f"{pair[0]}\\s*{pair[1]}")
    
    full_regex = ''.join(regex_patterns)
    
    # Find all matches in the sequence
    import re
    matches = []
    for match in re.finditer(full_regex, sequence.replace(" ", "")):
        start_pos = match.start()
        end_pos = match.end()
        matched_text = match.group()
        
        # Now determine the orientation of each pair in the match
        pair_info = []
        i = 0
        while i < len(matched_text):
            if i+1 < len(matched_text):
                if matched_text[i].isupper() and matched_text[i+1].islower():
                    # Standard orientation
                    pair_info.append({
                        'pair': matched_text[i] + matched_text[i+1],
                        'orientation': 'standard',
                        'representation': matched_text[i] + matched_text[i+1]
                    })
                    i += 2
                elif matched_text[i].islower() and matched_text[i+1].isupper():
                    # Reverse orientation
                    pair_info.append({
                        'pair': matched_text[i] + matched_text[i+1],
                        'orientation': 'reverse',
                        'representation': matched_text[i] + matched_text[i+1]
                    })
                    i += 2
                else:
                    i += 1
            else:
                i += 1
        
        matches.append({
            'start': start_pos,
            'end': end_pos,
            'matched_text': matched_text,
            'pairs': pair_info
        })
    
    return matches


In [2]:

# Example usage
if __name__ == "__main__":
    sequence = "Aalpha Bbeta gammaC Ddelta epsilonE Fzeta"
    pattern = "AalphaBbeta"
    
    results = analyze_sequence(sequence, pattern)
    
    print(f"Searching for pattern '{pattern}' in sequence: '{sequence}'")
    print("\nResults:")
    for result in results:
        print(f"\nMatch found at positions {result['start']}-{result['end']}:")
        print(f"Full match: {result['matched_text']}")
        print("Pair details:")
        for pair in result['pairs']:
            print(f"  - Pair: {pair['pair']}, Orientation: {pair['orientation']}, Representation: {pair['representation']}")

Searching for pattern 'AalphaBbeta' in sequence: 'Aalpha Bbeta gammaC Ddelta epsilonE Fzeta'

Results:

Match found at positions 0-10:
Full match: AalphaBbet
Pair details:
  - Pair: Aa, Orientation: standard, Representation: Aa
  - Pair: aB, Orientation: reverse, Representation: aB
