In [13]:
def pattern_count(text: str, pattern: str) -> int:
    """Count the occurrences of ``pattern`` in ``text``, overlaps included.

    Every start index of ``text`` is tried as a match position, so
    overlapping hits (``'AA'`` inside ``'AAA'``) are each counted. An empty
    ``pattern`` matches at every start index and therefore yields
    ``len(text)``.

    Args:
        text: The string to scan.
        pattern: The substring to look for.

    Returns:
        The number of start positions at which ``pattern`` occurs.
    """
    # startswith with an offset compares the window beginning at ``start``
    # and is False whenever the pattern would run past the end of the text.
    return sum(
        1 for start in range(len(text)) if text.startswith(pattern, start)
    )

# Demo: overlapping occurrences are counted (prints 3, then 4).
print(pattern_count(text='ACGTACGTACGT', pattern='CG'))
print(pattern_count(text='ACGTACCGTACCCCGT', pattern='CC'))
            

3
4


In [15]:
def frequent_words(text: str, k: int) -> list[str]:
    """Return the most frequent k-mers (length-``k`` substrings) of ``text``.

    Args:
        text: The sequence to scan.
        k: Length of the substrings to count; expected to be positive.

    Returns:
        Every k-mer whose occurrence count equals the maximum count, in order
        of first appearance in ``text``. An empty list is returned when
        ``text`` is shorter than ``k`` and therefore contains no k-mer.
    """
    from collections import Counter

    # Overlapping windows: one k-mer per start index 0 .. len(text) - k.
    kmer_counts = Counter(text[i:i + k] for i in range(len(text) - k + 1))

    # max() raises ValueError on an empty collection, so bail out early when
    # the text yielded no window at all.
    if not kmer_counts:
        return []

    max_freq = max(kmer_counts.values())
    return [kmer for kmer, freq in kmer_counts.items() if freq == max_freq]

# Demo: 'TTTT' is the only 4-mer that reaches the top count in this text.
print(frequent_words(text='CGTTTTGAACATTTTCAACAAGTTTTGCAACATTTT', k=4))


['TTTT']


In [16]:
def minimum_skew(genome: str) -> list[int]:
    """Find the positions at which the G-C skew of ``genome`` is lowest.

    The skew starts at 0 before any nucleotide is read, goes up by one for
    every ``'G'``, down by one for every ``'C'``, and is unchanged by any
    other character.

    Args:
        genome: The sequence to scan.

    Returns:
        All indices ``i`` (0 .. len(genome)) where the skew of the first
        ``i`` characters equals the minimum skew. Index 0 refers to the empty
        prefix, so an empty genome yields ``[0]``.
    """
    step = {'G': 1, 'C': -1}

    running = 0
    prefix_skew = [running]  # skew of the empty prefix
    for base in genome:
        running += step.get(base, 0)
        prefix_skew.append(running)

    lowest = min(prefix_skew)
    return [pos for pos, level in enumerate(prefix_skew) if level == lowest]


# Demo: the skew of this genome is lowest right after its first three C's.
genome = "CCCGGGCCGG"
print(minimum_skew(genome=genome))


[3]


In [21]:
def hamming_distance(s1: str, s2: str) -> int:
    """Count the positions at which two strings differ.

    Characters are compared pairwise with ``zip``, so when the strings have
    different lengths only the first ``min(len(s1), len(s2))`` positions are
    compared and the surplus tail is ignored.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The number of compared positions holding different characters.
    """
    mismatches = 0
    for left, right in zip(s1, s2):
        if left != right:
            mismatches += 1
    return mismatches

def neighbors(pattern: str, d: int) -> set[str]:
    """Generate all k-mers within d mismatches of the given pattern.

    The neighborhood is built recursively: the neighbors of the suffix
    ``pattern[1:]`` are computed first, then each one is extended with a
    first character. A suffix neighbor that has not used up the whole
    mismatch budget may take any nucleotide in front; one that already sits
    at distance ``d`` must keep the pattern's own first character.

    Args:
        pattern: The k-mer whose neighborhood is wanted.
        d: Maximum number of mismatching positions allowed; must be >= 0.

    Returns:
        The set of strings of the same length as ``pattern`` that differ from
        it in at most ``d`` positions, drawing replacements from ``"ACGT"``.
        An empty ``pattern`` yields ``{""}``.

    Raises:
        ValueError: If ``d`` is negative.
    """
    if d < 0:
        raise ValueError("d must be non-negative")

    nucleotides = "ACGT"

    # An empty pattern has no position to mutate. Without this guard the
    # recursion on pattern[1:] would never reach a base case.
    if d == 0 or not pattern:
        return {pattern}
    if len(pattern) == 1:
        return set(nucleotides)

    first, suffix = pattern[0], pattern[1:]
    neighborhood = set()
    for text in neighbors(suffix, d):
        if hamming_distance(suffix, text) < d:
            # Budget left over: the first position may hold any nucleotide.
            neighborhood.update(nucleotide + text for nucleotide in nucleotides)
        else:
            # Budget exhausted by the suffix: the first position must match.
            neighborhood.add(first + text)
    return neighborhood

def frequent_words_with_mismatches(text: str, k: int, d: int) -> list[str]:
    """Find the most frequent k-mers with up to d mismatches in a given text.

    Each length-``k`` window of ``text`` casts one vote for every string in
    its ``neighbors(window, d)`` set; the strings with the most votes win. A
    winner therefore does not have to occur verbatim in ``text``.

    Args:
        text: The sequence to scan.
        k: Length of the k-mers; expected to be positive.
        d: Maximum number of mismatches passed on to ``neighbors``.

    Returns:
        All k-mers that reach the maximum vote count. Their order follows the
        order in which they were first generated, which goes through set
        iteration and should not be relied upon. An empty list is returned
        when ``text`` is shorter than ``k`` and so has no window.
    """
    kmer_counts: dict[str, int] = {}

    # Slide a window of length k over the text (overlapping windows).
    for i in range(len(text) - k + 1):
        # Credit every k-mer within d mismatches of this window.
        for neighbor in neighbors(text[i:i + k], d):
            kmer_counts[neighbor] = kmer_counts.get(neighbor, 0) + 1

    # max() raises ValueError on an empty collection, so bail out early when
    # the text yielded no window at all.
    if not kmer_counts:
        return []

    max_freq = max(kmer_counts.values())
    return [kmer for kmer, count in kmer_counts.items() if count == max_freq]

# Example usage
text = "ACGTTGCATGTCGCATGATGCATGAGAGCT"
k = 4
d = 1
# Unpacking with sep="  " prints the k-mers separated by two spaces.
print(*frequent_words_with_mismatches(text, k, d), sep="  ")  # Output: ATGT GATG ATGC


ATGT  GATG  ATGC
