In [7]:
class Index(object): # Changed to 
    def __init__(self, t, k, ival):
        ''' Create index from all substrings of size 'k' with 'ival' skips '''
        self.k = k  # k-mer length (k)
        self.ival = ival  # Interval for skipping
        self.index = {}
        for i in range(len(t) - k + 1):  # For each possible k-mer
            if i % ival == 0:  # Apply skips based on interval value
                kmer = t[i:i+k]
                if kmer not in self.index:
                    self.index[kmer] = [i]
                else:
                    self.index[kmer].append(i)

    def query(self, p):
        ''' Return index hits for first k-mer of P with consideration of skips '''
        kmer = p[:self.k]  # query with first k-mer
        return self.index.get(kmer, [])[:]  # Returns p 

    def displayIndex(self):
        ''' Displays the content of the index '''
        for kmer, positions in self.index.items():
            print(f"{kmer}: {positions}")

In [21]:
def queryIndex(p, t, index):
    k = index.k
    offsets = []
    for i in index.query(p):
        if p[k:] == t[i+k:i+len(p)]:  # verify that rest of P matches
            offsets.append(i)
    return offsets

In [24]:
t = 'ACTTGGAGATCTTTGAGGCTAGGTATTCGGGATCGAAGCTCATTTCGGGGATCGATTACGATATGGTGGGTATTCGGGA'
p = 'GGTATTCGGGA'

In [23]:
# Index with k = 4 and ival = 2
index = Index(t, 4, 2)

In [25]:
print(queryIndex(p, t, index))

[68]


In [27]:
# Trying with smaller pattern
index = Index(t, 2, 1)
print(queryIndex('AT', t, index))

[8, 24, 31, 41, 50, 54, 60, 62, 71]
