In [1]:
def encode_rle(x):
    import numpy as np
    x_shift = np.roll(x, 1)
    mask = x_shift != x
    mask[0] = True
    unique_num = x[mask]
    ind_start = np.where(mask)[0]
    ind_end = np.roll(ind_start, -1)
    ind_end[-1] = x.size 
    unique_len = ind_end - ind_start
    return (unique_num, unique_len)

In [101]:
import numpy as np

class RleSequence:
    
    def __init__(self, input_sequence):
        self.size = input_sequence.size
        self.num_, self.len_ = encode_rle(input_sequence)
        
    def __getitem__(self, item):
        if isinstance(item, slice):
            now = item.start
            stop = item.stop
            step = item.step
            if step == None:
                step = 1
            if now == None:
                now = 0
            if stop == None:
                stop = self.size
            if now < 0:
                now = self.size + now
            if stop < 0:
                stop = self.size + stop
            if now >= self.size or now < 0:
                raise IndexError
            stop = min(stop, self.size)
            
            ans = np.arange(now, stop, step, dtype=self.num_.dtype)
            i_num = 0
            cur_len = self.len_[0]
            
            for i in range(ans.size):
                while ans[i] >= cur_len:
                    i_num += 1
                    cur_len += self.len_[i_num]
                ans[i] = self.num_[i_num]
                now += step
                
            return ans
                    
        else:
            if item < 0:
                item = self.size + item
            if item >= self.size or item < 0:
                raise IndexError
            cur_len = 0
            for i_num in range(self.len_.size):
                cur_len += self.len_[i_num]
                if item < cur_len:
                    return self.num_[i_num]
                
    def __iter__(self):
        self.i_ = 0
        self.i_num_ = 0
        return self
    
    def __next__(self):
        if self.i_num_ == self.num_.size:
            raise StopIteration
        ans = self.num_[self.i_num_]
        self.i_ += 1
        if self.len_[self.i_num_] == self.i_:
            self.i_num_ += 1
            self.i_ = 0
        return ans
    
    def __contains__(self, item):
        return np.any(self.num_ == item)


In [102]:
rle_seq = RleSequence(np.array([1, 1, 2, 2, 3, 4, 5]))

In [103]:
rle_seq[-4: 6: 1]

array([2, 3, 4])

In [104]:
rle_seq = RleSequence(np.array([2, 2, 3, 1, 0, 2, 0, 2, 1, 0, 0, 0,
                                1, 0, 1, 2, 3, 0, 1, 2, 1, 3, 0,
                                2, 3, 2, 1, 1, 1, 0]))
print(rle_seq[1:5:2])
print(rle_seq[1:20:3])
print(rle_seq[4:29:3])
print(rle_seq[4:7:1])
print(rle_seq[17:22:4])
print(rle_seq[2:2:1])

[2 1]
[2 0 2 0 0 3 2]
[0 2 0 0 3 2 0 2 1]
[0 2 0]
[0 3]
[]


In [105]:
rle_seq = RleSequence(np.array([2, 2, 3, 1, 0, 2, 0, 2, 1, 0, 0, 0,
                                1, 0, 1, 2, 3, 0, 1, 2, 1, 3, 0,
                                2, 3, 2, 1, 1, 1, 0]))
print(rle_seq[1:5:])
print(rle_seq[1:20:])
print(rle_seq[4:29:])
print(rle_seq[4:7:])
print(rle_seq[17:22:])
print(rle_seq[2:2:])

[2 3 1 0]
[2 3 1 0 2 0 2 1 0 0 0 1 0 1 2 3 0 1 2]
[0 2 0 2 1 0 0 0 1 0 1 2 3 0 1 2 1 3 0 2 3 2 1 1 1]
[0 2 0]
[0 1 2 1 3]
[]


In [106]:
rle_seq = RleSequence(np.array([2, 2, 3, 1, 0, 2, 0, 2, 1, 0, 0, 0,
                                1, 0, 1, 2, 3, 0, 1, 2, 1, 3, 0,
                                2, 3, 2, 1, 1, 1, 0]))
print(rle_seq[1::2])
print(rle_seq[1::3])
print(rle_seq[4::3])
print(rle_seq[4::1])
print(rle_seq[17::4])
print(rle_seq[2::1])

[2 1 2 2 0 0 0 2 0 2 3 2 2 1 0]
[2 0 2 0 0 3 2 0 2 1]
[0 2 0 0 3 2 0 2 1]
[0 2 0 2 1 0 0 0 1 0 1 2 3 0 1 2 1 3 0 2 3 2 1 1 1 0]
[0 3 2 0]
[3 1 0 2 0 2 1 0 0 0 1 0 1 2 3 0 1 2 1 3 0 2 3 2 1 1 1 0]


In [107]:
rle_seq = RleSequence(np.array([1, 1, 2, 1, 2, 2, 3, 3]))
for i in range(8):
    print(rle_seq[i])

1
1
2
1
2
2
3
3


In [108]:
%%time
np.random.seed(32)
tmp_array = np.random.randint(0, 3, 10 ** 6)

rle_seq = RleSequence(tmp_array)

sum_elements = 0
tmp = rle_seq[1:905005:2]
print(np.sum(tmp))

452902
Wall time: 2.46 s


In [134]:
from collections.abc import Iterable

class linearize:
    
    def __init__(self, sequence):
        self.len = len(sequence)
        self.iter = iter(sequence)
        
    def __iter__(self):
        self.subiter = None
        return self
    
    def __next__(self):
        while (1):
            if self.subiter == None:
                element = next(self.iter)
                if not (isinstance(element, str) and len(element) < 2) and isinstance(element, Iterable):
                    self.subiter = iter(linearize(element))
                else:
                    return element

            # subiter != None

            try:
                element = next(self.subiter)
                return element
            except StopIteration:
                self.subiter = None


In [17]:
import numpy as np

def get_indices(sequence, indices):
    if isinstance(sequence, list):
        return [sequence[i] for i in indices]
    return sequence[indices]

def BatchGenerator(list_of_sequences, batch_size, shuffle=False):
    if len(list_of_sequences) != 0:
        length = len(list_of_sequences[0])
        if shuffle:
            indices_permutation = np.random.permutation(length)
        else:
            indices_permutation = np.arange(length)
        for j in range(0, length, batch_size):
            indices = indices_permutation[j:j+batch_size]
            yield [get_indices(seq, indices) for seq in list_of_sequences]

In [22]:
bg = BatchGenerator(list_of_sequences=[[1, 2, 3, 5, 1, 'a'], [0, 0, 1, 1, 0, 1]], batch_size=4, shuffle=True)

In [23]:
for elem in bg:
    print(elem)

[['a', 1, 1, 5], [1, 0, 0, 1]]
[[2, 3], [0, 1]]


In [24]:
def WordContextGenerator(words, window_size):
    length = len(words)
    for i in range(length):
        begin = max(0, i - window_size) 
        end = min(length, i + window_size + 1)
        for j in range(begin, end):
            if i != j:
                yield words[i], words[j]

In [25]:
s = ['мама', "очень", "хорошо", "мыла", "красивую", "раму"]


In [26]:
for elem in WordContextGenerator(s, 2):
    print(elem)

('мама', 'очень')
('мама', 'хорошо')
('очень', 'мама')
('очень', 'хорошо')
('очень', 'мыла')
('хорошо', 'мама')
('хорошо', 'очень')
('хорошо', 'мыла')
('хорошо', 'красивую')
('мыла', 'очень')
('мыла', 'хорошо')
('мыла', 'красивую')
('мыла', 'раму')
('красивую', 'хорошо')
('красивую', 'мыла')
('красивую', 'раму')
('раму', 'мыла')
('раму', 'красивую')
