In [27]:
import collections
import numpy as np

# Read the entire corpus. readline() would stop at the first newline and
# silently drop the rest of a multi-line file; read() returns the same text
# when the file happens to be a single line.
with open("assets/alice_oz.txt", 'r') as f:
    text = f.read()
# split() with no arguments splits on any whitespace run, so the resulting
# words never contain "\n" and NONWORD below cannot collide with a real word.
splitText = text.split()

NPREF = 2        # number of words in a prefix
NONWORD = "\n"   # sentinel used to pad the initial prefix and mark end of input
MAXGEN = 200     # cap on generated words (same value as Chain's MAXGEN default)

class Prefix:
    """Fixed-length window of words that can be used as a dictionary key.

    The words are held in ``self.p`` (a ``collections.deque``), oldest first.
    Two prefixes are equal when they hold the same words in the same order.

    A Prefix is mutable. Mutating one while it is stored as a dictionary key
    changes its hash and makes the entry unreachable, so store a ``clone()``
    instead of the live object.
    """

    def __init__(self, n, string):
        """Create a prefix made of ``n`` copies of ``string``."""
        self.__multiplier = 31
        self.p = collections.deque([string] * n)

    def __hash__(self):
        """Combine the word hashes in order with a multiplicative hash."""
        h = 0
        for word in self.p:
            h = self.__multiplier * h + hash(word)
        return h

    def __eq__(self, other):
        """Return True when ``other`` is a Prefix holding the same words.

        Returns ``NotImplemented`` for non-Prefix operands so that comparing
        against an unrelated object yields False instead of raising.
        """
        if not isinstance(other, Prefix):
            return NotImplemented
        # Prefixes of different lengths are never equal; without this check a
        # shorter prefix would compare equal to a longer one sharing its start.
        if len(self.p) != len(other.p):
            return False
        return not any(mine != theirs for mine, theirs in zip(self.p, other.p))

    def clone(self):
        """Return an independent copy whose deque can be mutated freely."""
        # Build an empty prefix and copy the words in, so the clone does not
        # depend on any module-level configuration.
        copy = Prefix(0, None)
        copy.p = self.p.copy()
        return copy
    
class Chain:
    """Markov chain over word sequences.

    ``statetab`` maps each observed prefix (a ``Prefix`` of ``NPREF`` words)
    to the list of words that followed it in the training input. Duplicates
    are kept, so frequent successors are drawn more often.
    """

    def __init__(self, NPREF = 2, NONWORD = "\n", MAXGEN = 200):
        """Create an empty chain.

        NPREF   -- number of words in a prefix.
        NONWORD -- sentinel that pads the initial prefix and marks end of input.
        MAXGEN  -- default number of words produced by ``generate()``.
        """
        # map<Prefix, vector<string>>
        self.statetab = {}
        self.NPREF = NPREF
        self.NONWORD = NONWORD
        self.MAXGEN = MAXGEN

        # Sliding window over the words most recently passed to add().
        self.prefix = Prefix(self.NPREF, self.NONWORD)

    def add(self, string):
        """Record ``string`` as a successor of the current prefix, then slide."""
        suf = self.statetab.get(self.prefix)
        if suf is None:
            suf = []
            # Store a clone: self.prefix is mutated below and must not be
            # the object used as the dictionary key.
            self.statetab[self.prefix.clone()] = suf

        suf.append(string)
        # Append before popping so the window also works when NPREF is 0.
        self.prefix.p.append(string)
        self.prefix.p.popleft()

    def build(self, inStream):
        """Train on every item of the iterable ``inStream``.

        A NONWORD terminator is recorded after the last item. The sliding
        prefix is then reset, so a later ``build`` call starts from the
        initial all-NONWORD state. That is the state ``generate`` starts
        from, which keeps the beginning of every corpus reachable.
        """
        for word in inStream:
            self.add(word)
        self.add(self.NONWORD)
        self.prefix = Prefix(self.NPREF, self.NONWORD)

    def generate(self, nwords=None):
        """Return a list of at most ``nwords`` randomly generated words.

        ``nwords`` defaults to ``self.MAXGEN``. Generation stops early when
        the NONWORD terminator is drawn or when the current prefix has no
        recorded successors (for example on an untrained chain). In both
        cases the words produced so far are returned, so the result is
        always a list.
        """
        if nwords is None:
            nwords = self.MAXGEN

        chain = []
        new = Prefix(self.NPREF, self.NONWORD)
        for _ in range(nwords):
            s = self.statetab.get(new)
            if not s:
                break

            # Uniform draw over the recorded successors; duplicates in the
            # list give frequent successors proportionally more weight.
            word = s[np.random.randint(len(s))]
            if word == self.NONWORD:
                break

            chain.append(word)
            new.p.append(word)
            new.p.popleft()
        return chain

In [56]:
# Train a chain with the default settings on the whitespace-split corpus.
c = Chain()
c.build(splitText)

In [59]:
# Generate up to 30 words and join them with spaces for display.
' '.join(c.generate(30))

"Alice was only sobbing,' she thought, 'and hand round the neck of the West; and that kind of you must all serve him, and very soon finished it off. 'If"

-----

In [37]:
import pandas as pd
# Load the Bach chorale dataset; the path is relative to the working directory.
df = pd.read_csv('assets/bach_choral_set_dataset.csv')

In [38]:
# List the distinct chorale identifiers present in the dataset.
df['choral_ID'].unique()

array(['000106b_', '000206b_', '000306b_', '000408b_', '000507b_',
       '000606b_', '000707b_', '000907b_', '001007b_', '001106b_',
       '001207b_', '001306b_', '001606b_', '001707b_', '001805b_',
       '001907ch', '002406bs', '002506b_', '002806b_', '002908ch',
       '003006b_', '003109b_', '003206b_', '003306b_', '003608b2',
       '003806b_', '003907b_', '003907bv', '004006b_', '004008b_',
       '005708b_', '012006b_', '012106b_', '012206b_', '012306b_',
       '012406b_', '012506b_', '012606b_', '012606bv', '012705b_',
       '012805b_', '013506b_', '013705ch', '013906b_', '014007b_',
       '014403b_', '014406b_', '014500ba', '014505b_', '014505bv',
       '014608b_', '014806b_', '014806bv', '015105b_', '015301b_',
       '015305b_', '015309b_', '015403b_', '015408b_', '015505b_'],
      dtype=object)

In [50]:
# Keep only the events of chorale '015505b_' and preview the first rows.
first = df.loc[df['choral_ID'].eq('015505b_')]
first.head()

Unnamed: 0,choral_ID,event_number,pitch_1,pitch_2,pitch_3,pitch_4,pitch_5,pitch_6,pitch_7,pitch_8,pitch_9,pitch_10,pitch_11,pitch_12,bass,meter,chord_label
5556,015505b_,1,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,5,F_M
5557,015505b_,2,YES,NO,NO,NO,NO,YES,NO,NO,NO,NO,YES,NO,F,3,F_M
5558,015505b_,3,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,A,4,F_M
5559,015505b_,4,YES,NO,YES,NO,NO,YES,NO,NO,NO,NO,YES,NO,Bb,3,F_M
5560,015505b_,5,YES,NO,NO,NO,YES,NO,NO,NO,NO,NO,NO,NO,C,5,C_M


In [51]:
# Train a fresh chain on the chord-label sequence of the selected chorale.
bach_chain = Chain()
bach_chain.build(first['chord_label'])

In [44]:
# Snapshot the state table, copying each suffix list. A plain reference to
# bach_chain.statetab would be mutated by any later build() on the same chain,
# so a comparison against it could never detect a change.
f1 = {prefix: list(suffixes) for prefix, suffixes in bach_chain.statetab.items()}

In [52]:
# Select chorale '000306b_' and feed its chord labels into the existing chain.
# build() extends bach_chain in place; it does not create a new chain.
second = df.loc[df['choral_ID'].eq('000306b_')]
bach_chain.build(second['chord_label'])

In [53]:
# Reference (not a copy) to the chain's current state table.
f2 = bach_chain.statetab

In [54]:
# Compare the two captured state tables for equality.
# NOTE(review): the execution counts show f1 was captured (In [44]) before
# bach_chain was last rebuilt (In [51]); re-run the cells in order to confirm
# that this result is reproducible.
f1 == f2

False