In [1]:
from bayesian_classifier.poems import BayesianCorpus, get_confusion_matrix, combine_vocabs

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import requests
raw_poe = requests.get('https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/edgar_allan_poe.txt').content
raw_frost = requests.get('https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/robert_frost.txt').content

In [3]:
frost_corpus = BayesianCorpus(raw_frost, 'frost', ragged=True)
poe_corpus = BayesianCorpus(raw_poe, 'poe', ragged=True)

In [4]:
combine_vocabs([frost_corpus, poe_corpus])

In [5]:
res = get_confusion_matrix([frost_corpus, poe_corpus])
print(res)

Building transition matrix with size 2238 X 2238
Error, transition matrix probabilities for label frost sum to outside acceptable range.
Building transition matrix with size 2238 X 2238
Error, transition matrix probabilities for label poe sum to outside acceptable range.
          pred_frost  pred_poe
is_frost         146       250
is_poe            34       166


In [6]:
frost_corpus.tmat

<2238x2238 sparse matrix of type '<class 'numpy.float64'>'
	with 6524 stored elements in Compressed Sparse Row format>

In [7]:
frost_sample = frost_corpus.train[0]
poe_sample = poe_corpus.train[0]

frost_prob = frost_corpus.infer_logprob(frost_sample)
poe_prob = poe_corpus.infer_logprob(frost_sample)
print(frost_prob, poe_prob)

frost2_prob = frost_corpus.infer_logprob(poe_sample)
poe2_prob = poe_corpus.infer_logprob(poe_sample)
print(frost2_prob, poe2_prob)

-82.4465125393979 -87.79142757188683
-72.39662475909547 -61.15758858904695


In [8]:
get_confusion_matrix([frost_corpus, poe_corpus], test=False)

Unnamed: 0,pred_frost,pred_poe
is_frost,1145,40
is_poe,57,540


In [9]:
import dask.bag as db
def stream_from_url(url):
    with requests.get(url, stream=True) as response:
        for line in response.iter_lines():
            yield line.decode('utf-8')

url = "https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/edgar_allan_poe.txt"
bag = db.from_sequence(stream_from_url(url))

In [10]:
print(bag.take(10))

("LO! Death hath rear'd himself a throne", 'In a strange city, all alone,', 'Far down within the dim west')




In [11]:
import dask.bag as db
import dask
import requests

@dask.delayed
def load_url(url):
    with requests.get(url, stream=True) as r:
        return [line.decode('utf-8') for line in r.iter_lines() if line]

url = "https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/edgar_allan_poe.txt"
delayed_bag = load_url(url)
bag = db.from_delayed([delayed_bag])  # Convert the delayed object to a Dask Bag

# Now you can operate on the bag as you normally would
result = bag.filter(lambda x: "fair" in x).compute()

In [12]:
print(bag.take(10))
print(result)

("LO! Death hath rear'd himself a throne", 'In a strange city, all alone,', 'Far down within the dim west', 'Where the good, and the bad, and the worst, and the best,', 'Have gone to their eternal rest.', '\u2009', 'There shrines, and palaces, and towers', 'Are not like any thing of ours', 'Oh no! O no! ours never loom', 'To heaven with that ungodly gloom!')
['Fair isle, that from the fairest of all flowers,', 'How fairy-like a melody there floats ', 'Once fair and stately palace --', 'Over fabric half so fair.', 'Was the fair palace door,']
