Add manuscript

curiousily · curiousily · commit 1b79ed0c7163 · 2020-02-06T08:33:18.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+.DS_Store
diff --git a/ipynb2lmd.py b/ipynb2lmd.py
@@ -0,0 +1,213 @@
+import json
+import sys
+import os
+import io
+import re
+
+def join(lines):
+    return ''.join(lines)
+
+def in_prompt(prompt_number):
+    return 'In [%d]: ' % prompt_number
+
+def out_prompt(prompt_number):
+    return 'Out[%d]: ' % prompt_number
+
+def add_prompt(lines, prompt):
+    "add the prompt on the first line, indent all other lines accordingly"
+    indentation = ' '*len(prompt)
+    return [prompt+lines[0]] + [indentation+l for l in lines[1:]]
+
+def indent(lines):
+    "add indentation required for code samples in Markdown"
+    return ['    '+l for l in lines]
+
+def code(lines):
+    return join(indent(lines))
+
+
+formulas = re.compile(r'(\$\$?)([^\$]+)(\$\$?)')
+def replace_formulas(text):
+    "In Leanpub Markdown, formulas are delimited by {$$}...{/$$}"
+    return formulas.sub(r'{$$}\2{/$$}', text)
+
+def text(lines):
+    return replace_formulas(join(lines))
+
+
+def convert_markdown(cell, out):
+    content = text(cell['source'])
+    if content.startswith('#'):
+        # a heading
+        out.write(u'\n')
+    out.write(content)
+    out.write(u'\n\n')
+
+
+def convert_raw(cell, out):
+    out.write(join(cell['source']))
+    out.write(u'\n\n')
+
+
+def convert_code(cell, out, base_name, output_dir):
+    "write a code cell (its source and every output) to out"
+    # base_name and output_dir are only passed on to convert_image.
+    prompt_number = cell['execution_count']
+    if cell['source']:
+        # the source becomes an indented code sample behind an 'In [n]: ' prompt
+        out.write(code(add_prompt(cell['source'],
+                                  in_prompt(prompt_number))))
+    out.write(u'\n')
+    # type of the previous output; None while no output has been handled
+    last_output_type = None
+    for output in cell['outputs']:
+        output_type = output['output_type']
+        if output_type == 'execute_result':
+            # continued: the previous output had the same type
+            convert_result(output, out, prompt_number,
+                           continued = (output_type == last_output_type))
+        elif output_type == 'stream':
+            convert_stream(output, out, prompt_number,
+                           continued = (output_type == last_output_type))
+        elif output_type == 'error':
+            convert_error(output, out, prompt_number)
+        elif output_type == 'display_data':
+            # separate the image from a preceding result or stream output
+            if last_output_type in ['execute_result', 'stream']:
+                out.write(u'\n\n')
+            convert_image(output, out, base_name, output_dir, prompt_number)
+        else:
+            raise Exception('unknown output type: %s' % output_type)
+        last_output_type = output_type
+    # `output` still refers to the last output handled by the loop. A
+    # trailing result or stream output gets two extra newlines, unless it
+    # carried 'text/html' data.
+    if last_output_type in ['execute_result', 'stream'] and not (
+            'data' in output and 'text/html' in output['data']):
+        out.write(u'\n\n')
+    out.write(u'\n')
+
+def convert_result(output, out, prompt_number, continued=False):
+    out.write(u'    \n')
+    if 'data' in output and 'text/html' in output['data']:
+        if not continued:
+            out.write(code(add_prompt([u''],
+                                      out_prompt(prompt_number))))
+            out.write(u'\n')
+        convert_html(join(output['data']['text/html']), out)
+    else:
+        prompt = out_prompt(prompt_number)
+        if continued:
+            # we don't want the prompt, but we need to indent as if it
+            # was there.
+            prompt = ' '*len(prompt)
+        out.write(code(add_prompt(output['data']['text/plain'], prompt)))
+
+def convert_stream(output, out, prompt_number, continued=False):
+    out.write(u'    \n')
+    prompt = out_prompt(prompt_number)
+    if continued:
+        # we don't want the prompt, but we need to indent as if it
+        # was there.
+        prompt = ' '*len(prompt)
+    out.write(code(add_prompt(output['text'], prompt)))
+
+table_html = re.compile(r'<table.*?>(.*)</table>', re.DOTALL)
+def convert_html(html, out):
+    match = table_html.search(html)
+    if match:
+        convert_table(match.group(1), out)
+    else:
+        raise Exception('Unknown html: %s' % html)
+
+row_html = re.compile(r'<tr.*?>(.*?)</tr>', re.DOTALL)
+cell_html = re.compile(r'<t[dh].*?>(.*?)</t[dh]>', re.DOTALL)
+def convert_table(table, out):
+    data = []
+    rows = row_html.findall(table)
+    for r in rows:
+        data.append([ x.strip() for x in cell_html.findall(r) ])
+
+    widths = [ max(len(d[i]) for d in data)
+               for i in range(len(data[0])) ]
+    format = '|' + '|'.join([' %%%ds ' % w for w in widths ]) + '|\n'
+
+    total_width = len(format % tuple('' for e in widths))
+    if total_width <= 60:
+        width = "narrow"
+    elif total_width >= 80:
+        width = "wide"
+    else:
+        width = "default"
+    out.write(u'\n{width="%s"}\n' % width)
+
+    out.write(format % tuple(data[0]))
+    out.write('|' + '|'.join([ u'-'*(w+2) for w in widths ]) + '|\n')
+    for d in data[1:]:
+        out.write(format % tuple(d))
+    out.write(u'\n\n')
+
+
+terminal_codes = re.compile(r'.\[[01](;\d\d)?m')
+def convert_error(output, out, prompt_number):
+    def unescape_terminal_codes(line):
+        return terminal_codes.sub('', line)
+    out.write(u'    \n')
+    # There are embedded \n in the lines...
+    lines = [ l+'\n' for line in output['traceback'] for l in line.split('\n') ]
+    # ...and control codes for the terminal
+    out.write(code(add_prompt([ unescape_terminal_codes(l) for l in lines ],
+                              out_prompt(prompt_number))))
+    out.write(u'\n\n')
+
+def convert_image(output, out, base_name, output_dir, prompt_number):
+    ext = extension(output)
+    images_dir = os.path.join(output_dir, 'images')
+    if not os.path.exists(images_dir):
+        os.mkdir(images_dir)
+    image_name = '%s-%d.%s' % (base_name.replace(' ','_'),
+                               prompt_number, ext)
+    image_path = os.path.join(images_dir, image_name)
+    with open(image_path, 'w') as image:
+        image.write(output['data']['image/%s' % ext].decode('base64'))
+    out.write(u'\n')
+    out.write(u'![](images/%s)' % image_name)
+    out.write(u'\n\n')
+
+def extension(output):
+    candidates = set(output['data'].keys()) - {'text/plain'}
+    # whatever key remains should be the extension
+    if len(candidates) > 1:
+        raise Exception('multiple extensions found: %s' % candidates)
+    candidate = str(candidates.pop())
+    if not candidate.startswith('image/'):
+        raise Exception('not an image type: %s' % candidate)
+    return candidate[6:]
+
+def convert(path, output_dir):
+    _, filename = os.path.split(path)
+    base_name, _ = os.path.splitext(filename)
+    base_name = base_name.lower()
+    md_name = base_name + '.md'
+    
+    with open(path) as f:
+        data = json.load(f)
+    cells = data['cells']
+
+    with io.open(os.path.join(output_dir, md_name), 'w') as out:
+        for cell in cells:
+            cell_type = cell['cell_type']
+            if cell_type == 'markdown':
+                convert_markdown(cell, out)
+            elif cell_type == 'code':
+                convert_code(cell, out, base_name, output_dir)
+            elif cell_type == 'raw':
+                convert_raw(cell, out)
+            else:
+                raise Exception('unknown cell type: %s' % cell_type)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print("""
+    Usage: %s notebook.ipynb output_dir
+
+    The file notebook.md will be created in output_dir; if the
+    notebook contains images, they will be extracted and stored
+    in the output_dir/images folder.
+        """ % sys.argv[0])
+        sys.exit(1)
+
+    convert(os.path.abspath(sys.argv[1]), sys.argv[2])
+
diff --git a/manuscript/01.getting-started-with-pytorch.md b/manuscript/01.getting-started-with-pytorch.md
@@ -0,0 +1,149 @@
+    In [2]: !pip install -U torch torchvision
+    
+    Out[2]: Requirement already up-to-date: torch in /usr/local/lib/python3.6/dist-packages (1.4.0)
+            Requirement already up-to-date: torchvision in /usr/local/lib/python3.6/dist-packages (0.5.0)
+            Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from torchvision) (1.17.5)
+            Requirement already satisfied, skipping upgrade: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision) (6.2.2)
+            Requirement already satisfied, skipping upgrade: six in /usr/local/lib/python3.6/dist-packages (from torchvision) (1.12.0)
+
+
+
+    In [3]: !pip install watermark
+    
+    Out[3]: Collecting watermark
+              Downloading https://files.pythonhosted.org/packages/60/fe/3ed83b6122e70dce6fe269dfd763103c333f168bf91037add73ea4fe81c2/watermark-2.0.2-py2.py3-none-any.whl
+            Requirement already satisfied: ipython in /usr/local/lib/python3.6/dist-packages (from watermark) (5.5.0)
+            Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (1.0.18)
+            Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (0.8.1)
+            Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (0.7.5)
+            Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (2.1.3)
+            Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (4.3.3)
+            Requirement already satisfied: pexpect; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (4.8.0)
+            Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (4.4.1)
+            Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.6/dist-packages (from ipython->watermark) (45.1.0)
+            Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->watermark) (1.12.0)
+            Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->watermark) (0.1.8)
+            Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.2->ipython->watermark) (0.2.0)
+            Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != "win32"->ipython->watermark) (0.6.0)
+            Installing collected packages: watermark
+            Successfully installed watermark-2.0.2
+
+
+
+    In [0]: %load_ext watermark
+
+    In [5]: %watermark -v -p numpy,torch
+    
+    Out[5]: CPython 3.6.9
+            IPython 5.5.0
+            
+            numpy 1.17.5
+            torch 1.4.0
+
+
+
+
+# PyTorch ❤️ NumPy
+
+Do you know NumPy? If you do, learning PyTorch will be a breeze! If you don't, prepare to learn the skills that will guide you on your journey to Machine Learning Mastery!
+
+Let's start with something simple:
+
+    In [0]: import torch
+            import numpy as np
+
+    In [7]: a = np.array([1, 2])
+            b = np.array([8, 9])
+            
+            c = a + b
+            c
+    
+    Out[7]: array([ 9, 11])
+
+
+Adding the same arrays with PyTorch looks like this:
+
+    In [8]: a = torch.tensor([1, 2])
+            b = torch.tensor([8, 9])
+            
+            c = a + b
+            c
+    
+    Out[8]: tensor([ 9, 11])
+
+
+Fortunately, you can go from NumPy to PyTorch:
+
+    In [9]: a = torch.tensor([1, 2])
+            
+            a.numpy()
+    
+    Out[9]: array([1, 2])
+
+
+and vice versa:
+
+    In [10]: a = np.array([1, 2])
+             torch.from_numpy(a)
+    
+    Out[10]: tensor([1, 2])
+
+
+The good news is that the conversions incur almost no cost on the performance of your app. NumPy and PyTorch store data in memory in the same way. That is, PyTorch is reusing the work done by NumPy.
+
+# Tensors
+
+Tensors are just n-dimensional containers for numbers (including booleans). You can find the complete list of supported data types at [PyTorch's Tensor Docs](https://pytorch.org/docs/stable/tensors.html).
+
+So, how can you create a Tensor (try to ignore that I've already shown you how to do it)?
+
+
+
+    In [11]: torch.tensor([[1, 2], [2, 1]])
+    
+    Out[11]: tensor([[1, 2],
+                     [2, 1]])
+
+
+You can create a tensor from floats:
+
+    In [12]: torch.FloatTensor([[1, 2], [2, 1]])
+    
+    Out[12]: tensor([[1., 2.],
+                     [2., 1.]])
+
+
+Or define the type like so:
+
+    In [15]: torch.tensor([[1, 2], [2, 1]], dtype=torch.bool)
+    
+    Out[15]: tensor([[True, True],
+                     [True, True]])
+
+
+You can use a wide range of factory methods to create Tensors without manually specifying each number. For example, you can create a matrix with random numbers like this: 
+
+    In [17]: torch.rand(3, 2)
+    
+    Out[17]: tensor([[0.5853, 0.7024],
+                     [0.5224, 0.3153],
+                     [0.9657, 0.1873]])
+
+
+Or one full of ones:
+
+    In [18]: torch.ones(3, 2)
+    
+    Out[18]: tensor([[1., 1.],
+                     [1., 1.],
+                     [1., 1.]])
+
+
+
+# Running on GPU
+
+
+# Common Issues
+
+    In [0]: 
+
diff --git a/manuscript/Book.txt b/manuscript/Book.txt
@@ -0,0 +1 @@
+01.getting-started-with-pytorch.md
diff --git a/manuscript/resources/readme.txt b/manuscript/resources/readme.txt
@@ -0,0 +1,22 @@
+# About The resources Folder
+
+You put resources like images in this folder.
+
+Here's how you insert an image from the resources folder:
+
+![Palm Trees](palm-trees.jpg)
+
+You don't need to add a caption:
+
+![](palm-trees.jpg)
+
+To set a cover image, just name an image title_page.png or title_page.jpg and
+put it in this folder. If there is no cover image, Leanpub will just make a
+plain book cover with your book title and your name.
+
+To learn more about how to size your images (including your cover image) based
+on your book size, see:
+Author > Books > (your book) > Settings > Image Sizing Help
+
+To learn more about resource paths, see:
+https://leanpub.com/markua/read#leanpub-auto-local-resources

Original file line number	Diff line number	Diff line change
`@@ -127,3 +127,4 @@ dmypy.json`
`127`	`127`
`128`	`128`	`# Pyre type checker`
`129`	`129`	`.pyre/`
	`130`	`+.DS_Store`