In [1]:
import json
import pandas as pd
from fastcore.all import *
from urllib.request import urlopen


# Setup

In [2]:
# Load the source notebook (raw .ipynb JSON) from GitHub
url = 'https://raw.githubusercontent.com/fastai/apl-study/master/posts/APL.ipynb'
# Read inside a context manager so the HTTP response is closed as soon as
# the body has been consumed, instead of relying on garbage collection.
with urlopen(url) as response:
    nb = json.loads(response.read())
# Initialize lists
cards = []
failed_idxs = []

# Helpers

In [3]:
# Define newlines on cards
def collapse_list(l):
    """Join the strings in `l` with '<br/>' and append a trailing '<br/>'.

    An empty `l` therefore yields a single '<br/>'.
    """
    line_break = '<br/>'
    joined = line_break.join(l)
    return joined + line_break

# Make Code Cards

1. Get all code cells that have both code and outputs
1. Create 1 card per code cell: given the code (front), predict the output (back)

In [4]:
def cell_filter(cell):
    """Return True for code cells that have non-empty outputs and non-empty source.

    NOTE: the name `cell_filter` is defined a second time later in this
    notebook (for markdown cells), so this version is only in effect until
    that later cell is run.
    """
    is_code = cell['cell_type'] == 'code'
    # A missing 'outputs' key is treated the same as an empty outputs list.
    has_outputs = cell.get('outputs', []) != []
    has_source = cell['source'] != []
    return all((is_code, has_outputs, has_source))

In [5]:
# Keep only the code cells that have both source and outputs.
# This must run while `cell_filter` is still the code-cell version: the same
# name is redefined further down for markdown cells.
all_cells = L(nb['cells'])
filtered_codes = all_cells.filter(cell_filter)

In [6]:
# Build one single-row DataFrame per code cell: column 0 is the code (front),
# column 1 is the rendered output (back). `code_cards` stays a list of frames
# because it is concatenated again when the deck is written.
code_cards = []
for cell in filtered_codes:
    front = collapse_list(cell['source'])
    # Only the first output is used, and it is assumed to carry a 'text/html'
    # payload under 'data' (a KeyError is raised otherwise).
    first_output_html = cell['outputs'][0]['data']['text/html']
    back = collapse_list(first_output_html)
    card_row = [front, back]
    code_cards.append(pd.DataFrame([card_row]))
# Preview a random handful of the generated cards.
pd.concat(code_cards).sample(5)

Unnamed: 0,0,1
0,2 3 1<br/>,"<span style=""white-space:pre; font-family: mon..."
0,⍴mat<br/>,"<span style=""white-space:pre; font-family: mon..."
0,(1 2)(3 4)'ab'<br/>,"<span style=""white-space:pre; font-family: mon..."
0,⍳4<br/>,"<span style=""white-space:pre; font-family: mon..."
0,3-2<br/>,"<span style=""white-space:pre; font-family: mon..."


# Make Markdown Cards

1. Get all markdown cells that have a level 3/4/5 header and contain something in backticks (i.e. a symbol)
1. For all unique items in backticks ('symbols') create 2 cards
    + From symbol (front), predict the name (back)
    + From name (front), predict the symbol (back)

## Filter for valid headers
+ Markdown
+ L3/4/5 header
+ Has something in backticks (i.e. a symbol)

In [7]:
def cell_filter(cell):
    """Return True for markdown cells whose text contains '###' and a backticked character.

    NOTE: this redefines the `cell_filter` used earlier in the notebook for
    code cells; from here on the name refers to this markdown version.
    """
    is_markdown = cell['cell_type'] == 'markdown'
    text = ''.join(cell['source'])
    has_header_marker = '###' in text
    # One or more backticks, any single character, then a closing backtick.
    has_symbol = re.search(r'`+.`', text) is not None
    return all((is_markdown, has_header_marker, has_symbol))
# Markdown cells that passed the header/symbol filter.
filtered_headers = L(nb['cells']).filter(cell_filter)
# For each kept cell: drop every '#' from each source line, strip surrounding
# whitespace, then collapse the lines into one '<br/>'-separated string.
cleaned_headers = filtered_headers.map(
    lambda md_cell: collapse_list(
        [line.replace('#', '').strip() for line in md_cell['source']]
    )
)
list(cleaned_headers[:5])

['`-` (Minus sign; Bar)<br/>',
 'Monadic `-` (Negate)<br/>',
 'Dyadic `-` (Minus; subtract)<br/>',
 '`+` (Plus sign)<br/>',
 'Monadic `+` (Conjugate)<br/>']

In [8]:
def get_unique_symbols(cleaned_headers):
    """Return the distinct backticked single-character tokens found in `cleaned_headers`.

    The headers are concatenated into one string and scanned with the same
    backtick pattern used to filter the markdown cells; the matches (backticks
    included) are returned as an `L` with duplicates removed.
    """
    all_header_text = ''.join(cleaned_headers)
    backticked = re.findall(r'`+.`', all_header_text)
    return L(backticked).unique()

unique_symbols = get_unique_symbols(cleaned_headers)
unique_symbols

(#14) ['`-`','`+`','`÷`','`×`','`*`','`⍴`','`⍳`','`=`','`≠`','`<`'...]

In [9]:
# Build two single-row DataFrames per symbol: symbol -> description and
# description -> symbol. `md_cards` stays a list of frames because it is
# concatenated again when the deck is written.
md_cards = []
for symbol in unique_symbols:
    # Every cleaned header that mentions this symbol...
    matching_headers = cleaned_headers.filter(lambda header: symbol in header)
    # ...collapsed into one string, with the symbol itself removed so the
    # description does not give away the answer.
    description = collapse_list(matching_headers).replace(symbol, '')
    md_cards.extend([
        pd.DataFrame([[symbol, description]]),
        pd.DataFrame([[description, symbol]]),
    ])
# Preview a random handful of the generated cards.
pd.concat(md_cards).sample(5)

Unnamed: 0,0,1
0,(Equal underbar)<br/><br/>monadic (Depth)<br...,`≡`
0,`≥`,(Greater than or equal to sign)<br/><br/>dyad...
0,monadic (index generator)<br/><br/>dyadic (i...,`⍳`
0,`÷`,(Divide sign)<br/><br/>Monadic (Reciprocol)<...
0,(Minus sign; Bar)<br/><br/>Monadic (Negate)<...,`-`


# Combine and Write

In [10]:
# Stack the code cards on top of the markdown cards and write them out as a
# two-column CSV with no index column and no header row.
code_deck = pd.concat(code_cards)
md_deck = pd.concat(md_cards)
deck = pd.concat([code_deck, md_deck])
deck.to_csv('deck.csv', index=False, header=False)