In [1]:
import requests
from bs4 import BeautifulSoup
from IPython.core.display import HTML
from fastcore.all import *
import pandas as pd
import json
from datetime import datetime
import json
from urllib.request import urlopen

In [2]:
# Fetch the Dyalog "Language Elements" page and build symbols_dict, which maps each
# symbol's link text to its relative documentation href ('../Symbols/...').
main_url = 'https://help.dyalog.com/18.2/Content/Language/Introduction/Language%20Elements.htm?tocpath=Language%20Reference%20Guide%7CSymbols%7C_____1'
content = requests.get(main_url)
# Fail loudly on an HTTP error instead of parsing an error page into an empty symbol table.
content.raise_for_status()
content.encoding = content.apparent_encoding

main=BeautifulSoup(content.text)

# BeautifulSoup passes None to the predicate for <a> tags that have no href attribute,
# so guard against None before calling startswith.
symbol_tags = main.find_all('a',attrs={'href':lambda x: x is not None and x.startswith('../Symbols/')})
symbols_dict = {o.text:o['href'] for o in symbol_tags}

# General Cards from Dyalog Docs

In [3]:
# Root URL of the Dyalog 18.2 Language docs; the relative '../Symbols/...' hrefs are
# appended to it after their leading '..' is dropped.
base = 'https://help.dyalog.com/18.2/Content/Language'

In [4]:
def get_hyperlink(url,url_type):
    """Return an HTML snippet for the back of a card: a space, two line breaks, then a
    link to `url` whose text is 'Take me to <url_type>'."""
    template = " <br/><br/><a href='{}'>Take me to {}</a>"
    return template.format(url, url_type)

In [5]:
# Scrape every symbol page listed in symbols_dict and build two lists of DataFrames whose
# rows are (front, back, card type):
#   card_names - cards drilling glyph names <-> symbols
#   cards      - cards drilling glyph documentation <-> symbols / names

def _is_title_anchor(name):
    """True for a `name` attribute that does not start with 'kanchor'.

    BeautifulSoup calls the predicate with None for <a> tags that have no `name`
    attribute, so None is rejected rather than crashing on `.startswith`.
    """
    return name is not None and not name.startswith('kanchor')

def _card(front, back, card_type):
    """Wrap one flashcard as a single-row DataFrame (front, back, card type)."""
    return pd.DataFrame([[front, back, card_type]])

ASK_LOOK = "<br/><br/>What does this glyph look like?"

card_names = []
cards = []
for i, (sym, doc_url) in enumerate(symbols_dict.items()):
    # hrefs look like '../Symbols/...'; dropping the leading '..' leaves a path under `base`.
    url1 = base + doc_url[2:]
    sym_link = get_hyperlink(url1, "symbol documentation")

    content = requests.get(url1)
    content.encoding = content.apparent_encoding
    doc_html = BeautifulSoup(content.text)

    doc_h5s = doc_html.find_all('h5')
    title_cell = doc_html.find('td')

    glyph_names = title_cell.text.strip()
    if glyph_names == '':
        # The first table cell has no visible text: use the name of its first
        # non-'kanchor' anchor instead (looked up only when it is actually needed).
        glyph_names = title_cell.find('a', attrs={'name': _is_title_anchor})['name']

    # Name of glyph -> Symbol
    card_names.append(_card(glyph_names + ASK_LOOK, sym + sym_link, "Name->Sym"))

    urls = [url1]
    docs = ''
    for doc_h5 in doc_h5s:
        p = doc_h5.find_next_sibling('p')
        dy_or_mon_name = ''.join(re.findall('Dyadic|Monadic', doc_h5.text)) + ': ' + p.text.strip()

        # Monadic/Dyadic Name -> Symbol
        card_names.append(_card(dy_or_mon_name + ASK_LOOK, sym + sym_link, "Name->Sym"))
        glyph_names = glyph_names + ' <br/> ' + dy_or_mon_name

        # The function page is the first link found in the <p> siblings after this heading.
        candidate_links = (para.find('a', attrs={'href': True}) for para in doc_h5.find_next_siblings('p'))
        url_end = [link for link in candidate_links if link is not None][0]['href']
        url2 = base + url_end[2:]
        urls.append(url2)
        fn_links = sym_link + get_hyperlink(url2, "primitive function documentation")

        content = requests.get(url2)
        content.encoding = content.apparent_encoding
        doc_html = BeautifulSoup(content.text)

        # Reset per page so a failed extraction can never reuse a previous page's text.
        documentation = None
        try:
            desc = [o.text for o in doc_html.find('td').find_all_next('p') if o.text.strip() != '']

            documentation = '<br/>'.join(desc)
            docs = docs + documentation + '<br/><br/>'
            # Functional documentation -> Symbol
            cards.append(_card(documentation + ASK_LOOK, sym + fn_links, "Docs->Sym"))
        except Exception as err:
            # Best effort: report the page and keep scraping. `Exception` (not a bare
            # except) lets KeyboardInterrupt stop this long-running loop.
            print(f"{i} {sym}: could not extract documentation from {url2} ({err!r})")
        try:
            doc_name = doc_html.find('td').find('a', attrs={'name': _is_title_anchor})
            if doc_name['name'] not in glyph_names:
                # Documentation Name -> Symbol
                cards.append(_card(doc_name['name'] + ASK_LOOK, sym + fn_links, "Name->Sym"))

            if documentation is not None:
                # Documentation Name -> Functional Documentation
                # Functional Documentation -> Documentation Name
                cards.append(pd.DataFrame([
                    [doc_name['name'] + "<br/><br/>What does this glyph do?",
                     documentation + fn_links,
                     "Name->Docs"
                    ],
                    [documentation + "<br/><br/>What is the name of this glyph?",
                     doc_name['name'] + fn_links,
                     "Docs->Name"
                    ],
                ]))
        except Exception as err:
            print(f"{i} {sym}: could not extract documentation name from {url2} ({err!r})")

    # Symbol -> Functional Documentation (each visited URL is linked once, in visit order)
    cards.append(_card(sym + "<br/><br/>What does this glyph do?",
                       docs + ''.join([get_hyperlink(o, "Link") for o in dict.fromkeys(urls)]),
                       "Sym->Docs"))
    # Symbol -> Names
    card_names.append(_card(sym + "<br/><br/>What is this glyph called?  What's the Monadic/Dyadic names?",
                            glyph_names + sym_link,
                            "Sym->Name"))

65
72
72
72


In [6]:
# The scraping loop collects name-drill cards in `card_names` and documentation-drill cards
# in `cards`; each deck file is written from its own list so the two CSVs are not identical.
names_df = pd.concat(card_names)
names_df.to_csv('decks/APL_DyalogDocs_Names.csv',index=False,header=False)
df = pd.concat(cards)
df.to_csv('decks/APL_DyalogDocs_Functionality.csv',index=False,header=False)

### Stuff that doesn't work quite right
+ A few documentation pages have code cells mixed into the text.  Currently only the text of \<p> elements is pulled, so the code is lost.  That could be fixed, but it's infrequent and not a big deal since the cards include hyperlinks to the docs.
+ Clean up the try/except error handling in the scraping loop.

# General Cards from study group NB

In [7]:
# Download the study-group notebook and parse it as JSON.
url = 'https://raw.githubusercontent.com/fastai/apl-study/master/APL.ipynb'
# The context manager closes the HTTP response once the body has been read.
with urlopen(url) as response:
    nb = json.loads(response.read())

In [8]:
# Define newlines on cards
def collapse_list(l):
    """Join the strings in `l` with '<br/>' and append one trailing '<br/>'."""
    line_break = '<br/>'
    return line_break.join(l) + line_break

def get_hyperlink(url,url_type):
    """Return an HTML snippet for the back of a card: a space, two line breaks, then a
    link to `url` whose text is 'Take me to <url_type>'.

    Same behaviour as the definition in the Dyalog docs section; repeated here so this
    section can be run on its own.
    """
    template = " <br/><br/><a href='{}'>Take me to {}</a>"
    return template.format(url, url_type)

In [9]:
def cell_filter(cell):
    """Return True only for code cells that have a non-empty source and non-empty outputs.

    `cell` is a notebook cell dict; 'cell_type' and 'source' must be present,
    'outputs' may be absent.
    """
    is_code = cell['cell_type'] == 'code'
    has_outputs = 'outputs' in cell and cell['outputs'] != []
    has_source = cell['source'] != []
    return all((is_code, has_outputs, has_source))

In [10]:
# Keep only the notebook cells accepted by cell_filter
# (code cells with non-empty source and non-empty outputs).
filtered_codes = L(nb['cells']).filter(cell_filter)

In [11]:
# Build one "Code->Output" card per filtered notebook cell: the cell's source on the front,
# its rendered HTML output plus links to the docs of every glyph it uses on the back.
base = 'https://help.dyalog.com/18.2/Content/Language'

# Glyphs that get a docs link; '.' is deliberately left out. Built once here instead of
# being recomputed for every character of every cell.
linkable_symbols = {k for k in symbols_dict if k != '.'}

code_cards = []
for cell in filtered_codes:
    _src = cell['source']

    # Map each linkable glyph occurring in the source to its relative docs href.
    # Dict insertion order keeps the glyphs in order of first appearance in the code.
    _symbol_locs = {ch: symbols_dict[ch] for line in _src for ch in line if ch in linkable_symbols}

    src = collapse_list(_src)
    # Only the first output is used, and it must provide a 'text/html' representation.
    out = ''.join(cell['outputs'][0]['data']['text/html'])
    out = out + ''.join([get_hyperlink(base+u[2:],s) for s,u in _symbol_locs.items()])
    code_cards.append(pd.DataFrame([[src + " <br/><br/>What does the code output?",out,"Code->Output"]]))

In [12]:
# Stack the single-row card frames from code_cards into one table.
df = pd.concat(code_cards)

In [13]:
# Write the code cards as a CSV without header row or index column.
df.to_csv('decks/APL_FastaiStudyGroup_CodeCards.csv',index=False,header=False)

# APL Cart

In [14]:
# Load the APLcart table (tab-separated) straight from GitHub.
# The cells below read its SYNTAX, DESCRIPTION and DOCS columns.
url = 'https://raw.githubusercontent.com/abrudz/aplcart/master/table.tsv'
df = pd.read_csv(url, sep='\t')

In [15]:
# Card set 1: APL expression on the front; description plus a docs link on the back.
front = df['SYNTAX']
# Rows without a DOCS value get no link at all (astype(str) alone would emit href='nan').
# The anchor is closed with a proper </a> tag.
docs_link = ("<a href='" + df['DOCS'].astype(str) + "'>Link to docs</a>").where(df['DOCS'].notna(), '')
back = df['DESCRIPTION'].astype(str) + ' <br/>'*2 + docs_link
g1 = pd.DataFrame({'front':front,'back':back})

In [16]:
# Card set 2 (reverse direction): description on the front; APL expression plus a docs
# link on the back.
front = df['DESCRIPTION']
# Rows without a DOCS value get no link at all (astype(str) alone would emit href='nan').
# The anchor is closed with a proper </a> tag.
docs_link = ("<a href='" + df['DOCS'].astype(str) + "'>Link to docs</a>").where(df['DOCS'].notna(), '')
back = df['SYNTAX'].astype(str) + ' <br/>'*2 + docs_link
g2 = pd.DataFrame({'front':front,'back':back})

In [17]:
# Combine both card directions (g1 then g2) and write them as a CSV without header or index.
# Note: this rebinds `cards`, which was a list of DataFrames in the Dyalog docs section.
cards = pd.concat([g1,g2])
cards.to_csv('decks/APL_AplCart.csv',index=False,header=False)