_Run the first cell! (collapsed in JupyterLab)_

In [1]:
# To get multiple outputs from one code cell (without using print()):
# 'all' asks IPython to display the value of every expression statement
# in a cell instead of only the last one. The attribute is set on the
# class, so it applies to the running shell from the next cell onwards.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython import get_ipython
from IPython.display import HTML, Markdown, Image

import sys
from pathlib import Path

# For documenting the current environment:
def sys_info():
    """Print a short report about the running interpreter.

    The report lists the Python version string, the name of the
    environment folder (last component of sys.prefix), the platform
    identifier and the current working directory. Nothing is returned.
    """
    report_lines = [
        '',  # leading blank line
        F'Python ver: {sys.version}',
        F'Python env: {Path(sys.prefix).name}',
        F'OS:         {sys.platform}',
        F'Current dir: {Path.cwd()}',
        '',  # trailing newline before print()'s own newline
    ]
    print('\n'.join(report_lines))

# For enabling imports from current project code:
def add_to_sys_path(this_path, up=False, verbose=True):
    """
    Register a folder on sys.path so that its modules can be imported.

    :param this_path: folder to register (str or Path).
    :param up (bool, False): if True, register the parent of this_path
           (1 level up) instead of this_path itself.
    :param verbose (bool, True): print whether the path was added or
           was already present.

    The folder is stored in its forward-slash (posix) string form and is
    inserted at index 1 of sys.path, only if that exact string is not
    already listed.
    """
    target = Path(this_path)
    if up:
        target = target.parent
    entry = target.as_posix()

    if entry in sys.path:
        msg = F'Path already in sys.path: {entry}'
    else:
        sys.path.insert(1, entry)
        msg = F'Path added to sys.path: {entry}'

    if verbose:
        print(msg)

# If this ipynb file is inside a folder, e.g. ./notebooks,
# the project code is assumed to reside 1 level up.
# The test is a prefix match on the current folder's name: when that name
# starts with nb_folder the parent folder is registered on sys.path,
# otherwise the current folder itself is registered.
nb_folder = 'notebooks'
add_to_sys_path(Path.cwd(), up=Path.cwd().name.startswith(nb_folder))


# For py modules/methods discovery:
def filter_dir(mdl, filter_str=None, start_with_str='_', exclude=True):
    """Filter dir(mdl) for attribute/method discovery.

       Input:
       :param mdl (object): any object accepted by dir(), e.g. a module,
              optionally with submodule path(s), e.g. mdl.submdl1.submdl2.
       :param filter_str (str, None): if given, keep only the names that
              contain this string (case-insensitive).
       :param start_with_str (str, '_'), exclude (bool, True): with
              exclude=True, names starting with start_with_str are dropped
              (default: drop underscore-prefixed names); with exclude=False,
              only the names starting with start_with_str are kept.
       Output:
       List of matching names, in dir() order.
       Example:
       >filter_dir(re) # lists the public names of the re module.
    """
    candidates = []
    for attr in dir(mdl):
        # Skip a name when its prefix status matches the `exclude` flag.
        if attr.startswith(start_with_str) == exclude:
            continue
        candidates.append(attr)

    if filter_str is None:
        return candidates

    needle = filter_str.lower()
    return [attr for attr in candidates if needle in attr.lower()]

# To create often-used subfolders:
def get_project_dirs(which=('data', 'images'),
                     use_parent=True):
    '''Return (and create if missing) the folder(s) named in `which`.

    :param which (iterable of str, or a single str): folder names, relative
           to the base folder. A single string is treated as one folder
           name. Nested names such as 'data/raw' are allowed.
    :param use_parent (bool, True): if True the base folder is the parent
           of the current working directory (the ipynb parent level);
           otherwise it is the current working directory itself.
    :return: list of Path objects, one per name in `which`, in order.
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(which, str):
        which = [which]

    base_dir = Path.cwd().parent if use_parent else Path.cwd()

    dir_lst = []
    for name in which:
        target = base_dir.joinpath(name)
        if not target.exists():
            # parents=True supports nested names; exist_ok=True tolerates
            # the folder appearing between the check and the creation.
            target.mkdir(parents=True, exist_ok=True)
        dir_lst.append(target)
    return dir_lst

# With the default arguments this yields the 'data' and 'images' folders
# located one level above the current working directory (created if missing):
DIR_DATA, DIR_IMG = get_project_dirs()
    
import numpy as np
import pandas as pd
#pd.set_option("display.max_colwidth", 200)
from pprint import pprint as pp


# For documenting the current environment:
def show_versions():
    """Build an HTML info box listing interpreter and library versions.

    Reports the Python version, the environment folder name, the versions
    of Numpy, Scipy, Pandas and Matplotlib, and the current directory.
    Libraries are looked up by module name at call time, so the function
    does not depend on `sp`/`mpl` aliases being defined in the notebook;
    a library that cannot be imported is reported as 'not available'.

    :return: IPython.display.HTML object (an "alert-info" div).
    """
    import importlib

    def _version_of(module_name):
        # Resolve the version lazily; never fail the whole report for one package.
        try:
            return importlib.import_module(module_name).__version__
        except (ImportError, AttributeError):
            return 'not available'

    txt = '<pre><br>'
    txt += F'Python:\t\t{sys.version}<br>'
    txt += F'Python env:\t{Path(sys.prefix).name}<br>'
    txt += F"Numpy:\t\t{_version_of('numpy')}<br>"
    txt += F"Scipy:\t\t{_version_of('scipy')}<br>"
    txt += F"Pandas:\t\t{_version_of('pandas')}<br>"
    txt += F"Matplotlib:\t{_version_of('matplotlib')}<br>"
    txt += F'Current dir: {Path.cwd()}'
    txt += '</pre>'
    div = f"""<div class="alert alert-info"><b>Versions:</b><br>{txt}</div>"""
    return HTML(div)


# autoreload extension:
# load it only if the shell has not loaded it yet, so that re-running
# this cell does not load the extension a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    get_ipython().run_line_magic('load_ext', 'autoreload')

# Mode 2: reload all modules before executing code, so edits to the
# project's .py files are picked up without restarting the kernel.
%autoreload 2

#..................
# Environment report: always print the basic interpreter info first.
sys_info()

# Prefer the `watermark` extension for the detailed report; if loading it
# raises ModuleNotFoundError, remember that and use the local fallback.
no_wmark = False
try:
    %load_ext watermark
    %watermark
except ModuleNotFoundError:
    no_wmark = True

if no_wmark:
    # Fallback: HTML box built by show_versions().
    show_versions()
else:
    # -iv: list the versions of the imported packages.
    %watermark -iv

Path added to sys.path: C:/Users/catch/Documents/GitHub/DU-event-transcript-demo/resources/EventManagement

Python ver: 3.7.6 | packaged by conda-forge | (default, Jun  1 2020, 18:11:50) [MSC v.1916 64 bit (AMD64)]
Python env: p37
OS:         win32
Current dir: C:\Users\catch\Documents\GitHub\DU-event-transcript-demo\resources\EventManagement\notebooks

2021-01-28T10:44:26-05:00

CPython 3.7.6
IPython 7.16.1

compiler   : MSC v.1916 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 142 Stepping 10, GenuineIntel
CPU cores  : 8
interpreter: 64bit
pandas 1.0.5
numpy  1.19.0



In [2]:
from manage import (EventMeta as Meta,
                    EventTranscription as TRX,
                    Controls as CTR,
                    Utils as UTL,
                    Audit as AUD)

import ipywidgets as ipw

---
---
# Audit: 
RE: regex in get_id_from_YT_url not working for all md files
## Q1: Is `parse_href` working? A1: Yes
## Q2: Is video_url working? A2: Yes
# Conclusion: Updated (fixed) regex in get_id_from_YT_url.

In [47]:
# Run the URL-splitting self-test from the project's Utils module
# (the recorded output shows a meetup and a youtube test):
UTL.test_split_url()

Test meetup...
Test youtube...
All done.


---
---
# Audit: which xml files are not lowercase?
- Answer by testing 1st paragraph => Modified `xml_caption_to_text` to obtain `Audit.audit_xml_captions`

## Audit conclusion:
The xml files have consistently been lowercase since event 12, so implementing a bypass of the text
cleaning for files that are not lowercase is not warranted (the corrections would still need to be applied,
but they would not be optimal on non-lowercase text without adding special cases).

In [50]:
# Run the captions case audit via the project's Audit module
# (the recorded output reports a "Lower=" flag per event, based on the 1st paragraph):
AUD.get_all_transcripts(audit_captions=True)

Captions case check (on 1st P with minutes_mark= 1):
03, 2020:: Lower= False
everyone I am doing a recording of the scraping presentation and the original recording from the webinar didn't come out well so this is just a recording in today's presentation I'm going to talk about web scraping we're going to look at the website Poshmark comm and we're going to use Python and some additional packages to gather the data so agenda I'm gonna give a quick introduction about myself and the group then we're going to talk about web scraping and high level then we'll walk through a code example I'm going to share the code files so you can walk through it on your own as well and then during the webinar there was obviously QA it's a little bit about me I'm a product manager with General Assembly I used to run operations at an online data science bootcamp and that's kind of where I picked up everything I know about Python and programming and data science and 
)
04, 2020:: Lower= False
all right it's 

---
---
# Controls identification

---
---
# Debug new update_readme(): FIXED
- doubles up the table
- adds a row even for an event update

---
---
# Fix problem with event numbering from df :: new event dict in Meta :: FIXED
```
    def new_event_dict(self):
        """
        Create a 'starter' event dict with event id generated
        from the readme table df.

        Side effect: also stores the generated id on self.idn.
        Returns the dict, including 'trans_idx' obtained from
        self.insertion_idx().
        """
        new_dict = self.get_event_dict()

        # Update dict with defaults:
        new_dict['year'] = self.year
        # NOTE(review): Index.argmax() returns the *position* of the largest
        # index value, not the value itself - confirm this (plus row_offset)
        # is the intended basis for the next event number.
        new = self.df.index.argmax() + self.row_offset
        self.idn = idn_frmt(new)
        new_dict['idn'] = self.idn
        new_dict['transcriber'] = '?'
        new_dict['extra_references'] = ''
        new_dict['has_transcript'] = False
        new_dict['status'] = TrStatus.TODO.value
        new_dict['notes'] = ''
        new_dict['video_href_w'] = DEF_IMG_W #thumbnail
        
        # The header template is filled with the defaults set above and
        # passed to insertion_idx(); the result is stored as 'trans_idx'.
        v1 = self.insertion_idx(HDR_TPL.format(**new_dict))
        new_dict['trans_idx'] = v1
        return new_dict
``` 

---
---

# TO DO:

1. Produce the program flow chart depending on user status, e.g. Admin vs. Transcriber.

---
---
# Utils for documenting the project
---

In [None]:
# test: https://nbviewer.jupyter.org/github/xflr6/graphviz/blob/master/examples/notebook.ipynb

import os
from graphviz import Digraph, Source

In [None]:
# List the names exposed by Digraph that do not start with an underscore:
filter_dir(Digraph)

```
Digraph?
Init signature:
Digraph(
    name=None,
    comment=None,
    filename=None,
    directory=None,
    format=None,
    engine=None,
    encoding='utf-8',
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    body=None,
    strict=False,
)
```

In [None]:
# IPython help query (development aid): shows the signature/docstring of Digraph.render.
Digraph.render?

In [None]:
# Inspect the two environment variables that set_gv_envir() reads.
# Each lookup raises KeyError if the variable is not set.
os.environ['PROGRAMFILES']
os.environ['CONDA_PREFIX']
# Graphviz install folder used on this system:
#C:\Program Files\Graphviz 2.44.1\bin

In [None]:
def set_gv_envir(gv_version='2.44.1'):
    """Ad-hoc fix to have Graphviz working on my system.

    Appends two folders to the PATH environment variable of the current
    process: the `bin` folder of the Graphviz install under PROGRAMFILES,
    and `Library/bin/python-graphviz` under CONDA_PREFIX.
    A folder already listed in PATH is not appended again, so the function
    can safely be re-run (e.g. when the notebook cell is re-executed).

    Background: in case the error ExecutableNotFound occurs, the path to
    graphviz must be added to the PATH variable, e.g:
    > "FileNotFoundError: [WinError 2] The system cannot find the file specified"
    > "ExecutableNotFound:
       failed to execute ['dot', '-Tsvg'], make sure the Graphviz executables are
       on your systems' PATH"
    The above is not sufficient: the error occurred even though graphviz, dot and
    neato are all on my system path.
    Calling this function on failed `try` solved the problem. (?)

    :param gv_version (str, '2.44.1'): version used in the name of the
           Graphviz install folder ('Graphviz <gv_version>').
    :return: tuple (graphviz bin folder, conda python-graphviz folder).
    :raises KeyError: if PROGRAMFILES or CONDA_PREFIX is not set.
    """
    gviz = os.path.join(os.environ['PROGRAMFILES'], F'Graphviz {gv_version}', 'bin')
    cnd_gv = os.path.join(os.environ['CONDA_PREFIX'], 'Library', 'bin', 'python-graphviz') #'graphviz')

    # Append only the missing folders, keeping the existing order intact.
    path_entries = os.environ.get('PATH', '').split(os.pathsep)
    for new_dir in (gviz, cnd_gv):
        if new_dir not in path_entries:
            path_entries.append(new_dir)
    os.environ['PATH'] = os.pathsep.join(path_entries)
    return gviz, cnd_gv

# Apply the PATH fix for this kernel session (modifies os.environ["PATH"])
# and show the two folders it returns:
set_gv_envir()

In [None]:
# test:
gvfile = DIR_IMG.joinpath('tbl.gv')

dot_dg = Digraph(comment='The Round Table', filename=gvfile, engine='dot')

dot_dg.node('A', 'King Arthur')
dot_dg.node('B', 'Sir Bedevere the Wise')
dot_dg.node('L', 'Sir Lancelot the Brave')

dot_dg.edges(['AB', 'AL'])
dot_dg.edge('B', 'L', constraint='false')

In [None]:
# Render the graph to a PNG file; view=True also opens the result in the
# system's default viewer (requires the Graphviz executables to be found).
dot_dg.render(format='png', view=True)

In [None]:
# Decision-tree data for the program flow chart: maps a decision label
# to its possible outcomes (user types).
dtree = {'User Type:': ['Admin', 'Transcriber'],
        }