In [1]:
# To get multiple outputs from one code cell (without using print()):
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython import get_ipython
from IPython.display import HTML, Markdown, Image, Audio, display

import sys
from pathlib import Path

# For documenting the current environment:
def sys_info():
    """Print the Python version, environment name, OS platform and current folder."""
    template = (
        '\nPython ver: {}\nPython env: {}\n'
        'OS:         {}\nCurrent dir: {}\n'
    )
    # The environment name is the last component of the interpreter prefix.
    fields = (sys.version, Path(sys.prefix).name, sys.platform, Path.cwd())
    print(template.format(*fields))

# For enabling imports from current project code:
def add_to_sys_path(this_path, up=False):
    """
    Insert this_path into sys.path at index 1, unless it is already listed.

    The path is stored in POSIX form (forward slashes) and is placed at
    index 1, i.e. right after the first sys.path entry.

    :param this_path (str, Path): folder to make importable.
    :param up (bool, False): if True, the parent folder of this_path
           (1 level up) is used instead.
    Prints whether the path was added or was already present; returns None.
    """
    target = Path(this_path).parent if up else Path(this_path)
    newp = target.as_posix()

    # Compare as Path objects rather than raw strings: the same folder can be
    # listed with another spelling (native Windows separators, trailing slash),
    # which a plain `newp in sys.path` test misses and then duplicates.
    # Empty entries are skipped so they are never mistaken for '.'.
    already_listed = any(Path(entry) == target
                         for entry in sys.path
                         if isinstance(entry, str) and entry)

    if already_listed:
        msg = F'Path already in sys.path: {newp}'
    else:
        sys.path.insert(1, newp)
        msg = F'Path added to sys.path: {newp}'
    print(msg)

# If this ipynb file runs from a folder whose name starts with `nb_folder`
# (e.g. ./notebooks), the project code is assumed to reside 1 level up, so the
# parent folder is added to sys.path; otherwise the current folder itself is added.
nb_folder = 'notebooks'
add_to_sys_path(Path.cwd(), up=Path.cwd().name.startswith(nb_folder))


# For py modules/methods discovery:
def filter_dir(mdl, filter_str=None, start_with_str='_', exclude=True):
    """List the names in dir(mdl), filtered by prefix and optional substring.
       Input:
       :param mdl (object): module (or any object), optionally with submodule
              path(s), e.g. mdl.submdl1.submdl2.
       :param filter_str (str, None): if given, keep only the names containing
              that string (case-insensitive).
       :param start_with_str (str, '_'), exclude (bool, True): names starting with
              start_with_str are dropped when exclude is True and are the only
              ones kept when exclude is False. The defaults list the names that
              do not start with an underscore.
       Output: list of matching names, in dir() order.
       Example:
       >filter_dir(re) # lists the public methods of the re module.
    """
    candidates = []
    for attr_name in dir(mdl):
        has_prefix = attr_name.startswith(start_with_str)
        if not (has_prefix == exclude):
            candidates.append(attr_name)

    if filter_str is not None:
        needle = filter_str.lower()
        candidates = [attr_name for attr_name in candidates
                      if needle in attr_name.lower()]
    return candidates

# To create often-used subfolders:
def get_project_dirs(which=('data', 'images'),
                     use_parent=True):
    '''Return the folder(s) named in `which`, creating those that do not exist.

    :param which (iterable of str, or a single str/Path): folder name(s),
           relative to the base folder. A single name is treated as a
           one-item list.
    :param use_parent (bool, True): if True the base folder is the parent of
           the current working directory (the ipynb parent level), else the
           current working directory itself.
    :return (list of Path): one path per name, in the order given.
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(which, (str, Path)):
        which = [which]

    base_dir = Path.cwd().parent if use_parent else Path.cwd()

    dir_lst = []
    for d in which:
        DIR = base_dir.joinpath(d)
        if not DIR.exists():
            # parents=True allows nested names such as 'data/raw';
            # exist_ok=True tolerates the folder appearing between the
            # existence check and the creation.
            DIR.mkdir(parents=True, exist_ok=True)
        dir_lst.append(DIR)
    return dir_lst

# Default call: creates (if missing) the `data` and `images` folders one level
# above the current folder and returns their paths in that order.
DIR_DATA, DIR_IMG = get_project_dirs()

import pandas as pd
#pd.set_option("display.max_colwidth", 200)
from pprint import pprint as pp

    
def new_section(title='New section'):
    """Hand a styled <div> banner containing `title` to the shell's set_next_input.

    Returns whatever set_next_input returns.

    NOTE(review): IPython documents the signature as
    set_next_input(s, replace=False), so 'markdown' presumably acts as a truthy
    `replace` flag rather than selecting a cell type -- confirm the intent.
    An alternative that returns HTML(<div>) directly was previously left
    commented out here.
    """
    css = "text-align:center;background:#c2d3ef;padding:16px;color:#ffffff;font-size:2em;width:98%"
    banner = '<div style="{}">{}</div>'.format(css, title)
    shell = get_ipython()
    return shell.set_next_input(banner, 'markdown')


# For documenting the current environment:
def show_versions():
    """Build an HTML info box with the Python and main package versions.

    Reports the Python version, the environment name, the versions of numpy,
    scipy, pandas and matplotlib, and the current folder.

    The packages are looked up by name at call time, so the function does not
    depend on notebook-level aliases (np, sp, mpl) being defined. A package
    that cannot be imported is reported as 'not installed'.

    :return (IPython.display.HTML): the rendered "Versions" alert box.
    """
    from importlib import import_module

    def _version_of(module_name):
        # Return the module's __version__, or a placeholder if unavailable.
        try:
            module = import_module(module_name)
        except ImportError:
            return 'not installed'
        return getattr(module, '__version__', 'unknown')

    # (label incl. tab alignment, importable module name)
    packages = (('Numpy:\t\t', 'numpy'),
                ('Scipy:\t\t', 'scipy'),
                ('Pandas:\t\t', 'pandas'),
                ('Matplotlib:\t', 'matplotlib'))

    txt = '<pre><br>'
    txt += F'Python:\t\t{sys.version}<br>'
    txt += F'Python env:\t{Path(sys.prefix).name}<br>'
    for label, module_name in packages:
        txt += F'{label}{_version_of(module_name)}<br>'
    txt += F'Current dir: {Path.cwd()}'
    txt += '</pre>'
    div = f"""<div class="alert alert-info"><b>Versions:</b><br>{txt}</div>"""
    return HTML(div)


# autoreload extension: load it only if the shell's extension manager does not
# list it as loaded yet, so re-running this cell does not load it again.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    get_ipython().run_line_magic('load_ext', 'autoreload')

%autoreload 2

#..................
sys_info()

no_wmark = False
try:
    %load_ext watermark
    %watermark
except ModuleNotFoundError:
    no_wmark = True

if no_wmark:
    show_versions()
else:
    %watermark -iv


Path added to sys.path: C:/Users/catch/Documents/GitHub/DU-event-transcript-demo/resources/EventManagement

Python ver: 3.7.6 | packaged by conda-forge | (default, Jun  1 2020, 18:11:50) [MSC v.1916 64 bit (AMD64)]
Python env: p37
OS:         win32
Current dir: C:\Users\catch\Documents\GitHub\DU-event-transcript-demo\resources\EventManagement\notebooks

2021-01-06T15:56:02-05:00

CPython 3.7.6
IPython 7.16.1

compiler   : MSC v.1916 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 142 Stepping 10, GenuineIntel
CPU cores  : 8
interpreter: 64bit
matplotlib 3.3.1
pandas     1.0.5



---
---
# Amendments to the corrections dict and the people, names, places and upper lists:

In [None]:
from manage import (EventMeta as Meta,
                    EventTranscription as TRX)

In [5]:
# Non-GUI way:
# Keep local handles on the file attributes exposed by TRX; they are passed to
# TRX.readcsv in the cells below.
# NOTE(review): presumably these are paths to the CSV files -- confirm in manage.py.
people_fname = TRX.people_file
names_fname = TRX.names_file
places_fname = TRX.places_file
upper_fname = TRX.upper_file
corrections_fname = TRX.corrections_file

In [6]:
# Load the current term lists: each file is read with TRX.readcsv, and the
# attribute named after the list (upper, people, names, places) is converted
# to a plain Python list.
# NOTE(review): readcsv presumably returns a pandas DataFrame with one column
# per file -- confirm in manage.py.
upper_list = TRX.readcsv(upper_fname).upper.tolist()
people_list = TRX.readcsv(people_fname).people.tolist()
names_list = TRX.readcsv(names_fname).names.tolist()
places_list = TRX.readcsv(places_fname).places.tolist()

# NOTE(review): presumably maps a mis-transcribed term to its correction
# (it is queried with .get() in the next cell) -- confirm.
corrections = TRX.get_corrections_dict()

In [7]:
# search dict ('?' is the fallback value passed to .get() for a missing term):
corrections.get('west mckinney', '?')

'Wes McKinney'

In [8]:
# Candidate (term, correction) pairs to add to the corrections dict:
correction_lst = [('west mckinney', 'Wes McKinney'),
                 ('rashama', 'Reshama')]

# NOTE(review): check_corrections appears to print one line per pair and to
# return a number that equals -len(correction_lst) only when none of the
# pairs is present yet -- confirm in manage.py.
check = TRX.check_corrections(corrections, correction_lst)
# All-or-nothing: the pairs are added only when the check reports all as new.
# In the recorded run both terms already had a correction, so nothing was added.
if check == -1 * len(correction_lst):
    print("OK to include all.") 
    corrections = TRX.add_corrections(corrections, correction_lst)

0 ('west mckinney', 'Wes McKinney') Wes McKinney
1 ('rashama', 'Reshama') Reshama


In [235]:
# Candidate terms for the 'names' list:
new_names = ['matplotlib', 'seaborn','plotly','fibonacci', 'wikipedia', 'markdown','windows', ]

# NOTE(review): check_list appears to print each term with its lookup result
# (-1 when absent) and to return a number that equals -len(terms) only when
# every term is absent -- confirm in manage.py.
check = TRX.check_list(names_list, new_names)

# All-or-nothing: update the 'names' file only when every term is new, then
# re-read the file so names_list reflects the update.
# NOTE(review): the same check-then-update pattern is repeated below for
# places, upper and people; a small helper taking (which, terms) would remove
# the duplication.
if check == -1 * len(new_names):
    print("OK to include all.") 
    TRX.update_substitution_file(which='names', user_list=new_names)
    names_list = TRX.readcsv(names_fname).names.tolist()

matplotlib -1
seaborn -1
plotly -1
fibonacci -1
wikipedia -1
markdown -1
windows -1
OK to include all.


In [79]:
# Look up a single term in the places list.
# NOTE(review): this cell's execution count (In [79]) is later than that of
# the cell below (In [78]), which added the term -- hence the recorded result
# of 36 instead of -1.
TRX.check_list(places_list, ['columbia university'])

columbia university 36


36

In [78]:
# Re-read the places list from file before checking against it:
places_list = TRX.readcsv(places_fname).places.tolist()

# Candidate terms for the 'places' list:
new_terms = ['columbia university']
check = TRX.check_list(places_list, new_terms)

# Update the 'places' file only when every term is reported as new (-1 each),
# then re-read the file so places_list reflects the update.
if check == -1 * len(new_terms):
    print("OK to include all.") 
    TRX.update_substitution_file(which='places', user_list=new_terms)
    places_list = TRX.readcsv(places_fname).places.tolist()

columbia university -1
OK to include all.


In [72]:
# Candidate terms for the 'upper' list:
new_terms = ['nyu']
check = TRX.check_list(upper_list, new_terms)

# Update the 'upper' file only when every term is reported as new (-1 each).
# In the recorded run 'nyu' was already found (result 18), so nothing was updated.
if check == -1 * len(new_terms):
    print("OK to include all.") 
    TRX.update_substitution_file(which='upper', user_list=new_terms)
    upper_list = TRX.readcsv(upper_fname).upper.tolist()

nyu 18


In [241]:
# Candidate terms for the 'people' list:
new_terms = ['melissa','brian','wes mckinney', 'jeff', 'jeff ryback', 'marco', 'marco gorelli']
check = TRX.check_list(people_list, new_terms)

# Update the 'people' file only when every term is reported as new (-1 each),
# then re-read the file so people_list reflects the update.
if check == -1 * len(new_terms):
    print("OK to include all.") 
    TRX.update_substitution_file(which='people', user_list=new_terms)
    people_list = TRX.readcsv(people_fname).people.tolist()

melissa -1
brian -1
wes mckinney -1
jeff -1
jeff ryback -1
marco -1
marco gorelli -1
OK to include all.


---