In [2]:
# To get multiple outputs from one code cell (without using print()):
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython import get_ipython
from IPython.display import HTML, Markdown, Image, Audio

import sys
from pathlib import Path

# For documenting the current environment:
def sys_info():
    """Print a short report of the running environment.

    The report lists the Python version string, the name of the
    environment folder (last component of sys.prefix), the platform
    identifier and the current working directory.
    """
    report_lines = [
        '',
        f'Python ver: {sys.version}',
        f'Python env: {Path(sys.prefix).name}',
        f'OS:         {sys.platform}',
        f'Current dir: {Path.cwd()}',
        '',
    ]
    # Leading/trailing empty items reproduce the blank line before and
    # after the report.
    print('\n'.join(report_lines))

# For enabling imports from current project code:
def add_to_sys_path(this_path, up=False):
    """
    Register a folder in sys.path so its modules can be imported.

    this_path: folder to register (str or Path).
    up: when True, the parent of this_path is registered instead.

    The folder is stored in POSIX form (forward slashes) and inserted at
    index 1 of sys.path, unless that exact string is already listed.
    A one-line message reports which of the two happened.
    """
    target = Path(this_path)
    if up:
        target = target.parent
    posix_path = target.as_posix()

    if posix_path in sys.path:
        print(F'Path already in sys.path: {posix_path}')
        return

    sys.path.insert(1, posix_path)
    print(F'Path added to sys.path: {posix_path}')

# If this ipynb file is inside a folder, e.g. ./notebooks,
# the project code is assumed to reside 1 level up.
# The check is a prefix match on the current folder name, so any working
# directory whose name starts with `nb_folder` registers its parent;
# otherwise the working directory itself is registered.
nb_folder = 'notebooks'
add_to_sys_path(Path.cwd(), up=Path.cwd().name.startswith(nb_folder))


# For py modules/methods discovery:
def filter_dir(mdl, filter_str=None, start_with_str='_', exclude=True):
    """List attribute names of an object, for quick API discovery.

       Input:
       :param mdl (object): anything accepted by dir(), e.g. a module or
              a submodule such as mdl.submdl1.submdl2.
       :param filter_str (str, None): if given, keep only the names that
              contain this string (case-insensitive).
       :param start_with_str (str, '_'): prefix used for the first filter.
       :param exclude (bool, True): True drops the names starting with
              start_with_str; False keeps only those names.
       Output:
       list of matching names, in dir() order.
       Example:
       >filter_dir(re) # lists the public names of the re module.
    """
    candidates = [name for name in dir(mdl)
                  if name.startswith(start_with_str) != exclude]
    if filter_str is None:
        return candidates

    needle = filter_str.lower()
    return [name for name in candidates if needle in name.lower()]

# To create often-used subfolders:
def get_project_dirs(which=('data', 'images'),
                     use_parent=True):
    """Return the project folder(s) named in `which`, creating any that are missing.

    which: iterable of folder names; a single string is treated as one
           folder name rather than being iterated character by character.
    use_parent: when True (default) the folders live next to the current
                working directory (i.e. in its parent); when False they
                live inside the current working directory.

    Returns a list of Path objects, in the order given by `which`.
    """
    # A bare string would otherwise yield one folder per character.
    if isinstance(which, str):
        which = [which]

    base_dir = Path.cwd().parent if use_parent else Path.cwd()

    dir_lst = []
    for name in which:
        target = base_dir.joinpath(name)
        if not target.exists():
            # parents=True supports nested names such as 'data/raw';
            # exist_ok=True tolerates the folder appearing between the
            # exists() check and this call.
            target.mkdir(parents=True, exist_ok=True)
        dir_lst.append(target)
    return dir_lst

# Default call: returns the 'data' and 'images' folders located one level
# above the current working directory, creating them if they are missing.
DIR_DATA, DIR_IMG = get_project_dirs()

import pandas as pd
#pd.set_option("display.max_colwidth", 200)

import matplotlib as mpl
from matplotlib import pyplot as plt
plt.ion()
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
# and later releases dropped the old names, so fall back to the new name
# when the old one is not available.
_muted_style = 'seaborn-muted'
if _muted_style not in plt.style.available:
    _muted_style = 'seaborn-v0_8-muted'
plt.style.use(_muted_style)

from pprint import pprint as pp

# For adding a colorful divider in the notebook:
def add_div(div_class='info', div_start='Tip:', 
            div_text='Some tip here', output_string=True):
    """
    Build a Bootstrap-style alert <div> for use as a notebook divider.

    div_class: one of 'info', 'warning', 'danger' (case-insensitive); the
               full class name, e.g. 'alert-warning', is accepted as well.
    div_start: bold lead-in text.
    div_text: body text of the alert.
    output_string: selects how the <div> is delivered (see below).

    Behaviour with default `output_string=True`:
    The cell is overwritten with the output, but the cell mode is still 'code',
    not 'markdown'.
    Workaround: After running the function, click on the new cell, press ESC, 
                type 'm', then run the new cell.
    If `output_string=False`, the output is displayed in a new cell with the 
    code cell visible.
    An unknown `div_class` returns a Markdown object holding an error message.
    ```
    [x]
    add_div('alert-warning', 'Tip: ', 'some tip here', output_string=True)
    [x]
    <div class="alert alert-warning"><b>Tip: </b>some tip here</div>
    ```
    """
    accepted = ['info', 'warning', 'danger']
    div_class = div_class.lower()
    # Accept the full class name too, so the documented call
    # add_div('alert-warning', ...) is not rejected.
    prefix = 'alert-'
    if div_class.startswith(prefix):
        div_class = div_class[len(prefix):]

    if div_class not in accepted:
        # The offending argument is `div_class` (not `div_start`).
        msg = f'<div class="alert"><b>Wrong class:&nbsp;</b> `div_class` not in: {accepted}.</div>'
        return Markdown(msg)
    
    div = f"""<div class="alert alert-{div_class}"><b>{div_start}&nbsp;&nbsp;</b>{div_text}</div>"""
    if output_string:
        # NOTE(review): the second positional argument of set_next_input
        # appears to be `replace`, so 'markdown' would act only as a truthy
        # flag and not set the cell type - confirm against the IPython API.
        return get_ipython().set_next_input(div, 'markdown')
    else:
        return Markdown(div)

    
def new_section(title='New section'):
    """Queue a full-width banner <div> showing `title` as the next cell input.

    The banner markup is handed to the running IPython shell through
    set_next_input; that call's return value is returned unchanged.
    """
    css_rules = (
        'text-align:center',
        'background:#c2d3ef',
        'padding:16px',
        'color:#ffffff',
        'font-size:2em',
        'width:98%',
    )
    banner = '<div style="{}">{}</div>'.format(';'.join(css_rules), title)
    shell = get_ipython()
    return shell.set_next_input(banner, 'markdown')


# For documenting the current environment:
def show_versions():
    """Return an HTML alert box listing Python and key package versions.

    Used as a fallback when the `watermark` extension is unavailable.
    Package versions are looked up by importing each package on demand,
    so a package that is not installed is reported as 'not installed'
    instead of raising (the notebook never imports numpy or scipy itself).
    """
    import importlib  # local import keeps this helper self-contained

    def _version_of(module_name):
        # Version string of an importable package, or a placeholder.
        try:
            module = importlib.import_module(module_name)
        except ImportError:
            return 'not installed'
        return getattr(module, '__version__', 'unknown')

    txt = '<pre><br>'
    txt += F'Python:\t\t{sys.version}<br>'
    txt += F'Python env:\t{Path(sys.prefix).name}<br>'
    txt += F"Numpy:\t\t{_version_of('numpy')}<br>"
    txt += F"Scipy:\t\t{_version_of('scipy')}<br>"
    txt += F"Pandas:\t\t{_version_of('pandas')}<br>"
    txt += F"Matplotlib:\t{_version_of('matplotlib')}<br>"
    txt += F'Currrent dir: {Path.cwd()}'
    txt += '</pre>'
    div = f"""<div class="alert alert-info"><b>Versions:</b><br>{txt}</div>"""
    return HTML(div)


# autoreload extension: load it only once per kernel session
if 'autoreload' not in get_ipython().extension_manager.loaded:
    get_ipython().run_line_magic('load_ext', 'autoreload')

# Mode 2 of the autoreload magic.
%autoreload 2

#..................
# Print the environment report.
sys_info()

# Prefer the `watermark` extension for the version report; fall back to
# show_versions() when loading it raises ModuleNotFoundError.
no_wmark = False
try:
    %load_ext watermark
    %watermark
except ModuleNotFoundError:
    no_wmark = True

if no_wmark:
    # NOTE(review): show_versions() returns an HTML object; nested inside
    # this `if`, the value may not be auto-displayed - confirm.
    show_versions()
else:
    %watermark -iv


Path added to sys.path: C:/Users/catch/Documents/GitHub/DU-event-transcript-demo/resources/EventManagement

Python ver: 3.7.6 | packaged by conda-forge | (default, Jun  1 2020, 18:11:50) [MSC v.1916 64 bit (AMD64)]
Python env: p37
OS:         win32
Current dir: C:\Users\catch\Documents\GitHub\DU-event-transcript-demo\resources\EventManagement\notebooks

2020-12-21T15:00:52-05:00

CPython 3.7.6
IPython 7.16.1

compiler   : MSC v.1916 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 142 Stepping 10, GenuineIntel
CPU cores  : 8
interpreter: 64bit
pandas     1.0.5
matplotlib 3.3.1



---
---
# IMPLEMENTATION DETAILS: Audio transcription of Data Umbrella's video presentations on  YouTube
### Non-GUI workflow examples to show function calls, class methods, etc.
---

In [3]:
# Project code from `manage`, imported under the short aliases used in
# the rest of this notebook:
from manage import (EventMeta as Meta,
                    EventTranscription as TRX,
                    Workflow as FLO,
                    Utils as UTL)

# Location of the main README, as defined by EventMeta.MAIN_README:
main_readme = Meta.MAIN_README

---
# Original (live) README:
The live original is at https://github.com/data-umbrella/event-transcripts
#### This Markdown file has the links to the individual presentation transcripts, whether completed or not.

---
# Modified main readme file (in this repo):
* 'README_original.md' is a copy of the live README file with markers (HTML comments) added to identify the start and end of the events table.
* It includes all the names associated with an event, e.g.: "Hugo, James" -> "Hugo Bowne-Anderson, James Bourbeau"
* It may also contain any test/dummy entry created as demo entries.
* Additionally, some transcript Markdown files have undergone a "header normalization".


## Transcript "header normalization"
### The H2-headers items must have a unique list header:
Example: in "05" Andreas Mueller's md, there are 2 Video list items:
### First sub-header with correct format:  
```
- Video:  https://www.youtube.com/watch?v=5OL8XoMMOfA&feature=youtu.be&t=1
```
### The duplicated list item header, '- Video' was renamed "- Sprint Video":  
```
- Sprint Video:  [Scikit-learn Sprint Instructions](https://youtu.be/5OL8XoMMOfA) (30 minutes)
```
### Finally, list items without a heading (e.g. just a Markdown link) were given one:   
```
- Contributing primer: **[Contributing Workflow Commands](contributing/workflow.md)** (environment set-up, repo, submitting PR)
- Contributing to Scikit-learn: [Scikit-learn Contributing Documentation](http://scikit-learn.org/stable/developers/contributing.html)
```    

In [4]:
readme = FLO.show_local_readme(main_readme)
Markdown('### Local README:')
readme
Markdown('---\n---')

### Local README:

<p >
 <a href="https://www.dataumbrella.org" target="_blank"> <img src="images/full_logo_transparent.png" height="30%" width="30%" /> </a>
</p>

# Event Transcripts


## [Videos](https://www.youtube.com/c/DataUmbrella/videos)
Subscribe to our YouTube [Data Umbrella channel](https://www.youtube.com/c/DataUmbrella/videos).

## [Contributing Guide](CONTRIBUTING.md)
Review our [Contributing Instructions](CONTRIBUTING.md) before beginning editing or transcribing work.  

<!-- main_tbl_start -->
| #  | Speaker             | Talk Transcript  | Transcriber  | Status | Notes |
|--- |---                  |---               |---           |---     |---    |
| 01 | Hugo Bowne-Anderson | Bayesian Data Science  | N.A. | Not recorded | |
| 02 | Bruno Goncalves     | Time Series Modeling   | N.A. | Not recorded | |
| 03 | Ty Shaikh           | [Webscraping Poshmark](2020/03-ty-shaikh-webscraping.md) | ?  |  | |
| 04 | Ali Spittel         | [Navigating Your Tech Career](2020/04-ali-spittel-career.md)| Janine | Needs reviewer | |
| 05 | Andreas Mueller     | [Crash Course in Contributing to Scikit-learn](2020/05-andreas-mueller-contributing.md) | Reshama Shaikh | Complete | |
| 06 | Reshama Shaikh      | [Example PR for Scikit-learn](2020/06-reshama-shaikh-sklearn-pr.md) | Reshama, Mark  | Complete | |
| 07 | Shailvi Wakhlu      | [Fixing Bad Data and Using SQL](2020/07-shailvi-wakhlu-fixing-data.md) | Juanita | Complete | |
| 08 | Matt Brems          | [Data Science with Missing Data](2020/08-matt-brems-missing-data.md) | Barbara | | |
| 09 | Sam Bail            | [Intro to Terminal](2020/09-sam-bail-terminal.md) | Isaack | Complete | |
| 10 | Emily Robinson      | [Build a Career in Data Science](2020/10-emily-robinson-career.md) | Kevin | Complete | |
| 11 | Rebecca Kelly       | [Kdb Time Series Database](2020/11-rebecca-kelly-kdb.md) | Coretta | Needs reviewer | Paragraphs are too long |
| 12 | Mridu Bhatnagar     | [Build a Bot](2020/12-mridu-bhatnagar-bot.md) | ? |  | |
| 13 | Liz DiLuzio         | [Creating Nimble Data Processes](2020/13-liz-diluzio-data-process.md) | Lily | Complete | |
| 14 | Megan Robertson     | [3 Lessons From 3 Years of Data Science](2020/14-megan-robertson-career.md) | Sethupathy  | Needs reviewer | Headers should not be in capital letters, etc |
| 15 | Emma Gouillart      | [Data Visualization with Plotly](2020/15-emma-gouillart-plotly.md) | ? |  | |
| 16 | Hugo Bowne-Anderson, James Bourbeau | [Data Science and Machine Learning at Scale](2020/16-hugo-james-dask.md) | Cynthia |  | |
| 17 | Carol Willing      | [Contributing to Core Python](2020/17-carol-python.md) | ? |  | |
<!-- main_tbl_end -->


---
---

In [5]:
# Markdown code of file:
Markdown('### README Markdown code:  \n**Note the added comment lines at start and end of main table: added for automatic update**')
pp(readme.data)

### README Markdown code:  
**Note the added comment lines at start and end of main table: added for automatic update**

('<p >\n'
 ' <a href="https://www.dataumbrella.org" target="_blank"> <img '
 'src="images/full_logo_transparent.png" height="30%" width="30%" /> </a>\n'
 '</p>\n'
 '\n'
 '# Event Transcripts\n'
 '\n'
 '\n'
 '## [Videos](https://www.youtube.com/c/DataUmbrella/videos)\n'
 'Subscribe to our YouTube [Data Umbrella '
 'channel](https://www.youtube.com/c/DataUmbrella/videos).\n'
 '\n'
 '## [Contributing Guide](CONTRIBUTING.md)\n'
 'Review our [Contributing Instructions](CONTRIBUTING.md) before beginning '
 'editing or transcribing work.  \n'
 '\n'
 '<!-- main_tbl_start -->\n'
 '| #  | Speaker             | Talk Transcript  | Transcriber  | Status | '
 'Notes |\n'
 '|--- |---                  |---               |---           |---     '
 '|---    |\n'
 '| 01 | Hugo Bowne-Anderson | Bayesian Data Science  | N.A. | Not recorded | '
 '|\n'
 '| 02 | Bruno Goncalves     | Time Series Modeling   | N.A. | Not recorded | '
 '|\n'
 '| 03 | Ty Shaikh           | [Webscraping '
 'Poshmark](2020/03-ty-sh

---

<div style="text-align:center;background:#c2d3ef;padding:16px;color:#ffffff;font-size:2em;width:98%">A. Admin workflow (code):</div>

---
---
# ADD mode: initial event setup: new table entry & creation of 'starter transcript' file

1. Get generic header dict
2. Update header dict interactively
3. Save transcript header dict as json in year/meta/ 
4. Create 'starter transcript' using header.json file
5. Update table in main Transcripts/README.md with new entry
6. TODO: send PR

### 1. Get generic header dict

In [133]:
tr = Meta.TranscriptMeta()

### 2. Update header dict (will be done via form):

In [134]:
# Extract dict for update:
meta_dict = tr.metadata 

# dummy data (stand-in for the values a user would enter in the form):
DU_video = 'https://youtu.be/PU1WyDPGePI'
DU_meetup = 'https://www.meetup.com/nyc-data-umbrella/events/271116695/'
# presumably fills the header fields of `tr` from these values - see the
# displayed tr.metadata below; verify against EventMeta.dummy_update.
Meta.dummy_update(tr, meta_dict,
                  DU_video,
                  DU_meetup,
                  title='Demo Audio Transcription.', titlekw='Demo foo')

# Updated dict:
tr.metadata 

OrderedDict([('presenter', 'Cat Chenal, Reshama Shaikh'),
             ('title', 'Demo Audio Transcription.'),
             ('year', '2020'),
             ('transcript_md', '18-cat-reshama-demo-foo.md'),
             ('meetup_url',
              'https://www.meetup.com/nyc-data-umbrella/events/271116695/'),
             ('yt_video_id', 'PU1WyDPGePI'),
             ('slides_url', 'N.A.'),
             ('repo_url', 'N.A.'),
             ('notebook_url', 'N.A.'),
             ('transcriber', '?'),
             ('extra_references',
              '## Other References\n- Binder:  <url>\n- Paper:  <Paper url or citation>  \n- Wiki:  This is an excellent [wiki on Demo foo](http://en.wikipedia.org/wiki/Main_Page) (extra ref 2) \n'),
             ('video_href_w', '25%'),
             ('video_href',
              'http://www.youtube.com/watch?feature=player_embedded&v=PU1WyDPGePI'),
             ('video_href_src', 'http://img.youtube.com/vi/PU1WyDPGePI/0.jpg'),
             ('video_href_alt', 'De

### 3. Save transcript header dict
### 4. Create 'starter transcript'

In [135]:
tr.save_meta()
tr.save_transcript_md()

Markdown tpl rendered into file:
 C:\Users\catch\Documents\GitHub\DU-event-transcript-demo\2020\18-cat-reshama-demo-foo.md


### (via GUI) Show rendered transcript_md file with update:

In [136]:
Markdown('---')
Markdown(filename=tr.transcript_md_path())
Markdown('---')

---

# Cat Chenal, Reshama Shaikh: Demo Audio Transcription.

## Key Links

-   Transcript:  https://github.com/data-umbrella/event-transcripts/blob/main/2020/18-cat-reshama-demo-foo.md
-   Meetup Event:  https://www.meetup.com/nyc-data-umbrella/events/271116695/
-   Video: https://youtu.be/PU1WyDPGePI
-   Slides: N.A.
-   GitHub repo: N.A.
-   Jupyter Notebook: N.A.
-   Transcriber: ?

## Other References

-   Binder:
-   Paper:
-   Wiki: This is an excellent [wiki on Demo
  foo](http://en.wikipedia.org/wiki/Main_Page) (extra ref 2)

## Video

<div width="25%">
  <a href="http://www.youtube.com/watch?feature=player_embedded&v=PU1WyDPGePI" target="_blank">
              <img src="http://img.youtube.com/vi/PU1WyDPGePI/0.jpg" 
                   alt="Demo AudioTranscription."/>
          </a>
        
</div>

## Transcript

N.A.

---

### 5. Update table in main Transcripts/README.md
### (via GUI): After displaying transcript_md, ask if ok to update:

In [139]:
# Ask for confirmation; only an answer of exactly 'y' (any case) is taken
# as yes, anything else skips the update.
do_update = input("Update table entry in README? [y]: yes, [n]: no")
do_update = 1 if do_update.lower() == 'y' else 0

if do_update:
    tr.update_readme()

Update table entry in README? [y]: yes, [n]: no y


'Table in README_original.md was updated.'

### Show updated readme:

In [140]:
Markdown('---\n### Udated (local) README:')
Markdown(filename=main_readme)
Markdown('---')

---

<p >
 <a href="https://www.dataumbrella.org" target="_blank"> <img src="images/full_logo_transparent.png" height="30%" width="30%" /> </a>
</p>

# Event Transcripts


## [Videos](https://www.youtube.com/c/DataUmbrella/videos)
Subscribe to our YouTube [Data Umbrella channel](https://www.youtube.com/c/DataUmbrella/videos).

## [Contributing Guide](CONTRIBUTING.md)
Review our [Contributing Instructions](CONTRIBUTING.md) before beginning editing or transcribing work.  

<!-- main_tbl_start -->
| #  | Speaker             | Talk Transcript  | Transcriber  | Status | Notes |
|--- |---                  |---               |---           |---     |---    |
| 01 | Hugo Bowne-Anderson | Bayesian Data Science  | N.A. | Not recorded | |
| 02 | Bruno Goncalves     | Time Series Modeling   | N.A. | Not recorded | |
| 03 | Ty Shaikh           | [Webscraping Poshmark](2020/03-ty-shaikh-webscraping.md) | ?  |  | |
| 04 | Ali Spittel         | [Navigating Your Tech Career](2020/04-ali-spittel-career.md)| Janine | Needs reviewer | |
| 05 | Andreas Mueller     | [Crash Course in Contributing to Scikit-learn](2020/05-andreas-mueller-contributing.md) | Reshama Shaikh | Complete | |
| 06 | Reshama Shaikh      | [Example PR for Scikit-learn](2020/06-reshama-shaikh-sklearn-pr.md) | Reshama, Mark  | Complete | |
| 07 | Shailvi Wakhlu      | [Fixing Bad Data and Using SQL](2020/07-shailvi-wakhlu-fixing-data.md) | Juanita | Complete | |
| 08 | Matt Brems          | [Data Science with Missing Data](2020/08-matt-brems-missing-data.md) | Barbara | | |
| 09 | Sam Bail            | [Intro to Terminal](2020/09-sam-bail-terminal.md) | Isaack | Complete | |
| 10 | Emily Robinson      | [Build a Career in Data Science](2020/10-emily-robinson-career.md) | Kevin | Complete | |
| 11 | Rebecca Kelly       | [Kdb Time Series Database](2020/11-rebecca-kelly-kdb.md) | Coretta | Needs reviewer | Paragraphs are too long |
| 12 | Mridu Bhatnagar     | [Build a Bot](2020/12-mridu-bhatnagar-bot.md) | ? |  | |
| 13 | Liz DiLuzio         | [Creating Nimble Data Processes](2020/13-liz-diluzio-data-process.md) | Lily | Complete | |
| 14 | Megan Robertson     | [3 Lessons From 3 Years of Data Science](2020/14-megan-robertson-career.md) | Sethupathy  | Needs reviewer | Headers should not be in capital letters, etc |
| 15 | Emma Gouillart      | [Data Visualization with Plotly](2020/15-emma-gouillart-plotly.md) | ? |  | |
| 16 | Hugo, James         | [Data Science and Machine Learning at Scale](2020/16-hugo-james-dask.md) | Cynthia |  | |
| 17 | Carol Willing      | [Contributing to Core Python](2020/17-carol-python.md) | ? |  | |
| 18| Cat Chenal, Reshama Shaikh| [Demo Audio Transcription.](2020/18-cat-reshama-demo-foo.md)| ?| Not yet processed (editor needed)| | 
<!-- main_tbl_end -->


---

---
---
# MODIFY mode: Amend an existing transcript (header) or main table entry

1. Instantiate the TranscriptMeta object with the event id.  
If there is no associated meta.json, it is created after parsing the transcript md.
2. Amend the table entry
3. Save


### Note on Event 17, "Contributing to Core Python":
* Its header contains errors due to re-use of previous presentation readme as template
* The Video link is missing a thumbnail
* Its `alt` value is still "Data Science and Machine Learning at Scale"

### => Using a template for the transcript file (this implementation) would avoid this.

In [8]:
# Event number as shown in the '#' column of the README table:
idn = 17
tr1 = Meta.TranscriptMeta(idn)

# index trick to display a series with the df style
# (a one-row label slice keeps the result a DataFrame; the output below
# shows event 17 at index label 16, hence idn-1)
tr1.df.loc[idn-1:idn-1]
# Display the object's repr:
tr1

Unnamed: 0,N,Speaker,Talk Transcript,Transcriber,Status,Notes,title,readme
16,17,Carol Willing,[Contributing to Core Python](2020/17-carol-py...,?,,,Contributing to Core Python,17-carol-python.md


The transcript is excluded from the repr as it can be VERY long. To view it, use:
EventMeta.pp(TranscriptMeta.metadata['formatted_transcript'])
  warn(msg)


{'Github Repo': 'N.A.',
 'Jupyter Notebook': 'N.A.',
 'audio_text': '17_Pkg-DKkObKs.txt',
 'audio_track': '17_Pkg-DKkObKs.mp4',
 'extra_references': '## Reference Links\n'
                     '- Python On Discourse:  '
                     'https://discuss.python.org/c/welcome/12 \n'
                     "- Carol'S Pycon 2015 Talk:  "
                     'https://www.youtube.com/watch?v=szeo1XgmuEk \n'
                     '- Contributing To Scikit-Learn:  '
                     'https://www.dataumbrella.org/open-source/contributing-to-scikit-learn \n'
                     '- Book Recommendation, High Performance Python:  '
                     'https://www.oreilly.com/library/view/high-performance-python/9781492055013/ \n',
 'formatted_transcript': '< NOT SHOWN >',
 'has_transcript': True,
 'idn': '17',
 'meetup_url': 'https://www.meetup.com/data-umbrella/events/273988042/',
 'meta_json': '17.json',
 'notebook_url': 'N.A.',
 'notes': '',
 'presenter': 'Carol Willing',
 'repo_url': '

In [12]:
# Show top of text, 2 ways:

tr1.metadata['formatted_transcript'][:20]
# or
#print(tr1.get_transcript_text()[:900])

['hello everyone thank you for joining',
 'our webinar for today uh thanks for',
 'joining data umbrella',
 "i'm gonna do a quick introduction uh",
 'carol willing is going to do her talk',
 "and we'll have a q a",
 'session at the end and and this webinar',
 'is being recorded',
 "a little bit about me i'm a statistician",
 "data scientist i'm the founder of data",
 'umbrella',
 'and i am on twitter linkedin github has',
 'raised my s',
 'feel free to follow me',
 "we have a code of conduct we're",
 'dedicated to providing harassment free',
 'professional',
 'respectful experience for everyone this',
 'applies to the chat',
 'as well um thank you for helping make']

### Amendments: via GUI (provides validation)
---
---

<div style="text-align:center;background:#c2d3ef;padding:16px;color:#ffffff;font-size:2em;width:98%">B. Editor workflow (code):</div>

### 1. Instantiate the `TranscriptMeta` class with selected idn:

In [None]:
# Event selected for editing:
idn = 12
tr = Meta.TranscriptMeta(idn)

# index trick to display a series with the df style
# (a one-row label slice keeps the result a DataFrame)
tr.df.loc[idn-1:idn-1]

# Show thumbnail:
# (the image source is the 'video_href_src' entry of the metadata)
Image(tr.metadata['video_href_src'])

### 2. Instantiate the `YTAudio` class:

In [None]:
yta = TRX.YTAudio(tr)

yta.captions_xml is None

### 3. Download the audio (recommended for listening during editing)

In [None]:
yta.download_audio()

### 4. Display audio via player:

In [None]:
Audio(filename=yta.audio_filepath)

### 5. Get the automated, English transcript:

In [None]:
raw_transcript = yta.get_initial_transcript()
print(raw_transcript)

### 6. Edit (via GUI)

### 7. Save (partial) edit:

In [None]:
UTL.save_file(Meta.DIR_DATA.joinpath(tr.metadata['audio_text']), raw_transcript)

### 8. Render raw_transcript in Markdown:

In [None]:
raw_mark = Markdown(raw_transcript)
raw_mark

### 9. After initial transcript is generated, user can modify the replacements list and retry:

#### Populate the lists & dict from files:

In [None]:
# Non-GUI way:
# File locations exposed by EventTranscription:
people_fname = TRX.people_file
names_fname = TRX.names_file
places_fname = TRX.places_file
upper_fname = TRX.upper_file
correct_json = TRX.correct_json

# Corrections loaded from the json file (used as a dict further below):
corrections = UTL.load_file_contents(correct_json)
# presumably readcsv returns a DataFrame whose single column is named
# after the list ('upper', 'people', 'names', 'places') - verify in
# EventTranscription.readcsv.
upper_list = TRX.readcsv(upper_fname).upper.tolist()

people_list = TRX.readcsv(people_fname).people.tolist()
names_list = TRX.readcsv(names_fname).names.tolist()
places_list = TRX.readcsv(places_fname).places.tolist()

#### All added entries are saved in lowercase because the initial transcript is using the video captions, which ~~are~~ can be _lowercase_.
**Important Note**  
> Many (most) of the auto-generated captions xml files have all-lowercase text. Some have entities that are recognized for true casing, but they would still require some cleanup. This is why the xml text is processed as lowercase.

#### Maybe, search the list(s) or corrections dict first:

```
TRX.search_list(upper_list,'api')
TRX.search_list(people_list,'Mridu bhatnagar')

corrections.get('github', '?')
```

#### To add terms for uppercasing:
```
new_terms = ['nlp',]
TRX.update_substitution_file(which='upper', user_list=new_terms)

# reload & check after update:
upper_list = TRX.readcsv(upper_fname).upper.tolist()
TRX.search_list(upper_list, new_terms[0])
```

#### To add names (or people, places) for titlecasing:
##### Note that adding names with an apostrophe will result in uppercasing of the letter following it, e.g.: "april's fools" => "April'S Fools"
```
new_names = ['ford foundation',]
TRX.update_substitution_file(which='names', user_list=new_names)

# reload & check after update:
names_list = TRX.readcsv(names_fname).names.tolist()
TRX.search_list(names_list, new_names[0])
```

#### To add an entry to the corrections dict:
##### Only amend the corrections for entries that do not fit the upper- or titlecasing schemes, e.g.: 'whatsapp' -> 'WhatsApp'
```
corrections['github'] = 'GitHub'
UTL.save_file(correct_json, corrections)
corrections = UTL.load_file_contents(correct_json)
```