# Nipams Project Overview & Operations

1. Gettings started
    1. Local  (Docker)
    2. Colab  (GoogleDrive & VSCode & Git)
2. Environment Variables
3. Git & Versioning
4. Data LifeCycle
    1. Download (GoogleDrive or GCP)
    2. Process & Harmonize
5. Modeling
    1. Train
6. Deployment
    1. GCP Auth
    2. Dash Deployment

In [None]:
DO_NOT_EXECUTE_ALL_CELLS

# Getting Started

## Develop - Env:Local (Docker)
### Recommended
1. Clone Repo Locally
2. Open Folder in VSCode
3. Recommended Docker Setup (using docker-compose)
    1. JupyterLab : `docker-compose up jupyterlab` and open in browser
        1. Connect to VSCode - https://code.visualstudio.com/docs/datascience/jupyter-notebooks#_connect-to-a-remote-jupyter-server
    2. Dash : `docker-compose up dashapp` and open in browser

In [39]:
import os; from importlib import reload;
%cd {os.environ['PROJECT_PATH']}
from utils.constants import *

/app


### Advanced
1. Complete Above steps
2. Individual Containers Docker
    1. Build Dockerfile (use VSCode) into image
    2. Run Docker Image (use interactive mode)
    3. Attach Shell to Running Container
    4. Run Jupyter (in Docker)
        1. `~/.local/bin/jupyter-lab --ip 0.0.0.0` and open in browser
        2. `~/.local/bin/jupyter-notebook --ip 0.0.0.0` and open in browser
        3. `python3 -m jupyterlab --port=8888 --no-browser --ip=0.0.0.0 --allow-root`
    5. Run Dash App
        1. `python3 src/dash_app/index.py`

## Develop - Env:Colab

### Recommended : 
1. Run Following Cells
    1. Connects to Drive for src files etc

### Advanced : 
2. Optionally setup Git
    1. Accesses Git Creds (stored in .env)
3. Optionally setup VSCode
    1. Sets up ngrok tunnel
    2. Click link when displayed to access VSCode from the Web

In [4]:
# !rm /root/.ngrok2/ngrok.yml
# !echo $NGROK_AUTH
# !ngrok authtoken $NGROK_AUTH
# !cat /root/.ngrok2/ngrok.yml

In [None]:
#@title Connect to gDrive & Run VS Code
PROJECT_FOLDER = "/content/drive/MyDrive/workspace/nipams-data-project" #@param {"type":"string"}

MOUNT_GDRVE = True
GIT_SETUP = True #@param {"type":"boolean"}
NGROK_SETUP = True #@param {"type":"boolean"}
VSCODE_SETUP = True #@param {"type":"boolean"}

if MOUNT_GDRVE:
  ## Load this for local development version of library.. Loads from GoogleDrive (changes persist)
  from google.colab import drive, widgets, data_table; 
  drive.mount('/content/drive');
  %cd "$PROJECT_FOLDER"
  
  # - Load .env
  # !pip install -q dotenv
  # import dotenv; dotenv.load_dotenv('.env')

  # - Install Requirements
  !pip install -qr requirements.txt

  # - Import Utils .py FIles
  import sys, os; from importlib import reload; sys.path.append(os.path.abspath("./src"))
  from utils import notebook_helpers; reload(notebook_helpers)
  !pwd

if GIT_SETUP:
  !git config --global user.email $GIT_EMAIL
  !git config --global user.name $GIT_USERNAME

if NGROK_SETUP:
  # - Install NGROK
  # !ngrok authtoken $NGROK_AUTH
  !ngrok authtoken 24AKT2LYZ0xRvS5Eam6lbsEBhwm_c2yLFuR1DrtoCtyVvAHZ

if VSCODE_SETUP:
  !pip install -q colabcode
  # get_ipython().system('pip install colabcode &> dev/null')
  from colabcode import ColabCode
  # !python -c 'from colabcode import ColabCode; ColabCode(port=10000)'
  print('''
  Run in Terminal : (located in bottom left)
  cd "/content/drive/MyDrive/workspace/nipams-data-project" && colabcode --port 10000 && rm install.sh
  ''')
  print(f'cd "{PROJECT_FOLDER}" && colabcode --port 10000 && rm install.sh')



# Environment Variables & Configuration

In [1]:
#@title Review Existing Environment Values
import os; os.environ

environ{'PATH': '/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin',
        'HOSTNAME': '26c363453ebc',
        'PROJECT_PATH': '/app',
        'GIT_EMAIL': 'angus.mclean@mail.mcgill.ca',
        'GIT_TOKEN': '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',
        'GIT_REPOSITORY': 'nipams-data-project',
        'ENV': 'LOCAL',
        'GCP_PROJECT': 'nipams',
        'PYTHONPATH': '/app/src/',
        'GOOGLE_APPLICATION_CREDENTIALS': 'creds/nipams-83df6260e5a8.json',
        'GCP_BUCKET': 'data_cleaned_lvet',
        'GIT_USERNAME': 'Angus-McLean',
        'LANG': 'C.UTF-8',
        'GPG_KEY': 'E3FF2839C048B25C084DEBE9B26995E310250568',
        'PYTHON_VERSION': '3.8.12',
        'PYTHON_PIP_VERSION': '21.2.4',
        'PYTHON_SETUPTOOLS_VERSION': '57.5.0',
        'PYTHON_GET_PIP_URL': 'https://github.com/pypa/get-pip/raw/2caf84b14febcda8077e59e9b8a6ef9a680aa392/public/get-pip.py',
        'PYTHON_GET_PIP_SHA256': '7c5239cea323cadae36083079a5ee6b2b3d56f25762a0

# Git Workflow & Git Commands
1. Set User, Token and Repository in .env
2. Load .env.yaml
3. Set the remote repo URL : `git remote set-url origin $GIT_PATH`
4. Perform Git Commands : `git commit`, `git push`, `git pull`


In [16]:
#@title Git Status
!git status

On branch master
Your branch is ahead of 'origin/master' by 7 commits.
  (use "git push" to publish your local commits)

Changes to be committed:
  (use "git reset HEAD <file>..." to unstage)

	[32mnew file:   __main__.py[m
	[32mmodified:   notebooks/nipams-data-project - Basic Run.ipynb[m
	[32mmodified:   notebooks/nipams-data-project - LifeCycle & Snippets.ipynb[m
	[32mmodified:   requirements.txt[m
	[32mnew file:   src/dash/__init__.py[m
	[32mmodified:   src/dash/app.py[m
	[32mnew file:   src/dash/apps/dash_global.py[m
	[32mnew file:   src/dash/apps/data_load.py[m
	[32mnew file:   src/dash/apps/data_overview.py[m
	[32mnew file:   src/dash/apps/data_table.py[m
	[32mnew file:   src/dash/apps/modeling.py[m
	[32mnew file:   src/dash/index.py[m

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   notebooks/nipams-data-project - Lif

In [15]:
#@title Git Add Changes
!git add .

In [None]:
#@title Git Pull
!git pull

In [None]:
#@title Git Log
!echo "----------- SUMMARY -----------"
!git log --oneline
!echo "----------- DETAILS -----------"
!git log

In [None]:
#@title Git Commit (run with caution)
git_commit_msg="<empty message>" #@param
!git commit -am "{git_commit_msg}"

In [None]:
# !git push origin
# !git push "https://$GIT_TOKEN@github.com/$GIT_USERNAME/$GIT_REPOSITORY.git"
# !git push "https://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@github.com/Angus-McLean/nipams-data-project.git"

# Data LifeCycle

## Data Download

In [30]:
from data import load_data; reload(load_data)
# load_data.fetch_data_from_gcs(folder='', pattern=MAT_FILE_PATTERN, dest_folder='data/raw_mat/', limit_files=10):
load_data.fetch_data_from_gcs(
    folder='data_cleaned_LVET_m',
    pattern='HLV',
    dest_folder='data/raw_mat/',
    limit_files=20
)

  a = df[filenameCol].str.extract('(sub\d+)_([A-z]+)(\d*)\.mat')


['data/raw_mat/sub112_HLV1.mat',
 'data/raw_mat/sub112_HLV2.mat',
 'data/raw_mat/sub176_HLV1.mat',
 'data/raw_mat/sub176_HLV2.mat',
 'data/raw_mat/sub176_HLV3.mat',
 'data/raw_mat/sub227_HLV1.mat',
 'data/raw_mat/sub227_HLV2.mat',
 'data/raw_mat/sub227_HLV3.mat',
 'data/raw_mat/sub251_HLV1.mat',
 'data/raw_mat/sub251_HLV2.mat',
 'data/raw_mat/sub251_HLV3.mat',
 'data/raw_mat/sub336_HLV1.mat',
 'data/raw_mat/sub336_HLV2.mat',
 'data/raw_mat/sub336_HLV3.mat',
 'data/raw_mat/sub337_HLV2.mat',
 'data/raw_mat/sub337_HLV3.mat',
 'data/raw_mat/sub341_HLV2.mat',
 'data/raw_mat/sub341_HLV3.mat',
 'data/raw_mat/sub442_HLV1.mat',
 'data/raw_mat/sub472_HLV2.mat']

## Process Matlab Files to Pickle DataFrame

In [None]:
from data import load_data, preprocess; reload(load_data);reload(preprocess);

dfBpAll, dfImuAll = load_data.load_dataframe_from_mat(folder='data/raw_mat/', pattern='LLV')
dfAll = preprocess.merge_imu_vcg_with_heartbeats(dfBpAll, dfImuAll)
dfAll.to_pickle(path='data/interim/df_LLV.pickle')

In [10]:
import pandas as pd
dfAll = pd.read_pickle('data/interim/df_LLV.pickle')
dfAll

Unnamed: 0,file,ax,ay,az,gx,gy,gz,heartbeat,patient,test_type,test_num,tsCC,ts,pp,sbp,dbp
0,sub019_LLV1.mat,-1.213860,1.086967,-9.967060,1.235962,-0.991821,-0.381470,3.0,sub019,LLV,1,True,0 days 00:00:02.235000,69.087044,132.656695,63.569650
1,sub019_LLV1.mat,-1.209870,1.050456,-9.969455,1.022339,-0.549316,0.160217,3.0,sub019,LLV,1,True,0 days 00:00:02.240000,69.087044,132.656695,63.569650
2,sub019_LLV1.mat,-1.213860,1.091756,-9.914388,0.999451,-0.556946,-0.411987,3.0,sub019,LLV,1,True,0 days 00:00:02.245000,69.087044,132.656695,63.569650
3,sub019_LLV1.mat,-1.228225,1.089362,-9.861716,0.839233,0.000000,-0.045776,3.0,sub019,LLV,1,True,0 days 00:00:02.250000,69.087044,132.656695,63.569650
4,sub019_LLV1.mat,-1.197101,1.057439,-9.911994,0.701904,0.015259,-0.267029,3.0,sub019,LLV,1,True,0 days 00:00:02.255000,69.087044,132.656695,63.569650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81318,sub221_LLV3.mat,2.831143,0.289698,-9.898826,-6.214142,-5.847931,-0.667572,41.0,sub221,LLV,3,True,0 days 00:00:28.472000,44.949636,118.662947,73.713311
81319,sub221_LLV3.mat,2.786850,0.284910,-10.290278,-3.707886,-8.377075,-0.907898,41.0,sub221,LLV,3,True,0 days 00:00:28.477000,44.949636,118.662947,73.713311
81320,sub221_LLV3.mat,2.762908,0.308852,-10.319008,-1.396179,-10.063171,-0.953674,41.0,sub221,LLV,3,True,0 days 00:00:28.482000,44.949636,118.662947,73.713311
81321,sub221_LLV3.mat,2.650381,0.244209,-10.163385,0.274658,-10.032654,-0.915527,41.0,sub221,LLV,3,True,0 days 00:00:28.487000,44.949636,118.662947,73.713311


## Read Data from Pickle

In [46]:
dfAll = load_data.load_dataframe_from_pickle('data/interim','LLV')
dfAll

Unnamed: 0,file,ax,ay,az,gx,gy,gz,heartbeat,patient,test_type,...,test_type.1,test_num,tsCC,ts,pp,sbp,dbp,test_type_bp,test_num_bp,patient_bp
0,sub019_LLV1.mat,-1.213860,1.086967,-9.967060,1.235962,-0.991821,-0.381470,3.0,sub019,LLV,...,LLV,1,True,0 days 00:00:02.235000,69.087044,132.656695,63.569650,LLV,1,sub019
1,sub019_LLV1.mat,-1.209870,1.050456,-9.969455,1.022339,-0.549316,0.160217,3.0,sub019,LLV,...,LLV,1,True,0 days 00:00:02.240000,69.087044,132.656695,63.569650,LLV,1,sub019
2,sub019_LLV1.mat,-1.213860,1.091756,-9.914388,0.999451,-0.556946,-0.411987,3.0,sub019,LLV,...,LLV,1,True,0 days 00:00:02.245000,69.087044,132.656695,63.569650,LLV,1,sub019
3,sub019_LLV1.mat,-1.228225,1.089362,-9.861716,0.839233,0.000000,-0.045776,3.0,sub019,LLV,...,LLV,1,True,0 days 00:00:02.250000,69.087044,132.656695,63.569650,LLV,1,sub019
4,sub019_LLV1.mat,-1.197101,1.057439,-9.911994,0.701904,0.015259,-0.267029,3.0,sub019,LLV,...,LLV,1,True,0 days 00:00:02.255000,69.087044,132.656695,63.569650,LLV,1,sub019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81318,,,,,,,,,,,...,LLV,3,True,0 days 00:00:28.472000,44.949636,118.662947,73.713311,LLV,3,sub221
81319,,,,,,,,,,,...,LLV,3,True,0 days 00:00:28.477000,44.949636,118.662947,73.713311,LLV,3,sub221
81320,,,,,,,,,,,...,LLV,3,True,0 days 00:00:28.482000,44.949636,118.662947,73.713311,LLV,3,sub221
81321,,,,,,,,,,,...,LLV,3,True,0 days 00:00:28.487000,44.949636,118.662947,73.713311,LLV,3,sub221


## Save DataFrames to BigQuery

In [65]:
FILE_PROCESSED = 'data/interim/-LV2_tmp_oct15.pickle'

dfAll = load_data.load_dataframe_from_pickle(path=FILE_PROCESSED)
dfImu = dfAll[INDICIES + INFO_COLS + IMU_COLS]
dfBp = dfAll[INDICIES + INFO_COLS + BP_COLS]

# dfImu.heartbeat = dfImu.heartbeat.astype(int)
dfImu.ts = dfImu.ts.astype(int)/1e9

dfImu.to_gbq("inputs.imu_tmp",'nipams', if_exists='replace', table_schema=None)
dfBp.to_gbq("inputs.bp_tmp",'nipams', if_exists='replace', table_schema=None)
display(dfImu)
display(dfBp)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfImu.ts = dfImu.ts.astype(int)/1e9


Unnamed: 0,file,heartbeat,patient,test_type,test_num,ax,ay,az,gx,gy,gz,ts
0,sub019_HLV2.mat,3.0,sub019,HLV,2,-0.818817,1.561019,-9.926359,1.037598,3.097534,0.221252,2.282
1,sub019_HLV2.mat,3.0,sub019,HLV,2,-0.821211,1.592144,-9.967060,1.838684,2.365112,0.152588,2.287
2,sub019_HLV2.mat,3.0,sub019,HLV,2,-0.860715,1.608903,-9.998185,2.315521,2.128601,0.102997,2.292
3,sub019_HLV2.mat,3.0,sub019,HLV,2,-0.848744,1.604115,-9.965863,2.243042,1.918793,-0.225067,2.297
4,sub019_HLV2.mat,3.0,sub019,HLV,2,-0.858321,1.620874,-9.921571,2.246857,1.682281,0.099182,2.302
...,...,...,...,...,...,...,...,...,...,...,...,...
193615,sub227_LLV2.mat,37.0,sub227,LLV,2,2.863465,0.432552,-9.670179,0.862122,2.159119,0.038147,33.764
193616,sub227_LLV2.mat,37.0,sub227,LLV,2,2.862268,0.459687,-9.701304,1.003265,1.930237,0.061035,33.769
193617,sub227_LLV2.mat,37.0,sub227,LLV,2,2.853888,0.452504,-9.725246,1.098633,1.991272,-0.053406,33.774
193618,sub227_LLV2.mat,37.0,sub227,LLV,2,2.851494,0.435745,-9.701304,1.541138,1.663208,0.381470,33.779


Unnamed: 0,file,heartbeat,patient,test_type,test_num,pp,sbp,dbp
0,sub019_HLV2.mat,3.0,sub019,HLV,2,70.450012,128.711227,58.261215
1,sub019_HLV2.mat,3.0,sub019,HLV,2,70.450012,128.711227,58.261215
2,sub019_HLV2.mat,3.0,sub019,HLV,2,70.450012,128.711227,58.261215
3,sub019_HLV2.mat,3.0,sub019,HLV,2,70.450012,128.711227,58.261215
4,sub019_HLV2.mat,3.0,sub019,HLV,2,70.450012,128.711227,58.261215
...,...,...,...,...,...,...,...,...
193615,sub227_LLV2.mat,37.0,sub227,LLV,2,40.560844,118.651726,78.090881
193616,sub227_LLV2.mat,37.0,sub227,LLV,2,40.560844,118.651726,78.090881
193617,sub227_LLV2.mat,37.0,sub227,LLV,2,40.560844,118.651726,78.090881
193618,sub227_LLV2.mat,37.0,sub227,LLV,2,40.560844,118.651726,78.090881


## Move Files Between Folders

In [26]:
import glob

import re
import shutil

In [15]:
!ls ../data/zips/data_cleaned_LVET*/

'../data/zips/data_cleaned_LVET 2/':
sub019	sub176	sub221	sub229	sub257	sub330	sub341	sub420	sub457
sub112	sub191	sub225	sub239	sub259	sub331	sub357	sub425	sub571
sub115	sub200	sub227	sub251	sub262	sub336	sub377	sub427	sub682
sub133	sub202	sub228	sub252	sub267	sub337	sub398	sub437

'../data/zips/data_cleaned_LVET 3/':
sub133	sub225	sub228	sub239	sub251	sub252	sub259

../data/zips/data_cleaned_LVET/:
'_read me.txt'	 sub202   sub252   sub337   sub437   sub554   sub644   sub854
 sub019		 sub221   sub257   sub341   sub442   sub557   sub682   sub885
 sub112		 sub225   sub259   sub357   sub444   sub559   sub740   sub886
 sub115		 sub227   sub262   sub377   sub456   sub571   sub762   sub919
 sub133		 sub228   sub267   sub398   sub457   sub589   sub770   sub991
 sub176		 sub229   sub330   sub420   sub472   sub602   sub772   sub994
 sub191		 sub239   sub331   sub425   sub502   sub605   sub773   sub996
 sub200		 sub251   sub336   sub427   sub551   sub629   sub777


In [35]:
# arrFiles = glob.glob('../data/zips/data_cleaned_LVET*/sub*/*.mat')
arrFiles = glob.glob('../data/raw_mat_all/*.mat')
len(arrFiles)

649

In [None]:
def getFileName(f):
    return re.findall('[^/]+$', f)[0]

def moveFile(f):
    shutil.move(f, '../data/raw_mat_all/'+getFileName(f))
    
for i in arrFiles:
    print(f'moving {i}')
    moveFile(i)

# Modeling

## Training

# Docker Compose

### Standard Startup
1. `docker-compose build`
2. `docker-compose up`

### Tear Down
1. `docker-compose down`


## Docker - Specific Container
### Build Image
`docker build -f Dockerfile -t templatedataproject:latest .`

### Run Image
`docker run -p 8501:8501 templatedataproject:latest`