This repository has been archived by the owner on Sep 18, 2023. It is now read-only.
/
Makefile
175 lines (157 loc) · 5.73 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
## TODO: fix video cropping for wrong-sized videos
# Host-side settings. Values assigned with `?=` can be overridden from the
# environment or from the make command line.
#
# Working directory as reported by `pwd`; used as the host side of the docker
# volume mounts. Simply expanded so the shell runs once, not on every reference.
PWD:=$(shell pwd)
# AWS credentials handed to the container (placeholders unless overridden).
AWS_ACCESS_KEY_ID?=required_if_using_aws_transcribe
AWS_SECRET_ACCESS_KEY?=required_if_using_aws_transcribe
# Image to run and the name given to the running container.
DOCKER_IMAGE?=uscictdocker/mentor-pipeline:1.4.1
DOCKER_CONTAINER=mentor-pipeline
# Top level of the enclosing git checkout; empty when git cannot determine it.
PROJECT_ROOT?=$(shell git rev-parse --show-toplevel 2> /dev/null)
AWS_REGION?=us-east-1
TRANSCRIBE_AWS_S3_BUCKET_SOURCE?=mentorpal-transcribe-source
TRANSCRIBE_MODULE_PATH?=transcribe_aws
# Set DEV_ENABLED=1 to mount the local checkouts below into the container.
DEV_ENABLED?=
# Each DEV_* value resolves to the checkout's absolute path, or to an empty
# string when the directory is missing. The stderr redirect sits on `cd` (the
# command that can fail) so a missing checkout stays silent, and the
# sub-checkouts are only probed when DEV_ROOT itself resolved, so an empty
# DEV_ROOT can never turn into a lookup under `/`.
DEV_ROOT?=$(shell cd ~/projects 2> /dev/null && pwd)
DEV_MENTOR_PIPELINE?=$(if $(DEV_ROOT),$(shell cd $(DEV_ROOT)/mentor-pipeline 2> /dev/null && pwd))
DEV_TRANSCRIBE?=$(if $(DEV_ROOT),$(shell cd $(DEV_ROOT)/py-transcribe 2> /dev/null && pwd))
DEV_TRANSCRIBE_AWS?=$(if $(DEV_ROOT),$(shell cd $(DEV_ROOT)/py-transcribe-aws 2> /dev/null && pwd))
# Location of installed python packages inside the image; used as the mount
# point for the development checkouts.
DOCKER_PYTHON_VERSION=3.7
DOCKER_SITE_PACKAGES=/usr/local/lib/python$(DOCKER_PYTHON_VERSION)/site-packages
# Environment passed to the pipeline container.
# The AWS credentials are exported to the recipe environment and forwarded by
# NAME only: `-e VAR` without a value makes docker copy VAR from its own
# environment, so the secret values do not show up in the docker command line
# that make echoes (or in CI logs / the process list).
export AWS_ACCESS_KEY_ID
export AWS_SECRET_ACCESS_KEY
DOCKER_ENV_ARGS=\
  -e AWS_REGION=$(AWS_REGION) \
  -e AWS_ACCESS_KEY_ID \
  -e AWS_SECRET_ACCESS_KEY \
  -e TRANSCRIBE_AWS_S3_BUCKET_SOURCE=$(TRANSCRIBE_AWS_S3_BUCKET_SOURCE) \
  -e TRANSCRIBE_MODULE_PATH=$(TRANSCRIBE_MODULE_PATH)
# Host ./data and ./videos directories mounted under /app/mounts in the
# container. The specs are quoted so a working directory containing spaces
# still yields one argument per mount.
DOCKER_VOLUME_ARGS_DATA_AND_VIDEO=\
  -v "$(PWD)/data:/app/mounts/data" \
  -v "$(PWD)/videos:/app/mounts/videos"
# Optional development mounts. With DEV_ENABLED=1, every local checkout whose
# DEV_* path is non-empty is bind-mounted over the matching package directory
# inside the container; otherwise this stays empty.
DOCKER_VOLUME_ARGS_DEV=
ifeq ($(DEV_ENABLED),1)
  # py-transcribe checkout -> site-packages/transcribe
  ifneq ($(DEV_TRANSCRIBE),)
    DOCKER_VOLUME_ARGS_DEV += -v $(DEV_TRANSCRIBE)/transcribe:$(DOCKER_SITE_PACKAGES)/transcribe
  endif
  # py-transcribe-aws checkout -> site-packages/transcribe_aws
  ifneq ($(DEV_TRANSCRIBE_AWS),)
    DOCKER_VOLUME_ARGS_DEV += -v $(DEV_TRANSCRIBE_AWS)/transcribe_aws:$(DOCKER_SITE_PACKAGES)/transcribe_aws
  endif
  # mentor-pipeline checkout -> /app/mentor_pipeline
  ifneq ($(DEV_MENTOR_PIPELINE),)
    DOCKER_VOLUME_ARGS_DEV += -v $(DEV_MENTOR_PIPELINE)/mentor_pipeline:/app/mentor_pipeline
  endif
endif
# Everything placed between `docker run` and the image name: environment
# settings, the data/videos mounts, then any development mounts.
DOCKER_ARGS=$(DOCKER_ENV_ARGS) $(DOCKER_VOLUME_ARGS_DATA_AND_VIDEO) $(DOCKER_VOLUME_ARGS_DEV)
# Directory holding the virtualenv used for pytest.
VENV:=.venv
# Building the directory target delegates to `venv-create` in a sub-make.
$(VENV):
	$(MAKE) venv-create
# Path helpers for use with $(call ...); each takes one path argument and
# shells out to dirname/basename/pwd.
#   $(call abs,path)    -> absolute directory of `path` joined with its base name
#   $(call absdir,path) -> absolute directory that contains `path`
#   $(call fname,path)  -> base name of `path`
# `cd` is chained with `&&` so that a directory that cannot be entered yields
# an empty result instead of silently reporting the current directory.
abs=$(shell d=$$(cd "$$(dirname $(1))" && pwd) && f=$$(basename '$(1)') && echo "$${d}/$${f}")
absdir=$(shell cd "$$(dirname $(1))" && pwd)
fname=$(shell echo $$(basename '$(1)'))
# Creates the virtualenv in $(VENV) when that directory is absent, then
# upgrades pip and installs the runtime and test requirements into it.
# Runs `virtualenv-installed` first.
.PHONY: venv-create
venv-create: virtualenv-installed
	if [ ! -d $(VENV) ]; then virtualenv -p python3 $(VENV); fi
	$(VENV)/bin/pip install --upgrade pip
	$(VENV)/bin/pip install -r ./requirements.txt
	$(VENV)/bin/pip install -r ./requirements.test.txt
# Runs the repository's bin/virtualenv_ensure_installed.sh helper
# (presumably checks for / installs virtualenv -- the script is not part of
# this file; confirm there).
# Declared phony: this is a command, not a file, so a stray file named
# `virtualenv-installed` must not make the step look up to date.
.PHONY: virtualenv-installed
virtualenv-installed:
	$(PROJECT_ROOT)/bin/virtualenv_ensure_installed.sh
# Deletes one mentor's build directory (data/mentors/<mentorid>/build).
# Usage: make data/mentors/<mentorid>/clean
.PHONY: data/mentors/%/clean
data/mentors/%/clean:
	@echo "cleaning data/mentors/$*/build..." && rm -rf "data/mentors/$*/build"
# Deletes one mentor's videos directory (videos/<mentorid>).
# Usage: make videos/<mentorid>/clean
.PHONY: videos/%/clean
videos/%/clean:
	@echo "cleaning videos/$*..." && rm -rf "videos/$*"
# Removes all mentor files from the local file system by invoking the
# per-mentor clean targets (data/mentors/<mentorid>/clean and
# videos/<mentorid>/clean) for every directory found.
#
# Each loop variable already holds the full relative path (for example
# data/mentors/<mentorid>), so only "/clean" is appended to form the target.
# Non-directories, including the unexpanded glob when nothing matches, are
# skipped, and the first failing sub-make aborts the recipe.
.PHONY: clean
clean:
	@for m in data/mentors/*; do \
		[ -d "$${m}" ] || continue; \
		$(MAKE) "$${m}/clean" || exit 1; \
	done
	@for m in videos/*; do \
		[ -d "$${m}" ] || continue; \
		$(MAKE) "$${m}/clean" || exit 1; \
	done
# Runs an interactive bash shell inside the data processing pipeline image,
# with the same environment and mounts (DOCKER_ARGS) as the pipeline targets.
# The container is removed on exit (--rm).
# Declared with `.PHONY: shell` (colon after .PHONY) so the target is really
# marked phony and still runs if a file or directory named `shell` exists.
.PHONY: shell
shell:
	docker run \
		-it \
		--rm \
		--name $(DOCKER_CONTAINER) \
		--entrypoint /bin/bash \
		$(DOCKER_ARGS) \
		$(DOCKER_IMAGE)
# Complete build of one mentor's data: make data/mentor-<mentorid>
# Runs the image's `data-update` command against /app/mounts/data/mentors.
# Extra command-line options can be supplied through `args`, e.g.
#   make data/mentor-<mentorid> args="..."
# TODO:
#   1) log every significant action (generating audio, transcribing)
#   2) build classifier for jd
#   3) utterance yaml gets error codes
#   4) make delete audio files that failed to transcribe
# NOTE(review): .PHONY takes literal target names, so the `%` entry below
# does not appear to mark the expanded targets phony -- confirm.
.PHONY: data/mentor-%
data/mentor-%:
	docker run --rm --name $(DOCKER_CONTAINER) \
		$(DOCKER_ARGS) \
		$(DOCKER_IMAGE) \
		data-update --mentor $* --data=/app/mounts/data/mentors $(args)
# Runs the image's `topics-by-question-generate` command for one mentor, with
# only the host ./data directory mounted.
# Usage: make data/topics_by_question.csv/mentors-<mentorid>
# Note the plural `mentors-` in this target name (sibling targets use
# `mentor-`); the .PHONY line now names the target that actually exists.
.PHONY: data/topics_by_question.csv/mentors-%
data/topics_by_question.csv/mentors-%:
	docker run \
		--rm \
		--name $(DOCKER_CONTAINER) \
		-v $(PWD)/data:/app/mounts/data \
		$(DOCKER_IMAGE) topics-by-question-generate --mentor $* --data=/app/mounts/data/mentors
# Runs the image's `videos-update` command for one mentor after refreshing
# that mentor's data (prerequisite data/mentor-<mentorid>).
# Usage: make videos/mentor-<mentorid>
# The data and videos mounts are supplied by DOCKER_ARGS (through
# DOCKER_VOLUME_ARGS_DATA_AND_VIDEO), so they are not repeated here.
.PHONY: videos/mentor-%
videos/mentor-%: data/mentor-%
	docker run \
		--rm \
		--name $(DOCKER_CONTAINER) \
		$(DOCKER_ARGS) \
		$(DOCKER_IMAGE) videos-update --mentor $* --data=/app/mounts/data/mentors
# Runs an interactive bash shell inside the pipeline image with the
# environment and mounts from DOCKER_ARGS.
# The data and videos mounts are supplied by DOCKER_ARGS (through
# DOCKER_VOLUME_ARGS_DATA_AND_VIDEO), so they are not repeated here.
.PHONY: videos/shell
videos/shell:
	docker run \
		-it \
		--rm \
		--name $(DOCKER_CONTAINER) \
		$(DOCKER_ARGS) \
		--entrypoint /bin/bash \
		$(DOCKER_IMAGE)
##################################################################
# Tool to reduce noise in a mentor's utterance videos.
#
# Usage: make noise-reduce-<mentorid>
#
# NOTE: requires noise sample[s] to be set up under
#
# ./data/mentors/<mentor>/build/noise
#
# ...where each noise sample is a wav file that will be used
# on utterances where the noise-sample file name is a prefix
# of the utterance id, e.g. ./data/mentors/<mentor>/build/noise/s001p001.wav
# will apply to all utterances whose id starts with s001p001
#
# NOTE: this process does NOT automatically rebuild web and mobile videos.
# When noise reduction is complete, you should delete existing mobile/web videos
# and then rerun `make videos/mentor-<mentorid>`
#
# The data and videos mounts are supplied by DOCKER_ARGS (through
# DOCKER_VOLUME_ARGS_DATA_AND_VIDEO), so they are not repeated here.
.PHONY: noise-reduce-%
noise-reduce-%:
	docker run \
		--rm \
		--name $(DOCKER_CONTAINER) \
		$(DOCKER_ARGS) \
		$(DOCKER_IMAGE) videos-reduce-noise --mentor $* --data=/app/mounts/data/mentors
# Builds a checkpoint from one mentor's data: make checkpoint/<mentorid>
# Prerequisite: data/mentors/<mentorid>.
# Runs the checkpoint-clean and checkpoint-train targets of the makefile in
# $(PROJECT_ROOT)/checkpoint with CHECKPOINT=dev_latest, then prints how to
# try the result locally.
.PHONY: checkpoint/%
checkpoint/%: data/mentors/%
	cd $(PROJECT_ROOT)/checkpoint && \
		CHECKPOINT=dev_latest $(MAKE) checkpoint-clean/mentor/$* checkpoint-train/mentor/$*
	@printf '%s\n' \
		"" \
		"==== MAKE CHECKPOINT SUCCEEDED! ====" \
		"" \
		"If you have generated mentor videos, you can test the environment locally with:" \
		" cd .. && make local-run-dev" \
		"" \
		"Then view your mentor here:" \
		" http://localhost:8080/mentorpanel/?mentor=$*"