Skip to content

Commit

Permalink
feature/store-session-content-hash-in-session-db-model (#158)
Browse files — browse the repository at this point in the history
* Catch ConnectionError in event gather pipeline

* Add in another catch and change to log.error

* Add session_content_hash to db model and intermediate pipeline steps

* Add tests

* Fix lint
  • Loading branch information
isaacna committed Jan 28, 2022
1 parent 0a43a37 commit ef049ad
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cdp_backend/database/functions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -213,6 +213,7 @@ def create_event(
def create_session(
session: ingestion_models.Session,
session_video_hosted_url: str,
session_content_hash: str,
event_ref: db_models.Event,
credentials_file: Optional[str] = None,
) -> db_models.Session:
Expand All @@ -228,6 +229,7 @@ def create_session(
db_session.session_datetime = session.session_datetime
db_session.video_uri = session_video_hosted_url
db_session.session_index = session.session_index
db_session.session_content_hash = session_content_hash

# Optional fields
db_session.caption_uri = session.caption_uri
Expand Down
4 changes: 4 additions & 0 deletions cdp_backend/database/models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -468,6 +468,7 @@ class Session(Model):
event_ref = fields.ReferenceField(Event, required=True, auto_load=False)
session_datetime = fields.DateTime(required=True)
session_index = fields.NumberField(required=True)
session_content_hash = fields.TextField(required=True)
video_uri = fields.TextField(required=True, validator=validators.resource_exists)
caption_uri = fields.TextField(validator=validators.resource_exists)
external_source_id = fields.TextField()
Expand All @@ -490,6 +491,9 @@ def Example(cls) -> Model:
session.video_uri = (
"https://video.seattle.gov/media/council/brief_072219_2011957V.mp4"
)
session.session_content_hash = (
"05bd857af7f70bf51b6aac1144046973bf3325c9101a554bc27dc9607dbbd8f5"
)
return session

_PRIMARY_KEYS = ("event_ref", "video_uri")
Expand Down
5 changes: 5 additions & 0 deletions cdp_backend/pipeline/event_gather_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,6 +39,7 @@
class SessionProcessingResult(NamedTuple):
session: Session
session_video_hosted_url: str
session_content_hash: str
audio_uri: str
transcript: Transcript
transcript_uri: str
Expand Down Expand Up @@ -204,6 +205,7 @@ def create_event_gather_flow(
compile_session_processing_result( # type: ignore
session=session,
session_video_hosted_url=session_video_hosted_url,
session_content_hash=session_content_hash,
audio_uri=audio_uri,
transcript=transcript,
transcript_uri=transcript_uri,
Expand Down Expand Up @@ -941,6 +943,7 @@ def generate_thumbnails(
def compile_session_processing_result(
session: Session,
session_video_hosted_url: str,
session_content_hash: str,
audio_uri: str,
transcript: Transcript,
transcript_uri: str,
Expand All @@ -950,6 +953,7 @@ def compile_session_processing_result(
return SessionProcessingResult(
session=session,
session_video_hosted_url=session_video_hosted_url,
session_content_hash=session_content_hash,
audio_uri=audio_uri,
transcript=transcript,
transcript_uri=transcript_uri,
Expand Down Expand Up @@ -1292,6 +1296,7 @@ def store_event_processing_results(
session_db_model = db_functions.create_session(
session=session_result.session,
session_video_hosted_url=session_result.session_video_hosted_url,
session_content_hash=session_result.session_content_hash,
event_ref=event_db_model,
credentials_file=credentials_file,
)
Expand Down
4 changes: 4 additions & 0 deletions cdp_backend/tests/pipeline/test_event_gather_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -433,6 +433,7 @@ def test_calculate_in_majority(
pipeline.SessionProcessingResult(
session=session,
session_video_hosted_url="fake://doesnt-matter.mp4",
session_content_hash="fakehash123",
audio_uri="fake://doesnt-matter.wav",
transcript=EXAMPLE_TRANSCRIPT,
transcript_uri="fake://doesnt-matter-transcript.json",
Expand Down Expand Up @@ -461,6 +462,7 @@ def test_calculate_in_majority(
pipeline.SessionProcessingResult(
session=EXAMPLE_MINIMAL_EVENT.sessions[0],
session_video_hosted_url="fake://doesnt-matter.mp4",
session_content_hash="fakehash123",
audio_uri="ex://abc123-audio.wav",
transcript=EXAMPLE_TRANSCRIPT,
transcript_uri="ex://abc123-transcript.json",
Expand All @@ -476,6 +478,7 @@ def test_calculate_in_majority(
pipeline.SessionProcessingResult(
session=EXAMPLE_FILLED_EVENT.sessions[0],
session_video_hosted_url="fake://doesnt-matter-1.mp4",
session_content_hash="fakehash123",
audio_uri="ex://abc123-audio.wav",
transcript=EXAMPLE_TRANSCRIPT,
transcript_uri="ex://abc123-transcript.json",
Expand All @@ -485,6 +488,7 @@ def test_calculate_in_majority(
pipeline.SessionProcessingResult(
session=EXAMPLE_FILLED_EVENT.sessions[1],
session_video_hosted_url="fake://doesnt-matter-2.mp4",
session_content_hash="fakehash1234",
audio_uri="ex://def456-audio.wav",
transcript=EXAMPLE_TRANSCRIPT,
transcript_uri="ex://def456-transcript.json",
Expand Down

0 comments on commit ef049ad

Please sign in to comment.