# Module 3 - Reasoning NLP (Colab)

This notebook runs the Reasoning-NLP pipeline (stages G1 -> G8) and then validates the artifacts it produces.


In [None]:
# Mount Google Drive at /content/drive; the input and artifact paths used by
# later cells live under /content/drive/MyDrive.
from google.colab import drive

drive.mount("/content/drive")


In [None]:
!apt-get update -y
!apt-get install -y ffmpeg
!pip install jsonschema


In [None]:
import os
import uuid
from pathlib import Path

REPO_DIR = '/content/video-summary'
if not os.path.exists(REPO_DIR):
    !git clone https://github.com/TCTri205/video-summary.git {REPO_DIR}
%cd {REPO_DIR}

VIDEO_NAME = 'video1'
PROCESSED_ROOT = '/content/drive/MyDrive/video-summary/processed'
RAW_VIDEO = '/content/drive/MyDrive/video-summary/input/raw_video.mp4'
EXTRACTION_DIR = Path(PROCESSED_ROOT) / VIDEO_NAME / 'extraction'
AUDIO_TRANSCRIPTS = EXTRACTION_DIR / 'audio_transcripts.json'
VISUAL_CAPTIONS = EXTRACTION_DIR / 'visual_captions.json'
ARTIFACTS_ROOT = '/content/drive/MyDrive/video-summary/artifacts'
RUN_ID = f'colab_rnlp_{uuid.uuid4().hex[:8]}'
print('RUN_ID =', RUN_ID)


In [None]:
!python -m reasoning_nlp.pipeline_runner \
  --audio-transcripts "{AUDIO_TRANSCRIPTS}" \
  --visual-captions "{VISUAL_CAPTIONS}" \
  --raw-video "{RAW_VIDEO}" \
  --stage g8 \
  --run-id "{RUN_ID}" \
  --artifacts-root "{ARTIFACTS_ROOT}"


In [None]:
RUN_DIR = Path(ARTIFACTS_ROOT) / RUN_ID
ALIGNMENT = RUN_DIR / 'g2_align' / 'alignment_result.json'
SCRIPT = RUN_DIR / 'g5_segment' / 'summary_script.json'
MANIFEST = RUN_DIR / 'g5_segment' / 'summary_video_manifest.json'
REPORT = RUN_DIR / 'g8_qc' / 'quality_report.json'

!python docs/Reasoning-NLP/schema/validate_artifacts.py \
  --alignment "{ALIGNMENT}" \
  --script "{SCRIPT}" \
  --manifest "{MANIFEST}" \
  --report "{REPORT}" \
  --contracts-dir contracts/v1/template


In [None]:
from IPython.display import Video

# Final summary video written by the assemble stage directory of this run.
OUTPUT_VIDEO = RUN_DIR / 'g7_assemble' / 'summary_video.mp4'
print('Output video:', OUTPUT_VIDEO)

# Fail with a clear error if the pipeline did not produce the video, rather
# than handing a non-existent path to the embedded player.
if not OUTPUT_VIDEO.is_file():
    raise FileNotFoundError(f'Summary video not found: {OUTPUT_VIDEO}')

# embed=True inlines the video data into the cell output (this makes the saved
# notebook larger). Kept as the last top-level expression so it is rendered.
Video(str(OUTPUT_VIDEO), embed=True)
