In [None]:
import sys
import pandas as pd
sys.path.append("../")
from pathlib import Path

### Retrieve City Council meetings CSV created using meetings.ipynb

In [None]:
# Load the meetings CSV produced by meetings.ipynb, parse the dates and
# order the rows so the most recent meeting comes first.
meeting_data = "../data/meetings.csv"
meetings_df = (
    pd.read_csv(meeting_data)
    # Real datetimes are needed both for chronological sorting and for the
    # strftime() call used later when building the dropdown labels.
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date', ascending=False)
)

# Keep only the regular council meetings.
# - na=False: a row with a missing meeting name counts as "no match". Without
#   it the mask contains NaN and boolean indexing raises ValueError.
# - regex=False: the title is a plain substring, not a pattern.
city_council_meetings = meetings_df[
    meetings_df["meeting"].str.contains(
        "Regular Council Meeting", na=False, regex=False
    )
]

city_council_meetings.head()

### Select a meeting to work on

In [None]:
import ipywidgets as wdgt

# Build the dropdown entries: one "<meeting> - <YYYY-MM-DD>" label per meeting,
# each mapped to the URL of its video player page.
# Rows without a date or a video URL are skipped: a missing date (NaT) cannot
# be formatted with strftime(), and a missing URL cannot be downloaded.
selectable_meetings = city_council_meetings.dropna(subset=['date', 'video'])

options = {}
for _, row in selectable_meetings.iterrows():
    label = f"{row['meeting']} - {row['date'].strftime('%Y-%m-%d')}"
    options[label] = {'url': row['video'], 'label': label}

video_select = wdgt.Dropdown(
    options={label: entry['url'] for label, entry in options.items()},
    description='Select Video:',
    style={'description_width': 'initial'},
    layout=wdgt.Layout(width='75%')
)


def _label_to_file_name(label):
    """Turn a dropdown label into a filesystem-safe base name.

    The label is lower-cased, spaces and hyphens become underscores, and every
    remaining character that is neither alphanumeric nor an underscore is
    dropped.
    """
    snake = label.lower().replace(' ', '_').replace('-', '_')
    return ''.join(c for c in snake if c.isalnum() or c == '_')


def _select_video(url):
    """Publish `url` and its derived file name as the notebook-level selection.

    Sets the globals `video_url` and `file_name`, which later cells read.
    """
    global video_url, file_name
    video_url = url
    # First label whose entry points at this URL.
    selected_label = next(
        label for label, entry in options.items() if entry['url'] == url
    )
    file_name = _label_to_file_name(selected_label)


# Update video_url and file_name variables when selection changes
def update_video_url(change):
    """Observer callback: `change.new` is the newly selected video URL."""
    _select_video(change.new)


video_select.observe(update_video_url, names='value')

# The observer only fires when the value *changes*. Seed the globals from the
# entry that is pre-selected so that accepting the default choice still
# defines video_url and file_name for the cells below.
if video_select.value is not None:
    _select_video(video_select.value)

display(video_select)


### Download the mp4 file from the meeting player page
The meeting list includes a URL to the granicus.com video player.
The player page exposes a video stream URL, which is not practical to use for downloading. The function `get_video_player` returns a `GranicusPlayerPage` object that includes a `download_url` derived from the streaming URL.


In [None]:
import os
import sys
from pathlib import Path
import requests
from src.models.meeting import GranicusPlayerPage
from src.granicus import get_video_player
from src.videos import download_file
# You may need to turn off VPN
sys.path.append("../")  # Make sure we can import from scripts

# Create output directory if it doesn't exist
VIDEO_DIRECTORY = Path("../data/video")
VIDEO_DIRECTORY.mkdir(parents=True, exist_ok=True)

# Define output path for the video
output_path = VIDEO_DIRECTORY / f"{file_name}.mp4"

# Get video player page info
player_page: GranicusPlayerPage = await get_video_player(video_url)

# Run the download
video_file = download_file(player_page.download_url, output_path)

# Display the result
if video_file:
    print(f"Video saved to: {video_file}")

### Use the downloaded mp4 file to extract a wav file

In [None]:
import asyncio
import os
from pathlib import Path
from src.videos import save_audio


# Create output directory for audio files if it doesn't exist
audio_dir = Path("../data/audio")
audio_dir.mkdir(parents=True, exist_ok=True)

# Assuming video_file contains the path to the downloaded video
# This would be the result from the previous download_video call
if not os.path.exists(video_file):
    print(f"Error: Video file not found at {video_file}")
else:
    # Define output path for the audio
    video_filename = os.path.basename(video_file)
    base_filename = os.path.splitext(video_filename)[0]
    audio_path = audio_dir / f"{base_filename}.wav"

    print(f"Extracting audio from: {video_file}")
    print(f"Saving audio to: {audio_path}")

    audio_file = await save_audio(
                video_path=video_file,
                output_path=str(audio_path),
            )

    if audio_file:
        print(f"Audio saved to: {audio_file}")
        print(f"Audio file size: {os.path.getsize(audio_file) / (1024 * 1024):.2f} MB")

### Convert the video file into a transcript
This step requires a Hugging Face login and API token.
You will also need to agree to the terms of each of the following models:
- guillaumekln/faster-whisper
- (TODO: list the remaining models that require accepting terms)


In [None]:
from src.videos import transcribe_video

video_file = "../data/video/regular_council_meeting___2025_02_26.mp4"

transcription_dir = Path("../data/transcripts")

transcription = await transcribe_video(video_file, transcription_dir)

In [None]:
from src.videos import transcribe_video_with_diarization

video_file = "../data/video/regular_council_meeting___2025_02_26.mp4"

transcription_dir = Path("../data/transcripts")

transcription = await transcribe_video_with_diarization(video_file, transcription_dir)