In [None]:
!pip install selenium
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
!pip install wave
!pip install pydub
!pip install -U -q google-generativeai

In [None]:
import google.generativeai as genai
from google.colab import userdata
# Look up the API key stored under the name 'GOOGLE_API_KEY' through
# google.colab's userdata helper, then pass it to the genai SDK configuration.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:

from threading import Thread
from time import sleep
from selenium import webdriver
from datetime import datetime
import pyaudio
import wave
from pydub import AudioSegment
import os
import shutil
FRAME_PREFIX = "_frame"

# # Initialize the Chrome driver
# driver = webdriver.Chrome(executable_path="path/to/chromedriver")

# # Navigate to the desired website
# driver.get("https://www.googlemeet.com")


class File:
  """A local file paired with the response returned when it is uploaded.

  Attributes:
    file_path: Path of the file on local disk.
    display_name: Optional human-readable name; None when not supplied.
    timestamp: Last-modification time of the file as a datetime object.
    response: Upload response stored by set_file_response(); None until then.
  """

  def __init__(self, file_path: str, display_name: str = None):
    """Record the path and read the file's modification time.

    Raises:
      OSError: if file_path does not exist or is inaccessible
        (raised by os.path.getmtime).
    """
    self.file_path = file_path
    # Always define the attribute so readers never hit AttributeError.
    self.display_name = display_name if display_name else None
    self.response = None
    modified_at = os.path.getmtime(file_path)
    # The file imports the class directly (`from datetime import datetime`),
    # so the constructor helper is `datetime.fromtimestamp`, not
    # `datetime.datetime.fromtimestamp`.
    self.timestamp = datetime.fromtimestamp(modified_at)

  def set_file_response(self, response):
    """Store the upload response object associated with this file."""
    self.response = response

def get_timestamp(filename):
  """Return the timestamp portion of a frame filename as a string.

  The filename is split on FRAME_PREFIX; the result is the text that follows
  the prefix, up to (not including) the first '.' after it. For a name like
  'output_file_prefix_frame00:00.jpg' that is '00:00'.

  Returns None when FRAME_PREFIX does not occur exactly once in filename.
  """
  pieces = filename.split(FRAME_PREFIX)
  if len(pieces) == 2:
    after_prefix = pieces[1]
    # Keep only what precedes the first dot (drops the extension).
    return after_prefix.partition('.')[0]
  # Zero or multiple occurrences of the prefix: not a recognised frame name.
  return None

# TODO: there's a bug in their code. The reason why we can't sample at freq > 1s is
# ... they name the files with a "min:sec" timestamp

def upload_audio(directory):
  """Upload `<directory>/audio/out.mp3` and return its File wrapper.

  The returned File carries the upload response from genai.upload_file
  (stored through File.set_file_response).
  """
  mp3_path = os.path.join(directory, "audio", "out.mp3")
  audio_file = File(file_path=mp3_path)
  print(f'Uploading: {audio_file.file_path}...')
  audio_file.set_file_response(genai.upload_file(path=audio_file.file_path))
  return audio_file

def upload_frames(directory):
  """Upload every captured frame found in `<directory>/video`.

  Frames are processed in sorted filename order. Each one is wrapped in a
  File, uploaded with genai.upload_file, and has the upload response attached.

  Args:
    directory: Chunk folder that contains the "video" subfolder of frames.

  Returns:
    List of File objects, one per uploaded frame, in upload order.
  """
  # Screenshots are written to the "video" subfolder (the "audio" subfolder
  # only holds out.wav / out.mp3), so both the listing and the per-file path
  # must go through "video".
  frames_dir = os.path.join(directory, "video")
  files_to_upload = [
      File(file_path=os.path.join(frames_dir, name))
      for name in sorted(os.listdir(frames_dir))
  ]

  # Upload the files to the API
  uploaded_files = []
  print(f' {len(files_to_upload)} files. This might take a bit...')

  for file in files_to_upload:
    print(f'Uploading: {file.file_path}...')
    response = genai.upload_file(path=file.file_path)
    file.set_file_response(response)
    uploaded_files.append(file)

  print(f"Completed file uploads!\n\nUploaded: {len(uploaded_files)} files")
  return uploaded_files

def call_gemini(uploaded_files, uploaded_audio):
  """Send the uploaded frames and audio to Gemini and return the reply text.

  The request is a flat list: the video prompt, then for each frame its
  timestamp (as text) followed by its uploaded-file handle, then the audio
  prompt and the uploaded audio handle.

  Args:
    uploaded_files: File objects for the frames; each must have `timestamp`
      and `response` set.
    uploaded_audio: File object for the audio; must have `response` set.

  Returns:
    The text of the model response (also printed).
  """
  # Create the prompt.
  prompt = "Watch and describe this video. An audio for it will be given after."
  prompt_audio = "Here is the audio. Describe the video with audio."

  # Set the model to Gemini 1.5 Pro.
  model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")

  # Make GenerateContent request with the structure described above.
  def make_request(prompt, prompt_audio, files, audio):
    request = [prompt]
    for file in files:
      # File.timestamp is a datetime object; request parts must be text or
      # file/blob handles, so convert it to its string form first.
      request.append(str(file.timestamp))
      request.append(file.response)
    # TODO How do we add audio exactly.. do we need the timestamp too?
    request.append(prompt_audio)
    request.append(audio.response)
    return request

  # Make the LLM request.
  request = make_request(prompt, prompt_audio, uploaded_files, uploaded_audio)
  response = model.generate_content(request,
                                    request_options={"timeout": 600})
  print(response.text)
  return response.text

def record_audio(tmpfolder, record_seconds=30):
  """Record from the default input device and save WAV + MP3 copies.

  Writes `<tmpfolder>/audio/out.wav`, then converts it to
  `<tmpfolder>/audio/out.mp3` with pydub. The "audio" subfolder must already
  exist.

  Args:
    tmpfolder: Chunk folder containing the "audio" subfolder.
    record_seconds: Recording length in seconds (default 30).
  """
  CHUNK = 1024
  FORMAT = pyaudio.paInt16
  CHANNELS = 2
  RATE = 44100
  WAVE_OUTPUT_FILENAME = os.path.join(tmpfolder,"audio","out.wav")

  # Initialize PyAudio
  p = pyaudio.PyAudio()
  try:
    # Query the sample width while the PyAudio instance is still live.
    sample_width = p.get_sample_size(FORMAT)

    # Open audio stream
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
      # Record audio for the specified duration, one CHUNK at a time
      frames = [stream.read(CHUNK)
                for _ in range(int(RATE / CHUNK * record_seconds))]
    finally:
      # Release the stream even if a read fails part-way through
      stream.stop_stream()
      stream.close()
  finally:
    p.terminate()

  # Save the recorded audio to a WAV file; the context manager closes the
  # file even if writing raises.
  with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

  # Convert the WAV recording to MP3 next to it
  audio = AudioSegment.from_wav(WAVE_OUTPUT_FILENAME)
  mp3_filename =  os.path.join(tmpfolder, "audio", "out.mp3")
  audio.export(mp3_filename, format="mp3")


def capture_screenshots(tmpfolder):
  """Save 30 browser screenshots, one per second, into `<tmpfolder>/video`.

  Each frame is written as `<FRAME_PREFIX><MM>:<SS>.png`, where MM:SS is the
  elapsed time since the first frame. The "video" subfolder must already
  exist.

  NOTE: relies on a module-level Selenium WebDriver named `driver`. Its
  initialization is commented out near the top of this file, so it has to be
  created before this function is called.

  Args:
    tmpfolder: Chunk folder containing the "video" subfolder.
  """
  for i in range(30):
    # Capture the entire browser window. (The earlier extra call to
    # `driver.capture_screenshots(...)` was removed: WebDriver has no such
    # method, so it raised AttributeError before any frame was saved.)
    screenshot = driver.get_screenshot_as_png()

    # Save the screenshot as an image file
    minutes, seconds = divmod(i, 60)
    # TODO we can try if there's any difference using 00:00 vs actual timestamp (e.g. 12:56AM)
    time_string = f"{minutes:02d}:{seconds:02d}"
    screenshot_filename = os.path.join(tmpfolder, "video", f"{FRAME_PREFIX}{time_string}.png")
    with open(screenshot_filename, "wb") as file:
        file.write(screenshot)
    sleep(1)


def record_and_call_gemini(tmpfolder):
    """Record one audio/video chunk, send it to Gemini, and return the reply.

    Creates `tmpfolder` with "video" and "audio" subfolders, runs
    record_audio and capture_screenshots in parallel threads, uploads the
    results, calls Gemini, and then starts a daemon thread that deletes the
    uploaded and local files.

    Args:
        tmpfolder: Folder used for this chunk's temporary files.

    Returns:
        The text returned by call_gemini.
    """
    # Create a temporary folder
    os.makedirs(tmpfolder,exist_ok=True)
    os.makedirs(os.path.join(tmpfolder,"video"),exist_ok=True)
    os.makedirs(os.path.join(tmpfolder,"audio"),exist_ok=True)

    # Call audio and video capture threads.
    # `args` must be a tuple: `(tmpfolder)` is just the string itself, which
    # Thread would unpack character by character into positional arguments.
    tr1=Thread(target=record_audio, args=(tmpfolder,))
    tr2=Thread(target=capture_screenshots, args=(tmpfolder,))
    tr1.start()
    tr2.start()
    # Wait up to 31 seconds for each; join() with a timeout returns even if
    # the thread is still running, in which case the upload proceeds anyway.
    tr1.join(30+1)
    tr2.join(30+1)

    # Upload files
    uploaded_files=upload_frames(tmpfolder)
    uploaded_audio=upload_audio(tmpfolder)
    # Call Gemini
    response=call_gemini(uploaded_files, uploaded_audio)

    # Send another thread to clean up tmp files
    th=Thread(target=clean_up,args=(tmpfolder,uploaded_files,uploaded_audio))
    th.daemon=True
    th.start()

    return response

# okay when do we clean up...
def clean_up(tmpfolder,uploaded_files,uploaded_audio):
    """Delete the uploaded frames and audio remotely, then remove tmpfolder.

    Args:
        tmpfolder: Local chunk folder to remove recursively.
        uploaded_files: File objects for the frames; each needs `response`.
        uploaded_audio: File object for the audio; needs `response`.
    """
    # Frames plus the single audio file.
    total = len(uploaded_files) + 1
    print(f'Deleting {total} uploaded files. This might take a bit...')

    for frame in uploaded_files:
        remote = frame.response
        genai.delete_file(remote.name)
        print(f'Deleted {frame.file_path} at URI {remote.uri}')

    genai.delete_file(uploaded_audio.response.name)
    print(f"Completed deleting uploaded files!\n\nDeleted: {total} files")

    # Remote copies are gone; now drop the local chunk folder.
    print('Deleting local files.')
    shutil.rmtree(tmpfolder)
    print("Completed deleting local files!")

def main_thread():
  """Loop forever, recording and processing the meeting in 30-second chunks.

  Each iteration starts record_and_call_gemini in a worker thread on a fresh
  folder under TMPFOLDER, named after the current time (YYYYmmdd_HHMMSS).
  """
  while True:
    chunk_name = datetime.now().strftime("%Y%m%d_%H%M%S")
    worker = Thread(
        target=record_and_call_gemini,
        args=(os.path.join(TMPFOLDER, chunk_name),),
    )
    worker.start()
    # Give the recording its 30 seconds.
    sleep(30)
    # Wait at most 60 more seconds for the worker. A timed join only stops
    # waiting; it does not terminate a worker that is still running.
    worker.join(60)

# Script entry: create the root folder for per-chunk temporary data, then run
# the recording loop in its own thread and wait for it.
TMPFOLDER=os.path.join('.',"tmp")
os.makedirs(TMPFOLDER,exist_ok=True)
tr=Thread(target=main_thread)
tr.start()
# main_thread loops on `while True`, so this join only returns if that loop
# stops (e.g. because an exception ended the thread).
tr.join()
# Remove the whole temporary tree once the recording thread has ended.
shutil.rmtree(TMPFOLDER)