In [9]:
from __future__ import print_function

import os
import subprocess
from dotenv import load_dotenv
import sys
import importlib
import traceback
import asyncio
import os.path
import io
from datetime import datetime
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pickle
from googleapiclient.errors import HttpError
from concurrent.futures import ThreadPoolExecutor
from tools.gpt_functions import generate_gpt4_response_async
from tools.notification_functions import send_email_async
# Re-import the project's helper modules so that edits made on disk are picked up
# without restarting the kernel. The snapshot (`list(...)`) is taken because
# reloading may add entries to sys.modules while we iterate.
# NOTE(review): importlib.reload does not re-bind names that were imported earlier
# with `from tools... import ...`; those names keep pointing at the old objects
# until the import cell is executed again.
for module_name, module in list(sys.modules.items()):
    if module_name.startswith(('tools', '.env')):
        importlib.reload(module)


# Read the .env file so its variables become available through os.environ
load_dotenv()

# One row per monitored Google Drive folder:
# (local folder name, Google Drive folder id, e-mail address that receives the results).
# The three lookup tables below are all derived from this single table.
_FOLDER_TABLE = (
    ('audios_scha', '1Qdrs4naVqJH2KIcr1maQ3vuq5DGuDK-G', 'scha@cancelledfoodcoupon.com'),
    ('audios_mike', '1AUSninKPQ9mZXFaISKAXPRv4RzpB9oNx', 'mike@mantisnetworks.co'),
    ('audios_clint', '1UHH7ZuFS8anO_NIFqe25SHqPun_stmeQ', 'clint@mantisnetworks.co'),
    ('audios_loren', '1ibUUpCy74WUROr5TSa-pLYLYM6ivUUmZ', 'loren@mantisnetworks.co'),
    ('audios_josh', '1SwickgZ8MDK_BIyL7IhSn0oZdVxAMzHE', 'joshua.stapleton.ai@gmail.com'),
    ('audios_bart', '1BO0yHZO8CfrSzX2SvWhD1uUbq_M3L7X2', 'bartdenil12@gmail.com'),
    ('audios_mom', '1sA77uMfOftiR8njcATy_IFBng8-apXv0', 'brensuzy@gmail.com'),
    ('audios_dad', '1csJ4knxQ5Yp4vESB85ZMxkL8e5qKKkE_', 'brendanjstapleton@gmail.com'),
)

# Google Drive folder id -> e-mail address
FOLDER_ID_TO_EMAIL = {drive_id: address for _, drive_id, address in _FOLDER_TABLE}

# local folder name -> Google Drive folder id
FOLDER_NAME_TO_FOLDER_ID = {local_name: drive_id for local_name, drive_id, _ in _FOLDER_TABLE}

# local folder name -> e-mail address
FOLDER_NAME_TO_EMAIL = {local_name: address for local_name, _, address in _FOLDER_TABLE}

In [10]:
async def _run_silenced(*command: str) -> bool:
    """Run `command` with its output discarded; return True only if it exited with status 0."""
    try:
        process = await asyncio.create_subprocess_exec(
            *command, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
        )
    except OSError as error:
        # The executable is missing or cannot be started.
        print(f"Could not start {command[0]}: {error}")
        return False

    return_code = await process.wait()
    if return_code != 0:
        print(f"{command[0]} exited with status {return_code}")
        return False
    return True


async def whisper_async(target_file:str, local_folder:str):
    """Convert an audio file to WAV when needed and run the whisper binary on it.

    Args:
        target_file: Name of the audio file inside `local_folder`. `.m4a` and `.mp3`
            files are converted with ffmpeg (16 kHz sample rate); `.wav` files are
            passed to whisper as they are. The extension check is case-insensitive.
        local_folder: Folder that contains `target_file` and receives the `.wav`.

    Returns:
        The path of the `.wav` file handed to `./main`, or None when the format is
        unsupported or when ffmpeg / whisper could not be started or exited with a
        non-zero status.
    """
    base = os.path.splitext(target_file)[0]
    input_file = os.path.join(local_folder, target_file)
    lowered_name = target_file.lower()

    if lowered_name.endswith(('.m4a', '.mp3')):
        output_file = os.path.join(local_folder, f"{base}.wav")
        print(f"Processing {target_file}...")
        # -nostdin / -y: never wait for an interactive "overwrite?" answer when the
        # .wav is left over from an earlier run; simply regenerate it.
        converted = await _run_silenced(
            'ffmpeg', '-nostdin', '-y', '-i', input_file, '-ar', '16000', output_file
        )
        if not converted:
            # Without a fresh .wav there is nothing meaningful to transcribe.
            return None
    elif lowered_name.endswith('.wav'):
        print(f"Processing {target_file} file as .wav...")
        # Already a WAV file: hand the original path (with its original case) to whisper.
        output_file = input_file
    else:
        print(f"Unsupported file format: {target_file}")
        return None

    print("Running whisper to get transcript...")
    if not await _run_silenced('./main', '-f', output_file):
        return None

    return output_file

In [11]:
async def fred_async(filename:str):
    """E-mail a transcript, or the GPT-4 response to it, to the interested parties.

    The transcript is read from `filename`. If the path contains "transcript" or
    "transcribe" (case-insensitive) the raw transcript is sent; otherwise the
    transcript is passed to `generate_gpt4_response_async` and the response is sent
    followed by the transcript. When the model returns nothing, no e-mail is sent.

    Recipients are the developer address plus the address registered in
    `FOLDER_NAME_TO_EMAIL` for the first path component of `filename`; an address
    that appears twice only receives one e-mail.

    Args:
        filename: Path of the transcript text file, e.g. 'audios_scha/Recording_364.wav.txt'.

    Returns:
        None. Exceptions are printed with a traceback instead of being raised.
    """
    print("FILENAME:", filename)
    # The first path component is the local folder name, which identifies the owner.
    folder_name = filename.replace(os.sep, '/').split('/')[0]
    receiving_email_address = FOLDER_NAME_TO_EMAIL.get(folder_name)
    try:
        lowered_name = filename.lower()
        send_raw_transcript = ("transcript" in lowered_name) or ('transcribe' in lowered_name)
        print("Sending transcript..." if send_raw_transcript else "Running Fred...")

        with open(filename, 'r') as file:
            transcript = file.read()

        if send_raw_transcript:
            body = "TRANSCRIPT:\n" + transcript
        else:
            # The e-mail cannot be composed before the API has answered.
            response = await generate_gpt4_response_async(transcript)
            if not response:
                # Covers both '' and None: there is nothing useful to send to anyone.
                print("No response from GPT-4")
                return
            print(response)
            body = response + "\n\n-----------------------\n\nTRANSCRIPT:\n" + transcript

        recipients = ['joshua.stapleton.ai@gmail.com']
        if receiving_email_address and receiving_email_address not in recipients:
            recipients.append(receiving_email_address)

        sender = os.environ.get('SENDING_EMAIL_ADDRESS')
        password = os.environ.get('EMAIL_PASSWORD')
        subject = "FRED response for " + filename

        # Await the sends instead of spawning unreferenced tasks, so failures are
        # reported here and the work cannot be dropped before it finishes.
        outcomes = await asyncio.gather(
            *(send_email_async(sender, recipient, subject, body, password) for recipient in recipients),
            return_exceptions=True,
        )
        for recipient, outcome in zip(recipients, outcomes):
            if isinstance(outcome, Exception):
                print(f"Failed to send email to {recipient}: {outcome}")

    except Exception as e:
        print(f"Exception in fred_async: {e}")
        traceback.print_exc()

In [12]:
# Run Fred manually on an existing transcript (uncomment the next line):
# await fred_async('audios_scha/Recording_364.wav.txt')

In [13]:
async def whisper_and_fred(target_file:str, local_folder:str):
    """Transcribe one audio file with whisper, then hand the transcript to Fred.

    Args:
        target_file: Name of the audio file inside `local_folder`.
        local_folder: Folder that contains the audio file.

    Returns:
        Whatever `fred_async` returns, or None when `whisper_async` produced no
        audio path (for example because the file format is unsupported).
    """
    # Whisper has to finish before Fred can be started on its output.
    filename = await whisper_async(target_file, local_folder)
    if filename is None:
        # whisper_async signals "nothing to transcribe" with None; building a
        # transcript path from it would raise a TypeError.
        print(f"Skipping Fred for {target_file}: no transcript was produced")
        return None

    response = await fred_async(filename + ".txt")
    return response

In [14]:
# Whisper needs a file path, so every audio has to be on disk before it can be transcribed.
# Audios can be large, so the download is streamed to disk chunk by chunk instead of
# being buffered completely in memory first.
def download_file(drive_service, file, local_folder):
    """Download one Google Drive file into `local_folder`.

    The content is written to a temporary '<name>.part' file and only renamed to
    its final name once the download has completed, so a failed or interrupted
    download never leaves a truncated file under the real name.

    Args:
        drive_service: Drive API service object (as returned by `build('drive', 'v3', ...)`).
        file: Drive file resource; its 'id' and 'name' entries are used.
        local_folder: Existing folder that receives the file.

    Returns:
        The path of the downloaded file (`local_folder` joined with the file name).

    Raises:
        Whatever the Drive client or the file system raises; the partial file is
        removed before the exception propagates.
    """
    request = drive_service.files().get_media(fileId=file['id'])
    downloaded_file_path = os.path.join(local_folder, file['name'])
    partial_file_path = downloaded_file_path + '.part'

    print("Downloading file...", file['name'])
    try:
        with io.open(partial_file_path, 'wb') as f:
            print("Writing file...", file['name'])
            # The downloader writes each chunk straight into the open file.
            downloader = MediaIoBaseDownload(f, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
        # Publish the file under its real name only when it is complete.
        os.replace(partial_file_path, downloaded_file_path)
    finally:
        # Still present only if something failed before the rename.
        if os.path.exists(partial_file_path):
            os.remove(partial_file_path)

    return downloaded_file_path


async def download_file_async(drive_service, file, local_folder, loop):
    """Run the blocking `download_file` in a worker thread and wait for it.

    Args:
        drive_service: Drive API service object, passed through to `download_file`.
        file: Drive file resource, passed through to `download_file`.
        local_folder: Destination folder, passed through to `download_file`.
        loop: Event loop whose default executor runs the download.

    Returns:
        The path returned by `download_file`.
    """
    # `None` selects the loop's default executor, so no thread pool has to be
    # created and torn down for every single download.
    file_path = await loop.run_in_executor(None, download_file, drive_service, file, local_folder)
    print("Downloaded and processed", file_path)
    return file_path

In [15]:
# If modifying these SCOPES, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']


def _load_credentials():
    """Return Drive credentials, refreshing or re-authorising them when needed.

    token.pickle stores the user's access and refresh tokens; it is (re)written
    whenever the credentials had to be refreshed or newly obtained.
    """
    creds = None
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            # NOTE(security): pickle.load can execute arbitrary code contained in
            # the file, so token.pickle must only ever come from this program.
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('client_secret_782650429580-k51cnfcs0gmn6kdkn7t5elbchinpspo1.apps.googleusercontent.com.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    return creds


def _list_folder_files(drive_service, folder_id):
    """Return the id/name of every non-trashed file in a Drive folder, following all result pages."""
    files = []
    page_token = None
    while True:
        response = drive_service.files().list(
            q="'{}' in parents and trashed = false".format(folder_id),
            fields='nextPageToken, files(id, name)',
            pageToken=page_token).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return files


async def main():
    """Poll the configured Google Drive folders forever and process new audio files.

    For every folder in `FOLDER_NAME_TO_FOLDER_ID`, files whose names are not yet
    present in the local folder of the same name are downloaded and then handed to
    `whisper_and_fred` as a background task. A file whose download fails is not
    recorded as downloaded, so it is attempted again on a later poll.

    An `HttpError` raised while authenticating or listing a folder is printed and
    ends the polling loop.
    """
    try:
        creds = _load_credentials()

        drive_service = build('drive', 'v3', credentials=creds)
        loop = asyncio.get_running_loop()

        # Names already present locally count as downloaded; the local folders are
        # created on first use so a fresh checkout does not fail here.
        downloaded_files = {}
        for local_folder in FOLDER_NAME_TO_FOLDER_ID.keys():
            os.makedirs(local_folder, exist_ok=True)
            downloaded_files[local_folder] = set(os.listdir(local_folder))
        print("Downloaded files:", downloaded_files)

        # Strong references to the running whisper_and_fred tasks; the event loop
        # itself only keeps weak references to tasks.
        background_tasks = set()

        # Continuously poll Google Drive folder for new files
        while True:
            for local_folder, folder_id in FOLDER_NAME_TO_FOLDER_ID.items():
                # The Drive client is blocking, so it runs in a worker thread to keep
                # the event loop free for the background tasks. Calls are awaited one
                # at a time, so the service object is never used concurrently.
                all_files = await loop.run_in_executor(None, _list_folder_files, drive_service, folder_id)

                # Remove already downloaded files
                files_to_download = [file for file in all_files if file['name'] not in downloaded_files[local_folder]]

                for file in files_to_download:
                    print("Detected new file in " + local_folder + ": " + file['name'] + " at " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

                    try:
                        await loop.run_in_executor(None, download_file, drive_service, file, local_folder)
                    except Exception as e:
                        print(e)
                        # Nothing usable is on disk: do not record or process the
                        # file, so it is picked up again on a later poll.
                        continue

                    # Record the file only now that it was downloaded successfully.
                    downloaded_files[local_folder].add(file['name'])

                    # process the file to .wav and run Fred - get GPT response and send email
                    task = asyncio.create_task(whisper_and_fred(target_file=file['name'], local_folder=local_folder))
                    background_tasks.add(task)
                    task.add_done_callback(background_tasks.discard)
                await asyncio.sleep(1) # allows for progress to be made on another coroutine - namely, whisper_and_fred

    except HttpError as error:
        print(f"An HTTP error occurred: {error}")


# Start the polling loop. Top-level `await` is used because the notebook kernel
# already runs an event loop.
# NOTE(review): in a plain Python script this would presumably have to be
# asyncio.run(main()) instead - confirm before moving the code out of the notebook.
await main() # for notebook since event loop already created

Downloaded files: {'audios_scha': {'Recording_323.wav.txt', 'Recording_360.m4a', 'Recording_334.wav.txt', 'Recording_355.m4a', 'Mantis support 26072023.wav.txt', 'Recording_323.m4a', 'Recording_298.wav.txt', 'Mantis support 26072023.m4a', 'Recording_362.wav.txt', 'Recording_297.m4a', 'Recording_331.wav.txt', 'Recording_332.m4a', 'Recording_348.m4a', 'Recording_297.wav', 'Recording_323.wav', 'Recording_299.wav', 'Recording_299.m4a', 'Recording_349.m4a', 'Recording_327.m4a', 'Recording_298.m4a', 'Recording_340.m4a', 'Recording_327.wav.txt', 'Recording_299.wav.txt', 'Recording_334.m4a', 'Octopi trust questions 26072023.m4a', 'Recording_364.m4a', 'Email octopi 18072023.m4a', 'Recording_336.m4a', 'Recording_334.wav', 'Recording_338.m4a', 'Recording_298.wav', 'Recording_346.m4a', 'Recording_353.m4a', 'Recording_350.m4a', 'Recording_342.m4a', 'Recording_329.wav', 'Recording_300.wav.txt', 'Recording_325.wav.txt', 'Recording_363.wav.txt', 'Recording_332.wav.txt', 'Recording_331.m4a', 'Recording