In [26]:
from __future__ import print_function

import os
import subprocess
from dotenv import load_dotenv
import sys
import importlib
import traceback
import asyncio
import os.path
import io
import time
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pickle
from googleapiclient.errors import HttpError
from tools.gpt_functions import generate_gpt4_response, generate_gpt4_response_async
from tools.notification_functions import send_email, send_email_async
# Reload the project-local `tools` modules so that edits made on disk are picked
# up when this cell is re-run without restarting the kernel. Only module names
# are matched here; `.env` is a data file handled by load_dotenv() below, not a
# module, so there is nothing to reload for it.
for _module_name, _module in list(sys.modules.items()):
    if _module_name.startswith('tools'):
        importlib.reload(_module)

# importlib.reload() re-executes the module in place but does NOT re-bind names
# that were copied into this namespace with `from ... import ...`. Repeat those
# imports so the notebook calls the freshly reloaded functions, not stale ones.
from tools.gpt_functions import generate_gpt4_response, generate_gpt4_response_async
from tools.notification_functions import send_email, send_email_async


# Load the environment variables from the .env file.
# fred_async reads SENDING_EMAIL_ADDRESS and EMAIL_PASSWORD from the environment.
load_dotenv()

# Google Drive folder ID -> e-mail address. fred_async looks up the receiving
# address for a transcript in this mapping.
# NOTE(review): real e-mail addresses are hardcoded here; consider moving them
# to configuration that is not committed with the notebook.
FOLDER_ID_TO_EMAIL = {
    '1Qdrs4naVqJH2KIcr1maQ3vuq5DGuDK-G': 'scha@cancelledfoodcoupon.com',
    '1AUSninKPQ9mZXFaISKAXPRv4RzpB9oNx': 'mike@mantisnetworks.co',
    '1UHH7ZuFS8anO_NIFqe25SHqPun_stmeQ': 'clint@mantisnetworks.co',
    '1ibUUpCy74WUROr5TSa-pLYLYM6ivUUmZ': 'loren@mantisnetworks.co',
    '1SwickgZ8MDK_BIyL7IhSn0oZdVxAMzHE': 'joshua.stapleton.ai@gmail.com',
    '1BO0yHZO8CfrSzX2SvWhD1uUbq_M3L7X2': 'bartdenil12@gmail.com',
}

# Local folder name -> Google Drive folder ID.
# main() uses each key as the local directory that files are downloaded into
# and each value as the Drive parent folder to poll. The values here are the
# same IDs that key FOLDER_ID_TO_EMAIL above, so the two must be kept in sync.
FOLDER_NAME_TO_FOLDER_ID = {
    'audios_scha': '1Qdrs4naVqJH2KIcr1maQ3vuq5DGuDK-G',
    'audios_mike': '1AUSninKPQ9mZXFaISKAXPRv4RzpB9oNx',
    'audios_clint': '1UHH7ZuFS8anO_NIFqe25SHqPun_stmeQ',
    'audios_loren': '1ibUUpCy74WUROr5TSa-pLYLYM6ivUUmZ',
    'audios_josh': '1SwickgZ8MDK_BIyL7IhSn0oZdVxAMzHE',
    'audios_bart': '1BO0yHZO8CfrSzX2SvWhD1uUbq_M3L7X2',
}

In [20]:
# import wave
# import numpy as np


# def mono_to_stereo(filename:str): # input is .wav, output is .wav
#     # Open mono wave file
#     mono_wave = wave.open(filename, 'rb')

#     # Extract frames as bytes
#     mono_frames = mono_wave.readframes(-1)

#     # Convert bytes to integers between -32768 and 32767 (inclusive)
#     mono_audio = np.frombuffer(mono_frames, dtype=np.int16)

#     # Stack the mono audio to make it stereo
#     stereo_audio = np.column_stack((mono_audio, mono_audio))

#     # Convert stereo audio back to bytes
#     stereo_frames = stereo_audio.tobytes()

#     # Open a new wave file for the stereo audio
#     stereo_wave = wave.open(filename.split('.')[0] + '.wav', 'wb')

#     # Set parameters (nchannels = 2 for stereo)
#     stereo_wave.setnchannels(2)
#     stereo_wave.setsampwidth(mono_wave.getsampwidth())
#     stereo_wave.setframerate(mono_wave.getframerate())
#     stereo_wave.setnframes(mono_wave.getnframes())

#     # Write frames to wave file
#     stereo_wave.writeframes(stereo_frames)

#     # Close files
#     mono_wave.close()
#     stereo_wave.close()


In [23]:
async def whisper_async(target_file:str, local_folder:str):
    """Convert an audio file to a 16 kHz .wav with ffmpeg, then run `./main` on it.

    This is a coroutine so the two subprocesses can be awaited without blocking
    the event loop that main() uses to keep polling for new files.

    Args:
        target_file: Name of the audio file inside `local_folder`. Files ending
            in `.m4a` or `.mp3` (any letter case) are supported.
        local_folder: Directory that contains `target_file`; the converted
            `.wav` is written into the same directory.

    Returns:
        The path of the converted `.wav` file, or None when the file format is
        unsupported or no `.wav` file exists after the ffmpeg step.
    """
    base = os.path.splitext(target_file)[0]
    input_file = os.path.join(local_folder, target_file)
    output_file = os.path.join(local_folder, f"{base}.wav")

    # Both supported formats are converted with the exact same ffmpeg command,
    # so only the matched extension (used in the log line) differs.
    lowered_name = target_file.lower()
    extension = next((ext for ext in ('.m4a', '.mp3') if lowered_name.endswith(ext)), None)
    if extension is None:
        print(f"Unsupported file format: {target_file}")
        return None

    print(f"Processing {target_file} file as {extension}...")
    # stdin is detached so ffmpeg can never sit waiting for an answer to its
    # "overwrite?" prompt when the .wav already exists.
    ffmpeg_process = await asyncio.create_subprocess_exec(
        'ffmpeg', '-i', input_file, '-ar', '16000', output_file,
        stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
    )
    ffmpeg_exit_code = await ffmpeg_process.wait()
    if ffmpeg_exit_code != 0:
        print(f"ffmpeg exited with code {ffmpeg_exit_code} while converting {target_file}")
    if not os.path.exists(output_file):
        # Without a .wav there is nothing to transcribe; report and stop here
        # instead of launching whisper on a missing file.
        print(f"No .wav file available for {target_file}; skipping transcription")
        return None

    print("Running whisper to get transcript...")
    whisper_process = await asyncio.create_subprocess_exec(
        './main', '-f', output_file, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
    )
    whisper_exit_code = await whisper_process.wait()
    if whisper_exit_code != 0:
        print(f"whisper exited with code {whisper_exit_code} for {output_file}")

    return output_file

In [15]:
async def fred_async(filename:str):
    """Send a transcript to the GPT helper and e-mail the response with the transcript.

    Args:
        filename: Path of the transcript text file. Its first path component is
            treated as the local folder name when looking up the recipient.

    Returns:
        The response from generate_gpt4_response_async, or None if any step
        raised (the exception is printed with its traceback, not propagated).
    """
    try:
        print("Running Fred...")
        with open(filename, 'r') as file:
            transcript = file.read()
        # The e-mail body needs the model output, so wait for it here.
        # NOTE(review): the meaning of the second argument (1) is defined in
        # tools.gpt_functions and is not visible from this notebook.
        response = await generate_gpt4_response_async(transcript, 1)

        subject = "FRED response for " + filename
        body = response + "\n\n-----------------------\n\nTRANSCRIPT:\n" + transcript
        # Awaited rather than launched with asyncio.create_task(): a task that
        # nobody holds a reference to can be garbage-collected before it
        # finishes, and its errors would bypass the except block below.
        await send_email_async(os.environ.get('SENDING_EMAIL_ADDRESS'), 'joshua.stapleton.ai@gmail.com', subject, body, os.environ.get('EMAIL_PASSWORD'))

        # UNCOMMENT TO SEND TO CORRECT EMAIL
        # The path starts with the local folder NAME, while FOLDER_ID_TO_EMAIL
        # is keyed by Drive folder ID, so translate name -> ID -> address.
        folder_name = os.path.normpath(filename).split(os.sep)[0]
        receiving_email_address = FOLDER_ID_TO_EMAIL.get(FOLDER_NAME_TO_FOLDER_ID.get(folder_name))
        # if receiving_email_address:  # If an email address was found
        #     await send_email_async(os.environ.get('SENDING_EMAIL_ADDRESS'), receiving_email_address, subject, body, os.environ.get('EMAIL_PASSWORD'))
        return response
    except Exception as e:
        print(f"Exception in fred_async: {e}")
        traceback.print_exc()
        return None

In [17]:
async def whisper_and_fred(target_file:str, local_folder:str):
    """Run whisper_async on a downloaded file, then fred_async on its transcript.

    Args:
        target_file: Name of the audio file inside `local_folder`.
        local_folder: Directory that contains `target_file`.

    Returns:
        Whatever fred_async returns, or None when whisper_async produced no
        .wav path (for example because the file format is unsupported).
    """
    # Whisper has to finish before Fred can read the transcript.
    wav_path = await whisper_async(target_file, local_folder)
    if wav_path is None:
        # whisper_async has already printed the reason; building a transcript
        # path from None would raise TypeError.
        return None
    # The transcript is read from the .wav path with ".txt" appended.
    return await fred_async(wav_path + ".txt")

In [5]:
def download_file(drive_service, file, local_folder):
    """Download one Drive file into `local_folder` and return the local path.

    The content is fetched chunk by chunk into an in-memory buffer and written
    to disk only after the last chunk has arrived.

    Args:
        drive_service: Drive API service object; `files().get_media(...)` is
            called on it.
        file: Mapping with at least the keys 'id' and 'name'.
        local_folder: Directory the file is written into, under file['name'].

    Returns:
        The path of the written file (`local_folder` joined with file['name']).
    """
    media_request = drive_service.files().get_media(fileId=file['id'])
    buffer = io.BytesIO()
    chunked_download = MediaIoBaseDownload(buffer, media_request)

    # Keep pulling chunks until the downloader stops reporting False for "done".
    while True:
        _progress, finished = chunked_download.next_chunk()
        if finished is not False:
            break

    destination = os.path.join(local_folder, file['name'])
    with open(destination, 'wb') as out:
        print("Writing file...", file['name'])
        out.write(buffer.getvalue())

    return destination

In [25]:
# If modifying these SCOPES, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

async def main():
    """Poll the configured Google Drive folders forever and process new files.

    Authenticates against the Drive v3 API (reusing token.pickle when present),
    then loops over FOLDER_NAME_TO_FOLDER_ID: every file whose name is not yet
    present in the matching local folder is downloaded with download_file and
    handed to whisper_and_fred as a background task.

    The loop only ends when an HttpError is raised (it is printed and the
    coroutine returns) or when the coroutine is cancelled or another exception
    propagates.
    """
    try:
        creds = None
        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        # NOTE(review): pickle.load can execute arbitrary code from the file; this
        # is only acceptable while token.pickle is written solely by this program.
        if os.path.exists('token.pickle'):
            with open('token.pickle', 'rb') as token:
                creds = pickle.load(token)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file('client_secret_782650429580-k51cnfcs0gmn6kdkn7t5elbchinpspo1.apps.googleusercontent.com.json', SCOPES)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            with open('token.pickle', 'wb') as token:
                pickle.dump(creds, token)

        drive_service = build('drive', 'v3', credentials=creds)

        # Record what is already on disk so that only new files are fetched.
        # Missing local folders are created first; os.listdir would raise
        # FileNotFoundError for them otherwise.
        downloaded_files = {}
        for local_folder in FOLDER_NAME_TO_FOLDER_ID:
            os.makedirs(local_folder, exist_ok=True)
            downloaded_files[local_folder] = set(os.listdir(local_folder))
        print("Downloaded files:", downloaded_files)

        # Strong references to in-flight tasks: the event loop keeps only weak
        # references, so an unreferenced task may be garbage-collected mid-run.
        background_tasks = set()

        # Continuously poll Google Drive folder for new files
        while True:
            for local_folder, folder_id in FOLDER_NAME_TO_FOLDER_ID.items():
                # Collect every page of the listing; a single request returns
                # only the first page of results.
                all_files = []
                page_token = None
                while True:
                    listing = drive_service.files().list(
                        q="'{}' in parents and trashed = false".format(folder_id),
                        fields='nextPageToken, files(id, name)',
                        pageToken=page_token).execute()
                    all_files.extend(listing.get('files', []))
                    page_token = listing.get('nextPageToken')
                    if not page_token:
                        break

                # Remove already downloaded files
                files_to_download = [file for file in all_files if file['name'] not in downloaded_files[local_folder]]

                # only for files which have not been downloaded
                for file in files_to_download:
                    print("Detected new file in", local_folder)
                    # download_file is synchronous, so the event loop is blocked
                    # for the duration of the download.
                    download_file(drive_service, file, local_folder)

                    # Reached only if download_file did not raise.
                    downloaded_files[local_folder].add(file['name'])

                    # process the file to .wav and run Fred - get GPT response and send email
                    task = asyncio.create_task(whisper_and_fred(target_file=file['name'], local_folder=local_folder))
                    background_tasks.add(task)
                    task.add_done_callback(background_tasks.discard)
                await asyncio.sleep(1) # allows for progress to be made on another coroutine - namely, whisper_and_fred


    except HttpError as error:
        print(f"An HTTP error occurred: {error}")

# Top-level `await` works here because the notebook already has an event loop
# running. main() polls forever, so this cell runs until it is interrupted.
# If this code is moved into a plain script, start it like this instead:
# if __name__ == '__main__':
#     asyncio.run(main())
await main() # for notebook since event loop already created

Downloaded files: {'audios_scha': {'Recording_325.wav.txt', 'Octopi trust questions 26072023.m4a', 'Recording_323.wav.txt', 'Recording_330.m4a', 'Recording_329.m4a', 'Recording_329.wav', 'Mantis support 26072023.wav.txt', 'Recording_334.wav', 'Recording_325.wav', 'Mantis support 26072023.wav', 'Recording_330.wav.txt', 'Recording_332.wav', 'Recording_334.m4a', 'Recording_332.wav.txt', 'Recording_331.wav.txt', 'Octopi trust questions 26072023.wav', 'Recording_323.wav', 'Recording_327.wav', 'Recording_329.wav.txt', 'Recording_331.m4a', 'Recording_323.m4a', 'Recording_327.wav.txt', 'Recording_332.m4a', 'Recording_330.wav', 'Octopi trust questions 26072023.wav.txt', 'Recording_331.wav', 'Recording_327.m4a', 'Audio from ☕', 'Mantis support 26072023.m4a', 'Recording_325.m4a'}, 'audios_mike': {'Voice 002.wav.txt', 'Voice 002.wav', 'Voice 004 - Copy.m4a', 'Voice 003.wav', 'Voice 003.wav.txt', 'Voice 004.wav.txt', 'Voice 002.m4a', 'Voice 004 - Copy.wav', 'Voice 004 - Copy.wav.txt', 'Voice 003.m4

CancelledError: 