In [1]:
#!pip install supabase
#!pip install colorama
from supabase import create_client
from typing import Optional, List, Dict, Union
from colorama import Fore, Style, init
from tqdm import tqdm
import time
import os
import pandas as pd
import json
import requests
from dotenv import load_dotenv

In [2]:
import os
from dotenv import load_dotenv
from colorama import init, Fore, Style
from supabase import create_client
from tqdm import tqdm

# Load environment variables from .env file
load_dotenv(dotenv_path='/Users/minghill/Desktop/BU/TalentTora/talentora-analysisbot/.env')

# Initialize colorama
init()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SECRET_SUPABASE_KEY = os.getenv("SECRET_SUPABASE_KEY")

# Print the values to check if they are set correctly
print(f"SUPABASE_URL: {SUPABASE_URL}")
print(f"SECRET_SUPABASE_KEY: {SECRET_SUPABASE_KEY}")

class SupabaseDB:
    def __init__(self):
        print(f"{Fore.CYAN}Initializing Supabase client...{Style.RESET_ALL}")

        self.client = create_client(SUPABASE_URL, SECRET_SUPABASE_KEY)
        print(f"{Fore.GREEN}✓ Supabase client initialized successfully{Style.RESET_ALL}")

    def get_supabase_data(self, table: str, select_target: str, condition: list = None):
        """
        Fetch data from the given table with optional conditions.
        """
        print(f"{Fore.CYAN}Fetching data from table '{table}'...{Style.RESET_ALL}")
        query = self.client.table(table).select(select_target)
        if condition:
            query = query.eq(condition[0], condition[1])
        response = query.execute()
        print(f"{Fore.GREEN}✓ Data fetched successfully{Style.RESET_ALL}")
        return response

    def insert_supabase_data(self, table: str, data_for_insert: dict) -> dict:
        """
        Insert data into the given table.

        Args:
            table (str): The name of the table to insert into
            data_for_insert (dict): The data to insert

        Returns:
            dict: Response data or error message
        """
        print(f"{Fore.CYAN}Inserting data into table '{table}'...{Style.RESET_ALL}")
        try:
            with tqdm(total=1, desc="Inserting", unit="record") as pbar:
                response = self.client.table(table).insert(data_for_insert).execute()
                pbar.update(1)

            # Check if insert was successful
            if response and response.data:
                print(f"{Fore.GREEN}✓ Data inserted successfully{Style.RESET_ALL}")
                return {"success": True, "data": response.data[0]}

            print(f"{Fore.RED}✗ No data inserted{Style.RESET_ALL}")
            return {"success": False, "error": "No data inserted"}

        except Exception as e:
            print(f"{Fore.RED}✗ Error inserting data: {str(e)}{Style.RESET_ALL}")
            return {"success": False, "error": str(e)}

    def update_supabase_data(self, table: str, data_for_update: dict, condition: list):
        """
        Update data in the given table with conditions.
        """
        print(f"{Fore.CYAN}Updating data in table '{table}'...{Style.RESET_ALL}")
        try:
            with tqdm(total=1, desc="Updating", unit="record") as pbar:
                response = (
                    self.client.table(table)
                    .update(data_for_update)
                    .eq(condition[0], condition[1])
                    .execute()
                )
                pbar.update(1)

            if response.data:
                print(f"{Fore.GREEN}✓ Data updated successfully{Style.RESET_ALL}")
                return response.data
            else:
                print(f"{Fore.RED}✗ No data updated{Style.RESET_ALL}")
                return {"error": "No data updated."}
        except Exception as e:
            print(f"{Fore.RED}✗ Error updating data: {str(e)}{Style.RESET_ALL}")
            return {"error": str(e)}

    def list_buckets(self) -> Optional[List[dict]]:
        """List all storage buckets"""
        try:
            response = self.client.storage.list_buckets()
            if response:
                print(f"{Fore.GREEN}Found {len(response)} buckets{Style.RESET_ALL}")
            return response
        except Exception as e:
            print(f"{Fore.RED}Error listing buckets: {str(e)}{Style.RESET_ALL}")
            return None

    def get_bucket(self, bucket_id: str) -> Optional[dict]:
        """Get details of a specific bucket"""
        try:
            response = self.client.storage.get_bucket(bucket_id)
            if response:
                print(f"{Fore.GREEN}Retrieved bucket: {response.name}{Style.RESET_ALL}")
            return response
        except Exception as e:
            print(f"{Fore.RED}Error retrieving bucket {bucket_id}: {str(e)}{Style.RESET_ALL}")
            return None

    def list_storage_files(self, bucket_id: str, folder_path: str = "", limit: Optional[int] = None) -> Optional[List[str]]:
        """
        List files in a storage bucket with optional folder path and limit.

        Args:
            bucket_id (str): The ID of the storage bucket
            folder_path (str): Optional path to a specific folder
            limit (int, optional): Maximum number of files to return

        Returns:
            Optional[List[str]]: List of file paths or None if error occurs
        """
        print(f"{Fore.CYAN}Listing files in bucket '{bucket_id}'...{Style.RESET_ALL}")
        try:
            folder_path = folder_path.strip('/')
            options = {"limit": limit} if limit else None
            response = self.client.storage.from_(bucket_id).list(
                path=folder_path,
                options=options
            )
            file_list = [f"{folder_path}/{file['name']}" if folder_path else file['name']
                        for file in response if file['id']]
            print(f"{Fore.GREEN}✓ Found {len(file_list)} files{Style.RESET_ALL}")
            return file_list
        except Exception as e:
            print(f"{Fore.RED}✗ Error listing files: {str(e)}{Style.RESET_ALL}")
            return None

    def upload_file(self, bucket_id: str, destination_path: str, file_data) -> dict:
        """
        Upload a file to storage bucket.

        Args:
            bucket_id (str): The ID of the storage bucket
            destination_path (str): Path where file should be stored
            file_data: The file data to upload

        Returns:
            dict: Success status and response data or error message
        """
        print(f"{Fore.CYAN}Uploading file to '{destination_path}' in bucket '{bucket_id}'...{Style.RESET_ALL}")
        try:
            destination_path = destination_path.strip('/')
            # with tqdm(total=1, desc="Uploading", unit="file") as pbar:
            response = self.client.storage.from_(bucket_id).upload(
                path=destination_path,
                file=file_data,
                file_options={"upsert": "true"}
            )
                # pbar.update(1)
            print(f"{Fore.GREEN}✓ File uploaded successfully{Style.RESET_ALL}")
            # return {"success": True, "data": response}
        except Exception as e:
            print(f"{Fore.RED}✗ Error uploading file{Style.RESET_ALL}")
            # return {"success": False, "error": str(e)}

    def create_signed_url(self, bucket_id: str, files: List[str], expires_in: int = 3600) -> Optional[str]:
        """
        Create a signed URL for file access.

        Args:
            bucket_id (str): The ID of the storage bucket
            file_path (str): Path to the file
            expires_in (int): Number of seconds until URL expires

        Returns:
            Optional[str]: Signed URL or None if error occurs
        """
        print(f"{Fore.CYAN}Creating signed URL for '{files}'...{Style.RESET_ALL}")
        try:
            response = self.client.storage.from_(bucket_id).create_signed_urls(
                paths=files,
                expires_in=expires_in
            )
            print(f"{Fore.GREEN}✓ Signed URL created successfully{Style.RESET_ALL}")
            urls = [url["signedURL"] for url in response]
            return urls
        except Exception as e:
            print(f"{Fore.RED}✗ Error creating signed URL: {str(e)}{Style.RESET_ALL}")
            return None


SUPABASE_URL: https://laieiinzukjqqbaglafj.supabase.co
SECRET_SUPABASE_KEY: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImxhaWVpaW56dWtqcXFiYWdsYWZqIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTcyMzIzNzM3OSwiZXhwIjoyMDM4ODEzMzc5fQ.790Ig7ohd48aTMb05xdlptCSjcOvQfj8bfEguETHVgM


In [4]:
supabase = create_client(SUPABASE_URL, SECRET_SUPABASE_KEY)

BUCKET_NAME = "Interviews"
FOLDER_PATH = "MP4" 

files_list = supabase.storage.from_(BUCKET_NAME).list(FOLDER_PATH)

download_dir = "localMP4"
if not os.path.exists(download_dir):
    os.makedirs(download_dir)

for item in files_list:
    file_name = item["name"]
    if file_name.endswith(".mp4"):
        remote_path = f"{FOLDER_PATH}/{file_name}"
        print(f"Downloading {remote_path}...")

        # 7. Download the file
        data = supabase.storage.from_(BUCKET_NAME).download(remote_path)

        # 8. Save the file locally
        local_path = os.path.join(download_dir, file_name)
        with open(local_path, "wb") as f:
            f.write(data)
        print(f"Saved to {local_path}")

print("All MP4 files downloaded!")

Downloading MP4/P10.mp4...
Saved to localMP4/P10.mp4
Downloading MP4/P11.mp4...
Saved to localMP4/P11.mp4
Downloading MP4/P12.mp4...
Saved to localMP4/P12.mp4
Downloading MP4/P13.mp4...
Saved to localMP4/P13.mp4
Downloading MP4/P14.mp4...
Saved to localMP4/P14.mp4
Downloading MP4/P15.mp4...
Saved to localMP4/P15.mp4
Downloading MP4/P16.mp4...
Saved to localMP4/P16.mp4
Downloading MP4/P17.mp4...
Saved to localMP4/P17.mp4
Downloading MP4/P1.mp4...
Saved to localMP4/P1.mp4
Downloading MP4/P20.mp4...
Saved to localMP4/P20.mp4
Downloading MP4/P21.mp4...
Saved to localMP4/P21.mp4
Downloading MP4/P22.mp4...
Saved to localMP4/P22.mp4
Downloading MP4/P24.mp4...
Saved to localMP4/P24.mp4
Downloading MP4/P25.mp4...
Saved to localMP4/P25.mp4
Downloading MP4/P27.mp4...
Saved to localMP4/P27.mp4
Downloading MP4/P29.mp4...
Saved to localMP4/P29.mp4
Downloading MP4/P30.mp4...
Saved to localMP4/P30.mp4
Downloading MP4/P31.mp4...
Saved to localMP4/P31.mp4
Downloading MP4/P32.mp4...
Saved to localMP4/P32

In [5]:
supabase = create_client(SUPABASE_URL, SECRET_SUPABASE_KEY)

BUCKET_NAME = "Interviews"
FOLDER_PATH = "Audio" 

files_list = supabase.storage.from_(BUCKET_NAME).list(FOLDER_PATH)

download_dir = "localAudio"
if not os.path.exists(download_dir):
    os.makedirs(download_dir)

for item in files_list:
    file_name = item["name"]
    if file_name.endswith(".wav"):
        remote_path = f"{FOLDER_PATH}/{file_name}"
        print(f"Downloading {remote_path}...")

        # 7. Download the file
        data = supabase.storage.from_(BUCKET_NAME).download(remote_path)

        # 8. Save the file locally
        local_path = os.path.join(download_dir, file_name)
        with open(local_path, "wb") as f:
            f.write(data)
        print(f"Saved to {local_path}")

print("All WAV files downloaded!")

Downloading Audio/P66.wav...
Saved to localAudio/P66.wav
Downloading Audio/P67.wav...
Saved to localAudio/P67.wav
Downloading Audio/P70.wav...
Saved to localAudio/P70.wav
Downloading Audio/P72.wav...
Saved to localAudio/P72.wav
Downloading Audio/P73.wav...
Saved to localAudio/P73.wav
Downloading Audio/P8.wav...
Saved to localAudio/P8.wav
Downloading Audio/PP49.wav...
Saved to localAudio/PP49.wav
Downloading Audio/PP61.wav...
Saved to localAudio/PP61.wav
Downloading Audio/PP62.wav...
Saved to localAudio/PP62.wav
Downloading Audio/PP63.wav...
Saved to localAudio/PP63.wav
All WAV files downloaded!


In [3]:
db = SupabaseDB()
db.list_buckets

Initializing Supabase client...
✓ Supabase client initialized successfully


<bound method SupabaseDB.list_buckets of <__main__.SupabaseDB object at 0x107b4ecc0>>

In [None]:
supabase = SupabaseDB()
files = supabase.list_storage_files(
    bucket_id="Hume Output",
)
print(f"Found {len(files)}")

In [None]:
# HUME data

urls = supabase.create_signed_url(
    bucket_id="Hume Output",
    files=files,
    expires_in=3600
)

print(f"Generated {len(urls)} urls")

In [None]:
# prompt: when you're downloading each file add them to a list call json_objects

import json
import requests
from tqdm import tqdm

json_objects = []

for url in tqdm(urls, desc="Downloading files"):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes

        json_data = response.json()["results"]
        json_objects.append(json_data)

    except requests.exceptions.RequestException as e:
        print(f"Error downloading or parsing JSON from {url}: {e}")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from {url}: {e}")

print(f"Downloaded {len(json_objects)} JSON objects")


In [None]:
# print(json_objects[0])
import json

list_json_objects = [json.dumps(json_objects[i],indent=4) for i in range(len(json_objects))]

print(list_json_objects[0])

In [None]:
files = supabase.list_storage_files(
    bucket_id="Interviews",
    folder_path="Labels"
)

labeled_scores_file = files[1]

print(files)

print(f"\nLabeled CSV {labeled_scores_file}")

labeled_score_url = supabase.create_signed_url(
    bucket_id="Interviews",
    files=[labeled_scores_file],
    expires_in=3600
)

print(f"\nLabeled Score URL {labeled_score_url}")

In [None]:
# Download the file
response = requests.get(labeled_score_url[0], stream=True)
response.raise_for_status()  # Raise an exception for bad status codes

# Save the file to a temporary location
temp_file_path = os.path.join("/tmp", "labeled_scores.csv")
with open(temp_file_path, 'wb') as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)

# Create a pandas DataFrame
try:
    df = pd.read_csv(temp_file_path)
    print(df.head())
except pd.errors.ParserError as e:
    print(f"Error parsing CSV: {e}")
except FileNotFoundError:
    print(f"File not found at: {temp_file_path}")

In [None]:
print(df.head())


print()


print(df.columns)