In [28]:
# !pip install supabase
# !pip install colorama
from supabase import create_client
from typing import Optional, List, Dict, Union
from colorama import Fore, Style, init
from tqdm import tqdm
import time
import os
import pandas as pd
import json
import requests


In [8]:
# Set up colorama before any coloured status message is printed.
init()

# Connection settings are read from the environment; each is None when the
# corresponding variable is not set.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY_TEMP")

class SupabaseDB:
    """Small convenience wrapper around a Supabase client.

    Groups the table helpers (select / insert / update) and the storage
    helpers (buckets, file listing, upload, signed URLs) used in this
    notebook. Every method prints a coloured progress line; most methods
    catch exceptions and report failure through their return value instead
    of raising.
    """

    def __init__(self):
        """Create the client from the module-level SUPABASE_URL / SUPABASE_KEY."""
        print(f"{Fore.CYAN}Initializing Supabase client...{Style.RESET_ALL}")

        self.client = create_client(SUPABASE_URL, SUPABASE_KEY)
        print(f"{Fore.GREEN}✓ Supabase client initialized successfully{Style.RESET_ALL}")

    def get_supabase_data(self, table: str, select_target: str, condition: Optional[list] = None):
        """
        Fetch data from the given table with an optional equality condition.

        Args:
            table (str): The name of the table to query
            select_target (str): Column selection passed to ``select``
            condition (list, optional): ``[column, value]`` pair applied as an
                equality filter; only the first two items are used

        Returns:
            The raw response object returned by ``execute()``.

        Note:
            Unlike the other helpers, this method does not catch exceptions;
            any error raised by the query propagates to the caller.
        """
        print(f"{Fore.CYAN}Fetching data from table '{table}'...{Style.RESET_ALL}")
        query = self.client.table(table).select(select_target)
        if condition:
            query = query.eq(condition[0], condition[1])
        response = query.execute()
        print(f"{Fore.GREEN}✓ Data fetched successfully{Style.RESET_ALL}")
        return response

    def insert_supabase_data(self, table: str, data_for_insert: dict) -> dict:
        """
        Insert data into the given table.

        Args:
            table (str): The name of the table to insert into
            data_for_insert (dict): The data to insert

        Returns:
            dict: ``{"success": True, "data": <first returned row>}`` on
            success, otherwise ``{"success": False, "error": <message>}``
        """
        print(f"{Fore.CYAN}Inserting data into table '{table}'...{Style.RESET_ALL}")
        try:
            with tqdm(total=1, desc="Inserting", unit="record") as pbar:
                response = self.client.table(table).insert(data_for_insert).execute()
                pbar.update(1)

            # Check if insert was successful
            if response and response.data:
                print(f"{Fore.GREEN}✓ Data inserted successfully{Style.RESET_ALL}")
                # Only the first returned row is handed back to the caller.
                return {"success": True, "data": response.data[0]}

            print(f"{Fore.RED}✗ No data inserted{Style.RESET_ALL}")
            return {"success": False, "error": "No data inserted"}

        except Exception as e:
            print(f"{Fore.RED}✗ Error inserting data: {str(e)}{Style.RESET_ALL}")
            return {"success": False, "error": str(e)}

    def update_supabase_data(self, table: str, data_for_update: dict, condition: list):
        """
        Update rows in the given table that match an equality condition.

        Args:
            table (str): The name of the table to update
            data_for_update (dict): Column/value pairs to write
            condition (list): ``[column, value]`` pair selecting the rows

        Returns:
            The updated rows (``response.data``) on success, otherwise a dict
            of the form ``{"error": <message>}``.
        """
        print(f"{Fore.CYAN}Updating data in table '{table}'...{Style.RESET_ALL}")
        try:
            with tqdm(total=1, desc="Updating", unit="record") as pbar:
                response = (
                    self.client.table(table)
                    .update(data_for_update)
                    .eq(condition[0], condition[1])
                    .execute()
                )
                pbar.update(1)

            if response.data:
                print(f"{Fore.GREEN}✓ Data updated successfully{Style.RESET_ALL}")
                return response.data
            else:
                print(f"{Fore.RED}✗ No data updated{Style.RESET_ALL}")
                return {"error": "No data updated."}
        except Exception as e:
            print(f"{Fore.RED}✗ Error updating data: {str(e)}{Style.RESET_ALL}")
            return {"error": str(e)}

    def list_buckets(self) -> Optional[List[dict]]:
        """List all storage buckets; returns None if the call fails."""
        try:
            response = self.client.storage.list_buckets()
            if response:
                print(f"{Fore.GREEN}Found {len(response)} buckets{Style.RESET_ALL}")
            return response
        except Exception as e:
            print(f"{Fore.RED}Error listing buckets: {str(e)}{Style.RESET_ALL}")
            return None

    def get_bucket(self, bucket_id: str) -> Optional[dict]:
        """Get details of a specific bucket; returns None if the call fails.

        NOTE(review): the result is read through ``response.name``, so it is
        presumably a bucket object rather than a plain dict -- confirm and
        tighten the annotation.
        """
        try:
            response = self.client.storage.get_bucket(bucket_id)
            if response:
                print(f"{Fore.GREEN}Retrieved bucket: {response.name}{Style.RESET_ALL}")
            return response
        except Exception as e:
            print(f"{Fore.RED}Error retrieving bucket {bucket_id}: {str(e)}{Style.RESET_ALL}")
            return None

    def list_storage_files(self, bucket_id: str, folder_path: str = "", limit: Optional[int] = None) -> Optional[List[str]]:
        """
        List files in a storage bucket with optional folder path and limit.

        Args:
            bucket_id (str): The ID of the storage bucket
            folder_path (str): Optional path to a specific folder; leading and
                trailing slashes are ignored
            limit (int, optional): Maximum number of entries to request. When
                omitted (or 0) no limit option is sent.
                NOTE(review): the storage API presumably applies its own
                default page size in that case -- confirm.

        Returns:
            Optional[List[str]]: Paths (prefixed with ``folder_path`` when one
            is given) of the entries that have a truthy ``id``, or None if an
            error occurs
        """
        print(f"{Fore.CYAN}Listing files in bucket '{bucket_id}'...{Style.RESET_ALL}")
        try:
            folder_path = folder_path.strip('/')
            options = {"limit": limit} if limit else None
            response = self.client.storage.from_(bucket_id).list(
                path=folder_path,
                options=options
            )
            # Entries without an id are skipped (presumably folders -- verify).
            file_list = [f"{folder_path}/{file['name']}" if folder_path else file['name']
                        for file in response if file['id']]
            print(f"{Fore.GREEN}✓ Found {len(file_list)} files{Style.RESET_ALL}")
            return file_list
        except Exception as e:
            print(f"{Fore.RED}✗ Error listing files: {str(e)}{Style.RESET_ALL}")
            return None

    def upload_file(self, bucket_id: str, destination_path: str, file_data) -> dict:
        """
        Upload a file to storage bucket, overwriting any existing object.

        Args:
            bucket_id (str): The ID of the storage bucket
            destination_path (str): Path where file should be stored; leading
                and trailing slashes are ignored
            file_data: The file data to upload

        Returns:
            dict: ``{"success": True, "data": <upload response>}`` on success,
            otherwise ``{"success": False, "error": <message>}``
        """
        print(f"{Fore.CYAN}Uploading file to '{destination_path}' in bucket '{bucket_id}'...{Style.RESET_ALL}")
        try:
            destination_path = destination_path.strip('/')
            response = self.client.storage.from_(bucket_id).upload(
                path=destination_path,
                file=file_data,
                file_options={"upsert": "true"}
            )
            print(f"{Fore.GREEN}✓ File uploaded successfully{Style.RESET_ALL}")
            return {"success": True, "data": response}
        except Exception as e:
            # Report the cause, consistent with the other helpers.
            print(f"{Fore.RED}✗ Error uploading file: {str(e)}{Style.RESET_ALL}")
            return {"success": False, "error": str(e)}

    def create_signed_url(self, bucket_id: str, files: List[str], expires_in: int = 3600) -> Optional[List[str]]:
        """
        Create signed URLs for a batch of files.

        Args:
            bucket_id (str): The ID of the storage bucket
            files (List[str]): Paths of the files inside the bucket
            expires_in (int): Number of seconds until the URLs expire

        Returns:
            Optional[List[str]]: One signed URL per entry of the response, in
            response order, or None if an error occurs
        """
        print(f"{Fore.CYAN}Creating signed URL for '{files}'...{Style.RESET_ALL}")
        try:
            response = self.client.storage.from_(bucket_id).create_signed_urls(
                paths=files,
                expires_in=expires_in
            )
            # Extract first so success is only reported once the URLs exist.
            urls = [url["signedURL"] for url in response]
            print(f"{Fore.GREEN}✓ Signed URL created successfully{Style.RESET_ALL}")
            return urls
        except Exception as e:
            print(f"{Fore.RED}✗ Error creating signed URL: {str(e)}{Style.RESET_ALL}")
            return None

In [10]:
supabase = SupabaseDB()
# list_storage_files returns None when the listing fails; fall back to an
# empty list so the count below (and later cells) do not crash on None.
files = supabase.list_storage_files(
    bucket_id="Hume Output",
) or []
print(f"Found {len(files)}")

Initializing Supabase client...
✓ Supabase client initialized successfully
Listing files in bucket 'Hume Output'...
✓ Found 26 files
Found 26


In [11]:
# create_signed_url returns None on failure; normalise to an empty list so
# len() and the download loop in the next cell keep working.
urls = supabase.create_signed_url(
    bucket_id="Hume Output",
    files=files,
    expires_in=3600
) or []

print(f"Generated {len(urls)} urls")

Creating signed URL for '['.emptyFolderPlaceholder', 'PP1_20250213_171754.json', 'PP1_20250213_175522.json', 'PP20_20250213_171755.json', 'PP20_20250213_175524.json', 'PP21_20250213_175525.json', 'PP22_20250213_175526.json', 'PP24_20250213_175528.json', 'PP25_20250213_175529.json', 'PP27_20250213_175532.json', 'PP29_20250213_175533.json', 'PP30_20250213_175535.json', 'PP31_20250213_175537.json', 'PP3_20250213_175547.json', 'PP32_20250213_175539.json', 'PP33_20250213_175541.json', 'PP34_20250213_175542.json', 'PP35_20250213_175544.json', 'PP37_20250213_175546.json', 'PP42_20250213_175549.json', 'PP43_20250213_175551.json', 'PP44_20250213_175552.json', 'PP45_20250213_175554.json', 'PP47_20250213_175556.json', 'PP48_20250213_175557.json', 'PP49_20250213_175558.json']'...
✓ Signed URL created successfully
Generated 26 urls


In [18]:
# prompt: when you're downloading each file add them to a list call json_objects
#
# Downloads every signed URL and collects the "results" payload of each file
# in json_objects. Files that fail to download or parse are reported and
# skipped, so the loop always runs to completion.

import json
import requests
from tqdm import tqdm

json_objects = []

for url in tqdm(urls, desc="Downloading files"):
    try:
        # The timeout keeps one stalled request from hanging the whole loop.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        json_data = response.json()["results"]
    except json.JSONDecodeError as e:
        # Must be listed before RequestException: the decode error raised by
        # response.json() was being caught by that broader handler first.
        print(f"Error decoding JSON from {url}: {e}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {e}")
    except (KeyError, TypeError) as e:
        # Valid JSON that does not have the expected top-level "results" entry.
        print(f"Unexpected JSON structure in {url}: {e!r}")
    else:
        json_objects.append(json_data)

print(f"Downloaded {len(json_objects)} JSON objects")


Downloading files:   0%|          | 0/26 [00:00<?, ?it/s]

Error downloading or parsing JSON from https://laieiinzukjqqbaglafj.supabase.co/storage/v1/object/sign/Hume%20Output/.emptyFolderPlaceholder?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1cmwiOiJIdW1lIE91dHB1dC8uZW1wdHlGb2xkZXJQbGFjZWhvbGRlciIsImlhdCI6MTczOTQ4ODQ2MCwiZXhwIjoxNzM5NDkyMDYwfQ.SzGDgTugedJ2YZ8ktMRw7Q5v5EZHa4Ou3B9hIorGMz4: Expecting value: line 1 column 1 (char 0)


Downloading files: 100%|██████████| 26/26 [00:12<00:00,  2.09it/s]


Downloaded 25 JSON objects


In [22]:
# Render each downloaded object as an indented JSON string and show the first.
import json

list_json_objects = [json.dumps(obj, indent=4) for obj in json_objects]

first_rendered = list_json_objects[0]
print(first_rendered)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                "position": {
                    "begin": 2290,
                    "end": 2298
                },
                "time_start": 151.8889,
                "time_end": 152.2871
            },
            {
                "emotions": {
                    "Admiration": 0.06802693754434586,
                    "Adoration": 0.014236560091376305,
                    "Aesthetic Appreciation": 0.046596962958574295,
                    "Amusement": 0.0084151616320014,
                    "Anger": 0.000794597843196243,
                    "Annoyance": 0.018444186076521873,
                    "Anxiety": 0.006792361848056316,
                    "Awe": 0.023422591388225555,
                    "Awkwardness": 0.023656191304326057,
                    "Boredom": 0.018863754346966743,
                    "Calmness": 0.11164998263120651,
                    "Concentration": 0.10988081246614456,
                    "Co

In [26]:
# Locate the turker score CSV in the "Labels" folder and create a signed URL
# for it.
LABELED_SCORES_FILENAME = "turker_scores_full_interview.csv"

files = supabase.list_storage_files(
    bucket_id="Interviews",
    folder_path="Labels"
)
if not files:
    # list_storage_files returns None on error and [] when nothing is found.
    raise RuntimeError("No files could be listed under 'Labels' in bucket 'Interviews'")

# Select the file by name rather than by position so the cell does not depend
# on the order in which the storage listing happens to return entries.
labeled_scores_file = next(
    (path for path in files if path.rsplit("/", 1)[-1] == LABELED_SCORES_FILENAME),
    None,
)
if labeled_scores_file is None:
    raise FileNotFoundError(f"{LABELED_SCORES_FILENAME} not found in {files}")

print(files)

print(f"\nLabeled CSV {labeled_scores_file}")

labeled_score_url = supabase.create_signed_url(
    bucket_id="Interviews",
    files=[labeled_scores_file],
    expires_in=3600
)
if not labeled_score_url:
    # create_signed_url returns None on failure; stop here with a clear
    # message instead of failing later on labeled_score_url[0].
    raise RuntimeError(f"Could not create a signed URL for {labeled_scores_file}")

# NOTE: the signed URL embeds an access token; avoid sharing this output.
print(f"\nLabeled Score URL {labeled_score_url}")

Listing files in bucket 'Interviews'...
✓ Found 2 files
['Labels/interview_transcripts_by_turkers.csv', 'Labels/turker_scores_full_interview.csv']

Labeled CSV Labels/turker_scores_full_interview.csv
Creating signed URL for '['Labels/turker_scores_full_interview.csv']'...
✓ Signed URL created successfully

Labeled Score URL ['https://laieiinzukjqqbaglafj.supabase.co/storage/v1/object/sign/Interviews/Labels/turker_scores_full_interview.csv?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1cmwiOiJJbnRlcnZpZXdzL0xhYmVscy90dXJrZXJfc2NvcmVzX2Z1bGxfaW50ZXJ2aWV3LmNzdiIsImlhdCI6MTczOTQ4OTE3MywiZXhwIjoxNzM5NDkyNzczfQ.JaQIl23Yx2_UlseohVfe9Y_gSF6P54kY_bAF2SVJKro']


In [31]:
# Download the labeled-scores CSV to a temporary file and load it into a
# pandas DataFrame (df).
import tempfile

# Use the platform's temp directory instead of a hard-coded /tmp.
temp_file_path = os.path.join(tempfile.gettempdir(), "labeled_scores.csv")

# Stream the download to disk. The context manager releases the connection
# even if writing fails, and the timeout prevents an indefinite hang.
with requests.get(labeled_score_url[0], stream=True, timeout=60) as response:
    response.raise_for_status()  # Raise an exception for bad status codes
    with open(temp_file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

# Create a pandas DataFrame
try:
    df = pd.read_csv(temp_file_path)
    print(df.head())
except (pd.errors.ParserError, pd.errors.EmptyDataError) as e:
    # EmptyDataError covers a download that produced an empty file.
    print(f"Error parsing CSV: {e}")
except FileNotFoundError:
    print(f"File not found at: {temp_file_path}")

  Participant          Worker  Overall  RecommendHiring  Colleague  Engaged  \
0          p1  A1ITBXITLY2952      5.0              5.0        6.0      6.0   
1          p1  A1OLRUT93TXWEP      6.0              6.0        7.0      5.0   
2          p1  A1ZQ7A1CUV6RD8      6.0              6.0        3.0      7.0   
3          p1   A2F7D8EOTB663      6.0              5.0        6.0      7.0   
4          p1  A323WW03VM8089      3.0              3.0        4.0      4.0   

   Excited  EyeContact  Smiled  SpeakingRate  ...  Friendly  Paused  \
0      5.0         5.0     4.0          4.00  ...       6.0     6.0   
1      4.0         6.0     5.0          5.00  ...       5.0     7.0   
2      6.0         7.0     4.0          3.50  ...       4.0     6.0   
3      5.0         7.0     4.0          5.00  ...       5.0     7.0   
4      4.0         4.0     3.0          5.25  ...       5.0     4.0   

   EngagingTone  StructuredAnswers  Calm  NotStressed  Focused  Authentic  \
0           5.0      

In [34]:
# Show the first rows, a blank separator line, then the column index.
preview = df.head()
print(preview)

print()

column_index = df.columns
print(column_index)

  Participant          Worker  Overall  RecommendHiring  Colleague  Engaged  \
0          p1  A1ITBXITLY2952      5.0              5.0        6.0      6.0   
1          p1  A1OLRUT93TXWEP      6.0              6.0        7.0      5.0   
2          p1  A1ZQ7A1CUV6RD8      6.0              6.0        3.0      7.0   
3          p1   A2F7D8EOTB663      6.0              5.0        6.0      7.0   
4          p1  A323WW03VM8089      3.0              3.0        4.0      4.0   

   Excited  EyeContact  Smiled  SpeakingRate  ...  Friendly  Paused  \
0      5.0         5.0     4.0          4.00  ...       6.0     6.0   
1      4.0         6.0     5.0          5.00  ...       5.0     7.0   
2      6.0         7.0     4.0          3.50  ...       4.0     6.0   
3      5.0         7.0     4.0          5.00  ...       5.0     7.0   
4      4.0         4.0     3.0          5.25  ...       5.0     4.0   

   EngagingTone  StructuredAnswers  Calm  NotStressed  Focused  Authentic  \
0           5.0      