In [8]:
# Cell 1: Setup - make the project root importable (package installation is a manual step below)
# With the root on sys.path, later cells can import from the 'src' directory if needed
import os
import sys

# The project root is taken to be the parent of the current working directory
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    # Put it first so it takes precedence over other entries on the path
    sys.path.insert(0, project_root)
print("Project root added to sys.path:", project_root)
print("Current working directory:", os.getcwd())

# Third-party packages the notebook relies on; uncomment the next line to install them
# !pip install supabase python-dotenv
print("\n--- Setup Complete ---")

Project root added to sys.path: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP
Current working directory: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP\notebooks

--- Setup Complete ---


In [9]:
# Cell 2: Import necessary modules and classes
# Standard library modules first, then the third-party packages (python-dotenv, supabase).
# NOTE(review): `io` and `uuid` are not referenced by any cell visible here - confirm
# whether later cells need them before removing.
import os
import io
import uuid
from dotenv import load_dotenv
from supabase import create_client, Client # Import necessary Supabase classes

# Progress markers so the cell's output shows that every import succeeded
print("Imported necessary modules.")
print("\n--- Imports Complete ---")

Imported necessary modules.

--- Imports Complete ---


In [10]:
# Cell 3: Load Environment Variables and Initialize Supabase Client
load_dotenv()  # Copy settings from the .env file into os.environ

# Supabase connection settings; the ANON key is the one meant for client-side operations
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_ANON_KEY")

# --- Test account credentials (this account MUST be the one that owns TEST_USER_UID) ---
test_email = os.environ.get("TEST_EMAIL")
test_password = os.environ.get("TEST_PASSWORD")
# --- UID the signed-in test account is expected to have ---
TEST_USER_UID = "e222921f-cfdc-4a05-8cf2-aea13004bcf2"

# Report only whether each setting was found; the values themselves are never printed
print(f"SUPABASE_URL Loaded: {'Yes' if supabase_url else 'No'}")
print(f"SUPABASE_ANON_KEY Loaded: {'Yes' if supabase_key else 'No'}")
print(f"TEST_EMAIL Loaded: {'Yes' if test_email else 'No'}")
print(f"TEST_PASSWORD Loaded: {'Yes' if test_password else 'No'}")
print(f"Target TEST_USER_UID: {TEST_USER_UID}")

# The client stays None unless every setting is present and creation succeeds;
# later cells test `supabase` for truthiness before using it.
supabase: Client | None = None
if all((supabase_url, supabase_key, test_email, test_password)):
    print("\nInitializing Supabase client...")
    try:
        # Build the client from the project URL and the ANON key
        supabase = create_client(supabase_url, supabase_key)
    except Exception as e:
        print(f"\nError initializing Supabase client: {e}")
    else:
        print("Supabase client initialized successfully.")
else:
    print("\nError: Missing required Supabase credentials or test user info in .env file.")

print("\n--- Client Initialization Complete ---")

SUPABASE_URL Loaded: Yes
SUPABASE_ANON_KEY Loaded: Yes
TEST_EMAIL Loaded: Yes
TEST_PASSWORD Loaded: Yes
Target TEST_USER_UID: e222921f-cfdc-4a05-8cf2-aea13004bcf2

Initializing Supabase client...
Supabase client initialized successfully.

--- Client Initialization Complete ---


In [11]:
# Cell 4: Authenticate the Test User
# Signs in with TEST_EMAIL / TEST_PASSWORD and keeps `session` and `user` only when
# the signed-in account's ID equals TEST_USER_UID. Later cells check both variables
# before touching storage, so leaving them as None disables those cells.
session = None # Initialize session variable
user = None    # Initialize user variable

if supabase:
    print(f"\nAttempting to sign in user: {test_email}...")
    try:
        # Sign in using the test email and password
        response = supabase.auth.sign_in_with_password(
            {"email": test_email, "password": test_password}
        )

        # Check if login was successful and extract session/user
        if response and response.session and response.user:
            session = response.session
            user = response.user
            print(f"Sign-in successful for user ID: {user.id}")

            # --- CRUCIAL CHECK ---
            # Verify the logged-in user's ID matches the target TEST_USER_UID
            if str(user.id) == TEST_USER_UID:
                print(f"Success: Logged-in user ID ({user.id}) matches the target TEST_USER_UID.")
            else:
                print(f"CRITICAL ERROR: Logged-in user ID ({user.id}) does NOT match the target TEST_USER_UID ({TEST_USER_UID}).")
                print("RLS policies will NOT work as expected for uploads/downloads.")
                print("Please ensure TEST_EMAIL in .env corresponds to the user with the specified TEST_USER_UID.")
                # Invalidate session/user to prevent accidental incorrect operations
                session = None
                user = None
                # Clearing the local variables alone leaves the client itself signed in
                # as the wrong account, so also end the client's session. This is
                # best-effort: a sign-out failure must not hide the mismatch above.
                try:
                    supabase.auth.sign_out()
                    print("Signed out the mismatched user from the Supabase client.")
                except Exception as sign_out_error:
                    print(f"Warning: could not sign out the mismatched user: {sign_out_error}")
                # raise AssertionError("Logged-in user ID does not match target TEST_USER_UID") # Optional: stop execution

        else:
            # Handle potential failed login (e.g., wrong password, user not found)
            print("Sign-in failed. Response did not contain expected session/user data.")
            # Attempt to extract error if available (structure might vary);
            # a `message` attribute, when present, takes precedence over `error`
            error_message = getattr(response, 'error', 'Unknown error')
            if hasattr(response, 'message'):
                error_message = response.message
            print(f"Sign-in error details: {error_message}")

    except Exception as e:
        # Catch other exceptions during sign-in
        print(f"An error occurred during sign-in: {e}")

else:
    print("\nSkipping authentication because Supabase client failed to initialize.")

print("\n--- Authentication Attempt Complete ---")


Attempting to sign in user: wbryanlai@gmail.com...
Sign-in successful for user ID: e222921f-cfdc-4a05-8cf2-aea13004bcf2
Success: Logged-in user ID (e222921f-cfdc-4a05-8cf2-aea13004bcf2) matches the target TEST_USER_UID.

--- Authentication Attempt Complete ---


In [12]:
# Cell 5: Upload Test PDF to Supabase Storage
# Uploads the local test PDF to '<TEST_USER_UID>/<file name>' in BUCKET_NAME through
# the client authenticated in Cell 4. Nothing is uploaded unless that sign-in
# succeeded and the signed-in user's ID equals TEST_USER_UID.

# --- Configuration ---
BUCKET_NAME = 'financial-pdfs' # Must match the bucket name in storage_setup.sql
PDF_RELATIVE_PATH = "../data/source_pdfs/invoice.pdf" # Path relative to notebook location
# --- End Configuration ---

# Proceed only if authentication was successful and user ID matched
if supabase and session and user and str(user.id) == TEST_USER_UID:
    print("\n--- Starting PDF Upload Test ---")

    # Get the absolute path to the PDF
    pdf_full_path = os.path.abspath(os.path.join(os.getcwd(), PDF_RELATIVE_PATH))

    # Require a regular file: a directory at this path would pass a plain
    # existence check and then fail when opened
    if not os.path.isfile(pdf_full_path):
        print(f"Error: Test PDF not found at {pdf_full_path}")
    else:
        print(f"Found test PDF: {pdf_full_path}")

        # Construct the storage path: MUST start with the user's UID for RLS policies
        storage_path = f"{TEST_USER_UID}/{os.path.basename(pdf_full_path)}"
        print(f"Target storage path: {storage_path} in bucket '{BUCKET_NAME}'")

        try:
            # Open the file in binary read mode
            with open(pdf_full_path, 'rb') as f:
                print("Attempting to upload file...")
                # Upload the file using the authenticated client
                response = supabase.storage.from_(BUCKET_NAME).upload(
                    path=storage_path,
                    file=f,
                    file_options={
                        "cache-control": "3600",
                        # State the MIME type explicitly so the object is stored as a PDF.
                        # NOTE(review): without it the storage client is believed to fall
                        # back to a text/plain default - confirm for the installed version.
                        "content-type": "application/pdf",
                        # "false" makes the upload fail if the file already exists (safer
                        # for testing); "true" overwrites and requires an UPDATE policy
                        "upsert": "false",
                    },
                )
                # No exception was raised, which this simple test treats as success
                print("Upload call completed.")
                print(f"SUCCESS: File uploaded successfully to path: {storage_path}")

        except Exception as e:
            # Catch potential errors:
            # - storage errors (e.g., RLS policy violation, file exists with upsert disabled)
            # - Other network/API errors
            print(f"UPLOAD FAILED: An error occurred during upload: {e}")
            error_text = str(e)
            # Check if it looks like an RLS error (common message format)
            if "new row violates row-level security policy" in error_text:
                print("Hint: This often means the RLS policy failed. Check:")
                print(f"  - Is the bucket name ('{BUCKET_NAME}') correct?")
                print(f"  - Does the storage path ('{storage_path}') start with the correct logged-in user ID?")
                print("  - Are the INSERT/UPDATE RLS policies correctly defined in Supabase?")
            # NOTE(review): the exact wording of the duplicate-object error is assumed - confirm
            elif "already exists" in error_text or "Duplicate" in error_text:
                print("Hint: A file may already exist at this path and upsert is disabled.")
                print("  - Run the cleanup cell (or delete the file manually) and retry.")

else:
    print("\nSkipping PDF upload test because authentication failed or user ID mismatch.")

print("\n--- PDF Upload Test Complete ---")


--- Starting PDF Upload Test ---
Found test PDF: c:\Users\wbrya\OneDrive\Documents\GitHub\AI-CFO-FYP\data\source_pdfs\invoice.pdf
Target storage path: e222921f-cfdc-4a05-8cf2-aea13004bcf2/invoice.pdf in bucket 'financial-pdfs'
Attempting to upload file...
Upload call completed.
SUCCESS: File uploaded successfully to path: e222921f-cfdc-4a05-8cf2-aea13004bcf2/invoice.pdf

--- PDF Upload Test Complete ---


In [13]:
# Cell 6: (Optional but Recommended) Verify Upload by Listing Files
# Lists the objects under the '<TEST_USER_UID>' folder of BUCKET_NAME and reports
# whether an entry named like the uploaded PDF is among them.

if supabase and session and user and str(user.id) == TEST_USER_UID:
    print("\n--- Verifying Upload by Listing Files ---")
    try:
        # List files in the user's root directory within the bucket
        # The path argument to list() specifies the folder prefix
        file_list_response = supabase.storage.from_(BUCKET_NAME).list(path=TEST_USER_UID)

        print(f"Files listed in folder '{TEST_USER_UID}':")
        found_uploaded_file = False
        # Names are compared against the bare file name, not the full storage path
        expected_filename = os.path.basename(pdf_full_path)

        if file_list_response:
            for file_object in file_list_response:
                # `or {}` also covers an entry whose 'metadata' key is present but None;
                # a plain .get('metadata', {}) would return None there and the
                # following .get('size') would raise, aborting the whole verification
                metadata = file_object.get('metadata') or {}
                print(f"  - {file_object.get('name')} (ID: {file_object.get('id')}, Size: {metadata.get('size')} bytes)")
                if file_object.get('name') == expected_filename:
                    found_uploaded_file = True
                    print(f"  ^^^ Uploaded file '{expected_filename}' found!")
        else:
            print("  No files found or empty response.")

        if found_uploaded_file:
            print("\nVERIFICATION SUCCESS: Uploaded file found in user's folder.")
        else:
            print(f"\nVERIFICATION FAILED: Uploaded file '{expected_filename}' not found in the list.")

    except Exception as e:
        print(f"An error occurred while listing files: {e}")
else:
    print("\nSkipping file listing verification.")

print("\n--- File Listing Verification Complete ---")


--- Verifying Upload by Listing Files ---
Files listed in folder 'e222921f-cfdc-4a05-8cf2-aea13004bcf2':
  - invoice.pdf (ID: 4cc46351-df60-46c0-a5cd-695fde2e4052, Size: 2050 bytes)
  ^^^ Uploaded file 'invoice.pdf' found!

VERIFICATION SUCCESS: Uploaded file found in user's folder.

--- File Listing Verification Complete ---


In [14]:
# Cell 7: (Optional but Recommended) Cleanup - Delete the Uploaded File
# Removes '<TEST_USER_UID>/<file name>' from BUCKET_NAME so the upload test can be
# re-run with upsert disabled. Runs only under the same authentication guard as
# the upload cell.

if supabase and session and user and str(user.id) == TEST_USER_UID:
    # Build the path inside the guard: `pdf_full_path` is only defined when the
    # upload cell ran under this same guard, so computing it unconditionally would
    # raise NameError instead of reaching the "Skipping" message below
    uploaded_filename = os.path.basename(pdf_full_path)
    storage_path_to_delete = f"{TEST_USER_UID}/{uploaded_filename}"

    print("\n--- Cleaning Up - Deleting Uploaded File ---")
    print(f"Attempting to delete: {storage_path_to_delete}")
    try:
        # Delete the specific file using its path
        delete_response = supabase.storage.from_(BUCKET_NAME).remove([storage_path_to_delete])

        # Check response (often contains list of deleted objects)
        if delete_response: # Check if response is not empty/None
            print("Delete call successful.")
            # NOTE(review): the delete response may report either the full object
            # path or just the file name - both are accepted here; confirm which
            # form the installed client returns
            accepted_names = (storage_path_to_delete, uploaded_filename)
            deleted_successfully = False
            for deleted_item in delete_response:
                if deleted_item.get('name') in accepted_names:
                    deleted_successfully = True
                    print(f"  Confirmed deletion of: {deleted_item.get('name')}")
                    break
            if not deleted_successfully:
                print("  Warning: File name not found in delete response details.")
        else:
            print("  Delete call returned empty response (might still be successful, check manually if needed).")

    except Exception as e:
        print(f"An error occurred during file deletion: {e}")
        if "Object not found" in str(e):
            print("  Hint: File might have already been deleted or listing verification failed.")

else:
    print("\nSkipping file deletion cleanup.")

print("\n--- Cleanup Complete ---")

# Optional: Sign out the user at the end of the test
# if supabase and session:
#    print("\nSigning out test user...")
#    supabase.auth.sign_out()
#    print("User signed out.")


--- Cleaning Up - Deleting Uploaded File ---
Attempting to delete: e222921f-cfdc-4a05-8cf2-aea13004bcf2/invoice.pdf
Delete call successful.

--- Cleanup Complete ---
