In [1]:
from dotenv import load_dotenv
import os
import asyncio
from supabase import create_client, Client
load_dotenv()  # Load environment variables from .env file

# Initialize Supabase
SUPABASE_URL = os.getenv('SUPABASE_URL')
SUPABASE_KEY = os.getenv('SUPABASE_KEY')
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

In [None]:
def upload_file(bucket: str, file_path: str, file_content):
    """Upload a file to storage."""
    return supabase.storage.from_(bucket).upload(file_path, file_content)

bucket="media"
file_path="uploads/p1.jpg"




In [None]:
import uuid
def make_uuid():
    return str(uuid.uuid4())

test = make_uuid()
print(test)

In [None]:
import cv2
import uuid

def make_uuid():
    return str(uuid.uuid4())

def get_clip_data(video_path, output_folder='uploads', file_extension='png'):
    # Initialize VideoCapture object
    cap = cv2.VideoCapture(video_path)

    # Initialize dictionary to store video info
    video_info = {}

    # Check if the video file was opened successfully
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return None

    # Read the first frame from the video
    ret, frame = cap.read()

    # If the first frame was read successfully
    if ret:
        unique_name = make_uuid()
        output_path = f"{output_folder}/{unique_name}.{file_extension}"
        cv2.imwrite(output_path, frame)

        # Save video information
        video_info['thumbnail'] = output_path
        video_info['width'] = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_info['height'] = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_info['fps'] = int(cap.get(cv2.CAP_PROP_FPS))
        video_info['frame_count'] = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        video_info['duration'] = video_info['frame_count'] / video_info['fps']
        video_info['fourcc'] = int(cap.get(cv2.CAP_PROP_FOURCC))
        video_info['is_color'] = True if len(frame.shape) == 3 else False
        video_info['aspect_ratio'] = video_info['width'] / video_info['height']

        # Release the VideoCapture object
        cap.release()

        return video_info
    else:
        print("Failed to retrieve the first frame")
        return None


In [None]:
import os
import cv2
from PIL import Image

def identify_media_type(file_path):
    # List of file extensions for video and image files
    video_extensions = ['.mp4', '.mkv', '.flv', '.avi']
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp']

    # Get the file extension
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in video_extensions:
        return 'video'
    elif file_extension in image_extensions:
        return 'image'
    
    else:
        # If the file extension doesn't match, try reading the file
        try:
            img = Image.open(file_path)
            img.close()
            return 'image'
        except:
            pass

        try:
            cap = cv2.VideoCapture(file_path)
            if cap.isOpened():
                cap.release()
                return 'video'
        except:
            pass
    
    return 'unknown'


In [None]:
# caption image 
import replicate

def get_caption(file_path):
    try:
        with open(file_path, "rb") as file:
            input_data = {"image": file}
            output = replicate.run(
                "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
                input=input_data
            )
            return output
    except Exception as e:
        return str(e)




def get_caption_from_binary(file_contents):
    try:
        input_data = {"image": file_contents}
        output = replicate.run(
            "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
            input=input_data
        )
        return output
    except Exception as e:
        return str(e)




# path = "uploads/p1.jpg"
# get_caption(path)

In [3]:
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity

def get_embeddings(input = ""):
    """Get the vector embeddings from openAI for a given input string."""


    # replace new lines with spaces
    input = input.replace("\n", " ")

    # remove extra spaces
    input = " ".join(input.split())


    response = openai.Embedding.create( 
        input= input,
        model="text-embedding-ada-002"
    )

    embeddings = response['data'][0]['embedding']
    return embeddings


def get_similarity(input1, input2):
    """Get the similarity between two strings."""

    embedding1 = get_embeddings(input1)
    embedding2 = get_embeddings(input2)

    similarity = cosine_similarity(embedding1, embedding2)
    return similarity

similarity = get_similarity("I like large bottoms", "I like big butts")
print (similarity)


0.9430430854155556


In [41]:
def get_image_data(image_path):
    # Read the image
    image = cv2.imread(image_path)

    # If the image was read successfully
    if image is not None:
        # Get image dimensions
        height, width, channels = image.shape
        
        # Calculate mean color
        mean_color = cv2.mean(image)
        
        # Create and populate a dictionary with the image data
        image_data = {
            'dimensions': {
                'width': width,
                'height': height,
                'channels': channels
            },
            'mean_color': mean_color,
            'file_size': os.path.getsize(image_path),
            'file_type': os.path.splitext(image_path)[1][1:]  # Get the extension without the dot
        }
        
        return image_data
    else:
        print(f"Error: Could not read image at {image_path}")
        return None


In [None]:
def upload_media_to_supabase(filepath):
    
    bucket = "media"

    # split the filepath to get the filename and the extension
    filename, file_extension = os.path.splitext(filepath)
    # get the actual filename
    filename = os.path.basename(filename)
    
    # identify file type = video, image, unknown
    file_type = identify_media_type(filepath)
    print(file_type)

    if file_type == "video":

        # read the file content
        with open(filepath, "rb") as f:
            file_content = f.read()

        # rename with uuid
        new_filename = make_uuid() + file_extension

        # upload the file to Bucket.
        print("upload started")
        supabase_resp = upload_file(bucket, new_filename, file_content)
        print(supabase_resp.status_code)

        if supabase_resp.status_code != 200:
            return None
    

        # setup data for Media table
        else:

            print("getting thumbnail")
            # get the thumbnail from the video
            clip_data = get_clip_data(filepath)
            print(clip_data)
            
            thumbnail_local_path = clip_data["thumbnail"]


            thumbnail_binary = open(thumbnail_local_path, "rb").read()
            print("uploading thumbnail")
            # upload the thumbnail to Bucket.
            thumbnail_name = new_filename+"_thumbnail"+".png"
            supabase_resp = upload_file(bucket, thumbnail_name, thumbnail_binary)
            thumbnail_url = f"https://{supabase_subdiman}.supabase.co/storage/v1/object/public/{bucket}/{thumbnail_name}"


        
            print("getting caption")
            # caption the thumbnail
            caption = get_caption(thumbnail_local_path)
            prefix = "Caption: "
            if caption.startswith(prefix):
                caption = caption[len(prefix):]
            print(caption)


            public_url =  f"https://{supabase_subdiman}.supabase.co/storage/v1/object/public/{bucket}/{new_filename}"
            table = "media"
            data = {
                "url": public_url,
                "type": file_type,
                "caption": caption,
                "old_filename": filename+"."+file_extension,
                "bucket": bucket,
                "filename": new_filename,
                "media_data": clip_data,
                "thumbnail_url": thumbnail_url,
            }
        
            # upload to media database
            print("uploading to media database")
            resp = funcs_supabase.insert_data(table, data)
            print(resp)

            print(resp.data)

            media_id  = resp.data[0]['id']

            # vectorize the caption
            print("vectorizing caption")
            caption_vector = get_embeddings(caption)

            # setup data for vectors table
            # add the vector in the vectors table with a link to the ID of the media table.

            table = "vectors"
            data = {"context" : "video_thumbnail_caption", "content": caption, "embedding": caption_vector, "media_table_id": media_id}

            print("uploading to vectors database")
            supabase_insert = funcs_supabase.insert_data(table, data)
            print(supabase_insert)


            # delete the thumbnail
            delete = os.remove(thumbnail_local_path)
            print("deleted thumbnail")


path ="/Users/georgebennett/Downloads/Snapinsta.app_video_25450ADFC1A80FB995ED4CEA4B03B581_video_dashinit.mp4"
resp= upload_media_to_supabase(path)


In [7]:
import os
import uuid
import cv2
import funcs_supabase

# Generates a unique identifier
def make_uuid():
    return str(uuid.uuid4())

supabase_subdomain = "fpyltvtkpkrkzortucoa"

# Uploads a file to Supabase and returns the public URL
def upload_to_supabase(bucket, filepath):
    # Read the file content
    with open(filepath, "rb") as f:
        file_content = f.read()

    # Rename the file using a unique identifier
    filename, file_extension = os.path.splitext(filepath)
    new_filename = make_uuid() + file_extension

    # Upload the file to the bucket
    supabase_resp = upload_file(bucket, new_filename, file_content)
    
    if supabase_resp.status_code == 200:
        return f"https://{supabase_subdomain}.supabase.co/storage/v1/object/public/{bucket}/{new_filename}"
    else:
        return None


# Uploads a file to Supabase and returns the public URL
# Main function to upload media to Supabase
def upload_media_to_supabase(filepath):
    bucket = "media"

    # Identify the file type (video, image, or unknown)
    file_type = identify_media_type(filepath)

    if file_type == "video":
        # Process video
        process_video(bucket, filepath)
        
    elif file_type == "image":
        # Process image
        process_image(bucket, filepath)

    else:
        print("Unsupported file type")


def process_video(bucket, filepath):
        file_type = "video"
        public_url = upload_to_supabase(bucket, filepath)
        
        if public_url:
            # Get thumbnail and other video clip data
            clip_data = get_clip_data(filepath)
            thumbnail_local_path = clip_data["thumbnail"]
            
            # Upload the thumbnail to Supabase
            thumbnail_url = upload_to_supabase(bucket, thumbnail_local_path)

            # Get the caption for the thumbnail
            caption = get_caption(thumbnail_local_path).replace("Caption: ", "")
            
            # Insert media info into 'media' table
            media_data = {
                "url": public_url,
                "type": file_type,
                "caption": caption,
                "old_filename": os.path.basename(filepath),
                "bucket": bucket,
                "filename": os.path.basename(public_url),
                "media_data": clip_data,
                "thumbnail_url": thumbnail_url,
            }
            resp = funcs_supabase.insert_data("media", media_data)
            media_id = resp.data[0]['id']

            # Get caption embeddings and insert into 'vectors' table
            caption_vector = get_embeddings(caption)
            vector_data = {
                "context": "video_thumbnail_caption",
                "content": caption,
                "embedding": caption_vector,
                "media_table_id": media_id
            }
            funcs_supabase.insert_data("vectors", vector_data)

            # Delete the local thumbnail file
            os.remove(thumbnail_local_path)



# Function to handle image files
def process_image(bucket, filepath):
    # Upload the image to Supabase and get the public URL
    public_url = upload_to_supabase(bucket, filepath)
    
    print("uploaded")

    if public_url:
        # Get the caption for the image
        caption = get_caption(filepath).replace("Caption: ", "")
        print(caption)

        image_data = get_image_data(filepath)
        print(image_data)
        
        # Insert media info into 'media' table
        media_data = {
            "url": public_url,
            "type": "image",
            "caption": caption,
            "old_filename": os.path.basename(filepath),
            "bucket": bucket,
            "filename": os.path.basename(public_url),
            "media_data": image_data,
        }
        resp = funcs_supabase.insert_data("media", media_data)
        media_id = resp.data[0]['id']

        # Get caption embeddings and insert into 'vectors' table
        caption_vector = get_embeddings(caption)
        vector_data = {
            "context": "image_media_caption",
            "content": caption,
            "embedding": caption_vector,
            "media_table_id": media_id
        }
        funcs_supabase.insert_data("vectors", vector_data)



file = "/Users/georgebennett/Downloads/Screenshot at Aug 03 20-26-30.png"
upload_media_to_supabase(file)


NameError: name 'identify_media_type' is not defined

In [4]:
text = "chorizo"
text_vector = get_embeddings(text)
print(text_vector)

[0.004039374180138111, 0.004029342904686928, -0.016425006091594696, -0.02422286942601204, -0.02705845795571804, 0.011054777540266514, -0.018498193472623825, -0.013174780644476414, -0.014819956384599209, -0.04665343463420868, 0.019327469170093536, 0.011790425516664982, -0.03504357859492302, 0.013569355010986328, -0.010579950176179409, 0.006259692832827568, 0.040420494973659515, 0.00214006588794291, 0.03365253657102585, -0.011843927204608917, -0.024530503898859024, 0.008219189941883087, -0.0028506345115602016, -0.01456582359969616, -0.002250412944704294, -0.0015757906949147582, 0.01942109689116478, -0.00881439633667469, -0.0012338816886767745, -0.0035745787899941206, 0.030281931161880493, 0.023340092971920967, -0.021079648286104202, -0.0036849258467555046, -0.02996092103421688, -0.01998286508023739, 0.01372317224740982, -0.011235346086323261, 0.005881837103515863, -0.01079395692795515, 0.007383227348327637, -0.0022069429978728294, -0.007436729036271572, -0.017789296805858612, -0.01964847

In [None]:
import requests
import json

url = "https://fpyltvtkpkrkzortucoa.supabase.co/rest/v1/rpc/similarity_search"

# Replace these with your actual values
# match_count = "your_value_here"
# match_threshold = "your_value_here"
# query_embedding = "your_value_here"
# SUPABASE_KEY = "your_supabase_key_here"

headers = {
    "Content-Type": "application/json",
    "apikey": SUPABASE_KEY,
    "Authorization": f"Bearer {SUPABASE_KEY}"
}

data = {
    "match_count": match_count,
    "match_threshold": match_threshold,
    "query_embedding": text_vector
}

response = requests.post(url, headers=headers, json=data)

# Handle the response
if response.status_code == 200:
    print("Success:", response.json())
else:
    print("Failed:", response.status_code, response.text)



In [None]:
print(SUPABASE_KEY)

In [None]:
def run_supabase_func (function_name, data):


    url = f"https://fpyltvtkpkrkzortucoa.supabase.co/rest/v1/rpc/{function_name}"
    headers = {
    "Content-Type": "application/json",
    "apikey": SUPABASE_KEY,
    "Authorization": f"Bearer {SUPABASE_KEY}"
    }

    response = requests.post(url, headers=headers, json=data)

        # Handle the response
    if response.status_code == 200:
        print("Success:", response.json())

        return response.json()

    else:
        print("Failed:", response.status_code, response.text)









In [None]:
function_name = "search_vectors"
data = {
    "query_embedding": text_vector
}

run_supabase_func(function_name, data)


# SELECT * FROM search_vectors(
#     array[...],  -- your query_embedding value here
#     0.5,         -- your match_threshold value here
#     'context',   -- your exact_context value here
#     'key',       -- your key for JSONB search
#     'value'      -- your value for JSONB search
# );






In [43]:
# https://.supabase.co/storage/v1/object/public//
# https://fpyltvtkpkrkzortucoa.supabase.co/storage/v1/render/image/public/media/0bd35f13-e222-4504-9362-5183bda46a6b.png?width=500&height=600
# resize: 'contain', // 'cover' | 'fill'
# https://fpyltvtkpkrkzortucoa.supabase.co/storage/v1/render/image/public/media/0bd35f13-e222-4504-9362-5183bda46a6b.png?width=300&height=300&resize=cover

SyntaxError: invalid binary literal (550822857.py, line 1)

In [11]:
pip install psycopg2

Note: you may need to restart the kernel to use updated packages.


In [20]:
text = "Mobile Phone"
text_vector = get_embeddings(text)
print(text_vector)

[-0.022289378568530083, 0.0009077159338630736, 0.005444658454507589, -0.020613091066479683, -0.024633560329675674, 0.012205463834106922, -0.015427078120410442, -0.003311648964881897, -0.00836179032921791, -0.020613091066479683, 0.012133436277508736, 0.03470437973737717, -0.006312267854809761, 0.008302859030663967, -0.02540622465312481, 0.019277298822999, 0.018818940967321396, 0.008001650683581829, 0.016343798488378525, -0.0096386494114995, -0.023219192400574684, 0.010666685178875923, -0.0017548628384247422, -0.01720813289284706, 0.0018481718143448234, 0.008014746941626072, -0.0012735851341858506, -0.012231656350195408, -0.0033623958006501198, -0.012899551540613174, 0.041095223277807236, -0.007274823263287544, -0.011812584474682808, -0.0160163976252079, -0.02907310239970684, 0.02572052739560604, 0.0005377541529014707, -0.0014823025558143854, 0.010522629134356976, -0.010293449275195599, 0.01936897076666355, 0.02479071170091629, -0.0005905473954044282, 0.004848790820688009, -0.02359897643

In [5]:
import os
import psycopg2
from dotenv import load_dotenv

load_dotenv()

def search_vectors(query_embedding, match_threshold=0.7, match_count=10):
    # Load PostgreSQL full URL from .env file
    SUPABASE_DB_FULL_URL = os.getenv("SUPABASE_DB_FULL_URL")
    
    # Initialize connection and cursor
    conn = psycopg2.connect(SUPABASE_DB_FULL_URL)
    cursor = conn.cursor()
    
    try:
        # Prepare SQL query
        # Prepare SQL query
        sql_query = """
        SELECT
            vectors.id,
            vectors.content,
            1 - (vectors.embedding <=> %s::vector) AS similarity
        FROM vectors
        WHERE 1 - (vectors.embedding <=> %s::vector) > %s
        ORDER BY similarity DESC
        LIMIT %s;
        """

        
        # Execute SQL query
        cursor.execute(sql_query, (query_embedding, query_embedding, match_threshold, match_count))
        
        # Fetch results
        results = cursor.fetchall()
        
        return results
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    finally:
        cursor.close()
        conn.close()

# Example usage
# query_embedding = [0.1, 0.2, 0.3, 0.4]  # Replace this with your actual query embedding
match_threshold = 0.7
match_count = 10

results = search_vectors(text_vector, match_threshold, match_count)
print(results)


[(5, 'Bratwurst - A type of German sausage made of pork, beef, or veal.', 0.830893747494635), (30, 'Ropa Vieja - A dish of shredded beef in a sauce made from tomatoes, onions, and peppers.', 0.8308790546813), (14, 'Feijoada - A black bean stew with pork or beef.', 0.826070002756316), (15, 'Boerewors - A type of sausage popular in South African cuisine.', 0.823615570778407), (17, 'Asado - A barbecue consisting of beef ribs cooked on a grill, or parrilla.', 0.815841829434386), (18, 'Bandeja Paisa - A platter that generally includes beans, rice, fried eggs, avocado, and meat.', 0.814486982426754), (23, 'Koshari - A dish made of rice, lentils, and pasta, topped with a spiced tomato sauce.', 0.812439993356668), (28, 'Empanada - A pastry filled with meat, cheese, or other ingredients.', 0.809759580111548), (7, 'Paella - A rice dish originally from Valencia, often containing seafood or meat.', 0.805947939450593), (20, 'Meatballs - Typically served with lingonberry sauce and potatoes.', 0.8007

In [8]:
# Example usage
# query_embedding = [0.1, 0.2, 0.3, 0.4]  # Replace this with your actual query embedding
match_threshold = 0.7
match_count = 10

results = search_vectors(text_vector, match_threshold, match_count)
print(results)

An error occurred: operator does not exist: vector <=> numeric[]
LINE 5:             1 - (vectors.embedding <=> ARRAY[0.0040650102309...
                                           ^
HINT:  No operator matches the given name and argument types. You might need to add explicit type casts.

None
