In [0]:
!pip install supabase

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
%restart_python

In [0]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from supabase import create_client, Client
from datetime import datetime, timedelta

Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f5189d2ac00>
Traceback (most recent call last):
  File "/databricks/python/lib/python3.11/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/databricks/python/lib/python3.11/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/python/lib/python3.11/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
                   ^^^^^^^^^^^^^^^^^^
  File "/databricks/python/lib/python3.11/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
             ^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'split'


In [0]:

# get encoded url and api_key from databricks secrets
supabase_url = dbutils.secrets.get(scope='rippl', key='supabase_url')
supabase_key = dbutils.secrets.get(scope='rippl', key='supabase_api_key')

# establish connection with supabase backend
supabase : Client = create_client(supabase_url, supabase_key)

In [0]:
# Declaring all functions here
def pull_full_table(table_name):
    # got tired of writing this out multiple times so even though it's more clunkly in jupyter notebooks I made a function for this

    # error handling
    # can I check if table doesn't exist in supabase? 

    # get total count of rows
    response = supabase.table(table_name).select("*", count="exact").execute()
    total_count = response.count

    # calculate num_batches
    num_batches = (total_count // 1000) + 1 # can only pull 1000 per batch 

    # print outputs to validate
    print(f"Total Rows in Table: {total_count}")
    print(f"Num Batches to get Total Rows {num_batches}")

    # actually do the loop

    for batch in range(1, num_batches + 1): # start at 1 (or start variable equation doesn't work)
        
        # get start and end of 'range' of rows to call in table
        start = (batch - 1) * 1000  # eg. first batch, will be 0
        end = (batch * 1000) - 1 # eg. first batch, will be 999 (unless less than 999)
        if  end > total_count:
            end = total_count
        
        # print outputs to validate
        print(f"For batch: {batch}, start: {start}, end: {end}")

        # get rows in range(start, end)
        temp_response = supabase.table(table_name).select("*").range(start, end).execute() 

        # get data
        temp_data = temp_response.data
        
        # convert to pandas dataframe for easy handling
        temp_df = pd.DataFrame(temp_data)

        # concatenate (or if batch one, copy)
        if batch == 1:
            result_df = temp_df.copy()

        else:
            result_df = pd.concat([result_df, temp_df], axis=0)

    # reset index and remove prior index
    result_df.reset_index(drop=True, inplace=True)

    return result_df

def delete_table(table_name):
    # so apparently whenever supabase runs a delete command it needs a 'where' command
    # but the way around this is just to run the neq command -- which I guess is looking for a case where 
    # user_id is the 'dummy_id' to make sure it doesn't delete that one
    # but because the dummy_id is intentionally something that shouldn't be a uuid
    # it ends up deleting the whole table 
    dummy_id = "00000000-0000-0000-0000-000000000000"
    response = supabase.table(table_name).delete().neq("user_id", dummy_id).execute()
    print("Deletion response:", response)



def get_post_recommendations(grouped_df, posts_df):
    post_recommendations = []

    for idx, row in grouped_df.iterrows():
        user_id = row['user_id']
        interests = row['interest_id']

        for idx, post_row in posts_df.iterrows():
            post_interest = post_row['interest_id']
            if post_interest in interests:
                # calculate similarity score here

                # popularity
                
                popularity = 1 # placeholder 

                # age of post 

                time_created = pd.to_datetime(post_row['created_at'])

                # removing because they're not needed right now
                # age = datetime.utcnow() - time_created

                # print(age)

                temp_age = 1 # placeholder
                
                post_recommendations.append({
                    # id property should be autogenerated
                    "similarity_score": int((1 * popularity) / temp_age), #placeholder for now
                    "has_been_recommended": True,
                    "timestamp": datetime.utcnow().isoformat(), # converting to ISO to make serializable
                    "embedding": None, 
                    "user_id": user_id,
                    "recommended_post_id": post_row['id'],
                    # "post_interest_id": post_interest # use this property for testing but comment out when actually pushing
                    })
                
    recommendations_df = pd.DataFrame(post_recommendations)
    return recommendations_df


def validate_post_recommendations(merged_recommendations_df):
    # validating outputs
    recommendations_right = 0
    recommendations_wrong = 0

    for idx, row in merged_recommendations_df.iterrows():
        post_interest_id = row['interest_id_y'].strip()
        user_id = row['user_id_x'].strip()
        user_interests = [interest.strip() for interest in row['interest_id_x']]


        if post_interest_id in user_interests:
            recommendations_right += 1
        else:
            recommendations_wrong += 1


    print(f"recommendations right: {recommendations_right}, recommendations_wrong: {recommendations_wrong}")

def remove_duplicate_post_recommendations(new_df_original, old_df_original):

    #creating copies so I don't accidentally 
    new_df = new_df_original.copy()
    old_df = old_df_original.copy()
    
    if new_df.empty:
        print("new df empty, returning old")
        return old_df
    elif old_df.empty:
        print("old df empty, returning new")
        return new_df
    
    # converts all user_ids and recommended_user_ids to strings and strips trailing and leading whitespaces
    for col in ['user_id', 'recommended_post_id']:
        new_df[col] = new_df[col].astype(str).str.strip()
        old_df[col] = old_df[col].astype(str).str.strip()
    


    old_pairs = set(zip(old_df['user_id'], old_df['recommended_post_id']))

    mask = new_df.apply(lambda row: (row['user_id'], row['recommended_post_id']) not in old_pairs, axis=1)

    if not mask.any():
        print("new df is all duplicates of old df, returning old df")
        return old_df

    # using mask and applying mask to original new_df
    filtered_new_df = new_df_original[mask].reset_index(drop=True)

    return filtered_new_df

def check_oldest_timestamp(old_recs_df, day_threshold=7):
    if old_recs_df.empty:
        print("old_recs is empty, therefore cannot be too old")
        return False

    old_copy = old_recs_df.copy()
    
    old_copy['timestamp'] = pd.to_datetime(old_copy['timestamp'], errors='coerce')

    oldest_ts = old_copy['timestamp'].min()


    day_threshold = datetime.now() - timedelta(days=day_threshold)

    if oldest_ts <= day_threshold:
        print("old_recs_df is older than day threshold set, it should be deleted")
        return True
    else:
        print("old_recs_df is not older than day threshold set, it should be kept")
        return False

def delete_table(table_name):
    # so apparently whenever supabase runs a delete command it needs a 'where' command
    # but the way around this is just to run the neq command -- which I guess is looking for a case where 
    # user_id is the 'dummy_id' to make sure it doesn't delete that one
    # but because the dummy_id is intentionally something that shouldn't be a uuid
    # it ends up deleting the whole table 
    dummy_id = "00000000-0000-0000-0000-000000000000"
    response = supabase.table(table_name).delete().neq("user_id", dummy_id).execute()
    print("Deletion response:", response)


In [0]:
users_df = pull_full_table("users")
users_df

Total Rows in Table: 20
Num Batches to get Total Rows 1
For batch: 1, start: 0, end: 20


Unnamed: 0,id,name,image,description,email,created_at
0,897a4d99-a0d4-474a-b34d-78ab1e062a93,Sierra Trailson,[REDACTED]/stora...,Weekend wanderer and sunrise seeker. I hike to...,abrielle@rippl.world,2025-04-14T02:17:13.164935
1,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,Gregory Mcgregerson,[REDACTED]/stora...,Hi,johnrich398@gmail.com,2025-04-11T04:11:37.742094
2,a46c20e6-1c11-4a59-a70c-1cc22862885d,Savanna Davis,[REDACTED]/stora...,I love video games!,saavannaa.rose@gmail.com,2025-04-09T23:31:03.00942
3,449bc0ab-6942-426e-b573-9cb690ffd1b9,Sydney Blake,[REDACTED]/stora...,I live for fresh powder runs and cozy evenings...,dfalsabrook@gmail.com,2025-04-10T03:35:57.447764
4,3fb510a5-58a0-4994-81ba-29e69fa79db1,David Meddie,[REDACTED]/stora...,I really like cars!,david.meddaugh@atlasschool.com,2025-04-10T20:14:04.190027
5,c2c9fd5e-2040-4ee7-92ec-4bd65a467a27,Cody,[REDACTED]/stora...,,codywalenciak@gmail.com,2025-04-10T20:38:11.824933
6,740f8541-0638-4348-8fd9-72453613be4e,Natalie Baker,[REDACTED]/stora...,Lover of sweet treats and scenic trails. You’l...,abrielleperry22@icloud.com,2025-04-13T03:15:09.953282
7,7ca36877-fc2f-478e-951d-b4af878f25ef,Blake Renshaw,[REDACTED]/stora...,"Grew up in the garage, raised on horsepower an...",juvx6bpas6@knmcadibav.com,2025-04-15T17:12:07.099672
8,ecb95c11-4923-42d0-9a45-01eb27cb832a,Mike Rodgers,[REDACTED]/stora...,Old soul with a love for all things that move ...,david.alasbrook@atlasschool.com,2025-04-10T20:19:26.250657
9,3e6acdeb-aa9a-47f1-879d-d7139eb98e2f,Carson Ridge,[REDACTED]/stora...,I’m a sculptor and ceramic artist who finds pe...,david.a@rippl.world,2025-04-10T04:13:53.414479


In [0]:
user_interest_df = pull_full_table("user_interests")
user_interest_df

Total Rows in Table: 152
Num Batches to get Total Rows 1
For batch: 1, start: 0, end: 152


Unnamed: 0,user_id,interest_id
0,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,15bb36f9-7466-4e83-a148-5096114cce9e
1,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,62b86242-7014-499f-9cb9-42cfca036de9
2,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,6663208a-593c-4c58-8df2-1cf2fda3cce6
3,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,872755d5-698f-4b66-8e80-098c01ca96eb
4,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,96081371-66ce-49ef-adf7-a1f90b47fdb3
...,...,...
147,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,6117a58e-f12f-4af1-b031-da9fa32d0742
148,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,7ee3f21b-2e73-4ddd-b6ab-0ae7d7f60a5c
149,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,baa13863-6091-4872-ba3d-b29a09af0d05
150,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,cbf7c5eb-44e3-4364-b5cd-a0ecd045ee3d


In [0]:
interests_df = pull_full_table("interests")
interests_df

Total Rows in Table: 47
Num Batches to get Total Rows 1
For batch: 1, start: 0, end: 47


Unnamed: 0,id,name,category_id
0,03f7dc13-cd91-4eaf-95c2-59dc783c8e03,Math,324b188f-361a-416d-9f58-83b9a2e8606c
1,06b52f51-32a3-4921-948c-23cb1f474ca3,Pottery,53ea2977-9a06-4445-bc95-b6d8acf72010
2,0f790fe0-7859-4408-9b98-c8cee119d659,Knitting,53ea2977-9a06-4445-bc95-b6d8acf72010
3,15bb36f9-7466-4e83-a148-5096114cce9e,Yoga,3e50575e-4896-4cf1-a98b-35f629474335
4,17a74d46-88b0-40a8-afa8-bbca0d79d285,Rollerskating,384da8b9-33f2-4f22-83c7-fc05d8bf2298
5,1820882b-7561-4f92-9f8d-8c241be21cf6,Skiing,384da8b9-33f2-4f22-83c7-fc05d8bf2298
6,1888b454-b69e-46bf-bc92-24e6a1085518,Playing Instruments,9d5bd5f2-ec1d-46ca-9c75-0965c676d465
7,1f85d55e-0b0c-4dd7-a9b0-82e4cb699145,Cardio,3e50575e-4896-4cf1-a98b-35f629474335
8,31242d1e-66fe-43fb-8d48-06e751c50e32,Studying,324b188f-361a-416d-9f58-83b9a2e8606c
9,40ddb76d-be61-4b45-9ec1-1d917e3a08c0,Sewing,53ea2977-9a06-4445-bc95-b6d8acf72010


In [0]:
n_users = users_df.index.max() + 1 # current number of users
print("n_users: ", n_users)
n_interests = interests_df.index.max() # current number of interests
print("n_interests: ", n_interests)

n_users:  20
n_interests:  46


In [0]:


# interests_df['name] is columns of user_interest matrix
# users_df['user_id"] is rows of user_interest matrix
# if user_df['user_id'] has an interest in the column (interests_df['name']) then set the value to 1
# interest_id and user_id will have to be remapped

new_interest_id_map = {}
j = 0
for old in interests_df['id']:
  new_interest_id_map[old] = j
  j += 1

new_user_id_map = {}
i = 0
for old in users_df['id']:
    new_user_id_map[old] = i
    i += 1

new_interest_id_map

{'03f7dc13-cd91-4eaf-95c2-59dc783c8e03': 0,
 '06b52f51-32a3-4921-948c-23cb1f474ca3': 1,
 '0f790fe0-7859-4408-9b98-c8cee119d659': 2,
 '15bb36f9-7466-4e83-a148-5096114cce9e': 3,
 '17a74d46-88b0-40a8-afa8-bbca0d79d285': 4,
 '1820882b-7561-4f92-9f8d-8c241be21cf6': 5,
 '1888b454-b69e-46bf-bc92-24e6a1085518': 6,
 '1f85d55e-0b0c-4dd7-a9b0-82e4cb699145': 7,
 '31242d1e-66fe-43fb-8d48-06e751c50e32': 8,
 '40ddb76d-be61-4b45-9ec1-1d917e3a08c0': 9,
 '43888455-55cf-4c09-ad26-df7d4673e19c': 10,
 '44227a36-49a0-4f30-94dd-3dc1c4e0a951': 11,
 '4b57788f-1d57-4286-b760-14e6fbccf2f6': 12,
 '5bf4236f-11ed-4332-8bb5-c0cc009adc99': 13,
 '6117a58e-f12f-4af1-b031-da9fa32d0742': 14,
 '62b86242-7014-499f-9cb9-42cfca036de9': 15,
 '6663208a-593c-4c58-8df2-1cf2fda3cce6': 16,
 '6ad40466-14f4-4bbd-8d9c-a17590ab2f2c': 17,
 '6c22dd13-897c-4b98-95d0-32dda931a2d3': 18,
 '72a885d5-db6e-4148-9d2a-09347d948451': 19,
 '7e21b3d7-9a17-4e2a-aa69-a4feba8e7c84': 20,
 '7ee3f21b-2e73-4ddd-b6ab-0ae7d7f60a5c': 21,
 '86b64472-50ea-4d2e

In [0]:
new_user_id_map

{'897a4d99-a0d4-474a-b34d-78ab1e062a93': 0,
 '02c3c862-b8a5-4fde-ac56-e3b5545f18d6': 1,
 'a46c20e6-1c11-4a59-a70c-1cc22862885d': 2,
 '449bc0ab-6942-426e-b573-9cb690ffd1b9': 3,
 '3fb510a5-58a0-4994-81ba-29e69fa79db1': 4,
 'c2c9fd5e-2040-4ee7-92ec-4bd65a467a27': 5,
 '740f8541-0638-4348-8fd9-72453613be4e': 6,
 '7ca36877-fc2f-478e-951d-b4af878f25ef': 7,
 'ecb95c11-4923-42d0-9a45-01eb27cb832a': 8,
 '3e6acdeb-aa9a-47f1-879d-d7139eb98e2f': 9,
 'e13ae6de-0656-493c-9165-80c1b0cd9bf6': 10,
 'ec03cce8-e743-45af-97e9-71bc05588376': 11,
 'f8f9ec17-fb66-4689-abc2-529d61dfb1f4': 12,
 'eb88b8b4-bdea-4aa0-af25-ec728d4083e5': 13,
 '0b7249ca-cc82-4ff1-9d50-d8d61280b717': 14,
 '436c7907-3ded-4313-832a-831fd3259848': 15,
 '76e52c42-0d73-42b7-95c4-20aadcd886ae': 16,
 '45365d21-9977-4505-a0f0-7f090bc33747': 17,
 'a99e13b8-fc64-48c6-a502-72e91972c107': 18,
 '355e5f7c-87fa-45c4-a218-ff904b7a1128': 19}

In [0]:
user_interest_df['has_interest'] = 1
user_interest_df

Unnamed: 0,user_id,interest_id,has_interest
0,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,15bb36f9-7466-4e83-a148-5096114cce9e,1
1,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,62b86242-7014-499f-9cb9-42cfca036de9,1
2,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,6663208a-593c-4c58-8df2-1cf2fda3cce6,1
3,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,872755d5-698f-4b66-8e80-098c01ca96eb,1
4,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,96081371-66ce-49ef-adf7-a1f90b47fdb3,1
...,...,...,...
147,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,6117a58e-f12f-4af1-b031-da9fa32d0742,1
148,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,7ee3f21b-2e73-4ddd-b6ab-0ae7d7f60a5c,1
149,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,baa13863-6091-4872-ba3d-b29a09af0d05,1
150,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,cbf7c5eb-44e3-4364-b5cd-a0ecd045ee3d,1


In [0]:
user_interest_matrix_unprocessed = user_interest_df.pivot(index='user_id', columns='interest_id', values='has_interest')
user_interest_matrix_unprocessed

interest_id,03f7dc13-cd91-4eaf-95c2-59dc783c8e03,06b52f51-32a3-4921-948c-23cb1f474ca3,0f790fe0-7859-4408-9b98-c8cee119d659,15bb36f9-7466-4e83-a148-5096114cce9e,17a74d46-88b0-40a8-afa8-bbca0d79d285,1820882b-7561-4f92-9f8d-8c241be21cf6,1888b454-b69e-46bf-bc92-24e6a1085518,1f85d55e-0b0c-4dd7-a9b0-82e4cb699145,40ddb76d-be61-4b45-9ec1-1d917e3a08c0,43888455-55cf-4c09-ad26-df7d4673e19c,44227a36-49a0-4f30-94dd-3dc1c4e0a951,4b57788f-1d57-4286-b760-14e6fbccf2f6,5bf4236f-11ed-4332-8bb5-c0cc009adc99,6117a58e-f12f-4af1-b031-da9fa32d0742,62b86242-7014-499f-9cb9-42cfca036de9,6663208a-593c-4c58-8df2-1cf2fda3cce6,6c22dd13-897c-4b98-95d0-32dda931a2d3,72a885d5-db6e-4148-9d2a-09347d948451,7e21b3d7-9a17-4e2a-aa69-a4feba8e7c84,7ee3f21b-2e73-4ddd-b6ab-0ae7d7f60a5c,86b64472-50ea-4d2e-b062-69024291cad4,872755d5-698f-4b66-8e80-098c01ca96eb,882a94f2-33f1-412a-931f-96e9dc1b948b,88d66a6d-8bdb-413e-8a21-a2012c737f16,912f8c61-5ab3-4283-b0ad-015b0038ba23,93c2f1f1-c92f-4a33-af8e-669b40d1b34e,96081371-66ce-49ef-adf7-a1f90b47fdb3,98df11b0-8fe1-481f-b6e3-464a9b8c141a,9b9d4d70-dec0-46a9-b7ef-47a773735861,b0d74040-5f4f-4e22-b874-93a61352346f,b17e02df-e59b-463d-9bb9-17da4029df75,ba47674f-e078-4769-85d8-8e46e20d39c8,ba5bc1b4-501c-4fbd-899c-63375b9b5ca3,baa13863-6091-4872-ba3d-b29a09af0d05,c563e58a-aae7-4ff2-ba87-2fc1035168e2,cbf7c5eb-44e3-4364-b5cd-a0ecd045ee3d,d6d79c59-6338-47b0-b6b4-88bad3f0027e,e558275e-00d8-4958-a134-2ac23927b2da,e725fe78-8e89-45b2-a7a3-e41e7e5f6733,e7a6d63c-b7bf-49ae-8b15-0c9d4eee16f6,e8f7f688-a517-46a5-b708-3413bc78fb57,fea6ed7d-9c6b-44ac-9e8e-198ca9a681a2,ff459b3e-f2aa-4057-b7c2-8a8ddf98bd9d
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
02c3c862-b8a5-4fde-ac56-e3b5545f18d6,,,,1.0,,,,,,,,,,,1.0,1.0,,,,,,1.0,,,,,1.0,,,,,,1.0,,,,,,,,,,
0b7249ca-cc82-4ff1-9d50-d8d61280b717,,,,,1.0,,1.0,,1.0,1.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,1.0,
355e5f7c-87fa-45c4-a218-ff904b7a1128,,,,,,,,,,,,1.0,,,,,1.0,,,,,,,,,1.0,,1.0,,,,,,1.0,,,1.0,,,,,,
3e6acdeb-aa9a-47f1-879d-d7139eb98e2f,,1.0,,1.0,,,,,,,,,,,,,1.0,,,,,,,,,,,1.0,,,,,,,,,,,1.0,1.0,,,
3fb510a5-58a0-4994-81ba-29e69fa79db1,,,,,,,,,,,,,,1.0,,,1.0,,,,,1.0,,1.0,,,1.0,1.0,,,,1.0,,,,,1.0,,,,,,
436c7907-3ded-4313-832a-831fd3259848,,1.0,1.0,,,,,,,1.0,,,,,,,,,1.0,,,,1.0,,,,,,,,1.0,,,,1.0,1.0,,1.0,,,,,
449bc0ab-6942-426e-b573-9cb690ffd1b9,,,,,,1.0,,,,1.0,,,1.0,,1.0,,,,,,,,,,,1.0,,1.0,,,,,,,,,,,,,,,
45365d21-9977-4505-a0f0-7f090bc33747,,,,,,,1.0,,1.0,1.0,1.0,,,1.0,,,,,1.0,,1.0,,,,,,,,,,,,,,1.0,,1.0,,,,,,
740f8541-0638-4348-8fd9-72453613be4e,,,,,,,,,,,,,,,,,,1.0,,,1.0,,,,,,,1.0,,,,,,,,,1.0,,,1.0,1.0,,
76e52c42-0d73-42b7-95c4-20aadcd886ae,,1.0,1.0,,,,,,,,,,,,,,,,,,,,1.0,,,1.0,,,,,,,,,,,1.0,1.0,,1.0,,,


In [0]:
user_interest_matrix = user_interest_matrix_unprocessed.copy()
user_interest_matrix.index = user_interest_matrix.index.map(lambda uid: new_user_id_map[uid])
user_interest_matrix.index.name = None
user_interest_matrix.index.name = "user_id"
user_interest_matrix

interest_id,03f7dc13-cd91-4eaf-95c2-59dc783c8e03,06b52f51-32a3-4921-948c-23cb1f474ca3,0f790fe0-7859-4408-9b98-c8cee119d659,15bb36f9-7466-4e83-a148-5096114cce9e,17a74d46-88b0-40a8-afa8-bbca0d79d285,1820882b-7561-4f92-9f8d-8c241be21cf6,1888b454-b69e-46bf-bc92-24e6a1085518,1f85d55e-0b0c-4dd7-a9b0-82e4cb699145,40ddb76d-be61-4b45-9ec1-1d917e3a08c0,43888455-55cf-4c09-ad26-df7d4673e19c,44227a36-49a0-4f30-94dd-3dc1c4e0a951,4b57788f-1d57-4286-b760-14e6fbccf2f6,5bf4236f-11ed-4332-8bb5-c0cc009adc99,6117a58e-f12f-4af1-b031-da9fa32d0742,62b86242-7014-499f-9cb9-42cfca036de9,6663208a-593c-4c58-8df2-1cf2fda3cce6,6c22dd13-897c-4b98-95d0-32dda931a2d3,72a885d5-db6e-4148-9d2a-09347d948451,7e21b3d7-9a17-4e2a-aa69-a4feba8e7c84,7ee3f21b-2e73-4ddd-b6ab-0ae7d7f60a5c,86b64472-50ea-4d2e-b062-69024291cad4,872755d5-698f-4b66-8e80-098c01ca96eb,882a94f2-33f1-412a-931f-96e9dc1b948b,88d66a6d-8bdb-413e-8a21-a2012c737f16,912f8c61-5ab3-4283-b0ad-015b0038ba23,93c2f1f1-c92f-4a33-af8e-669b40d1b34e,96081371-66ce-49ef-adf7-a1f90b47fdb3,98df11b0-8fe1-481f-b6e3-464a9b8c141a,9b9d4d70-dec0-46a9-b7ef-47a773735861,b0d74040-5f4f-4e22-b874-93a61352346f,b17e02df-e59b-463d-9bb9-17da4029df75,ba47674f-e078-4769-85d8-8e46e20d39c8,ba5bc1b4-501c-4fbd-899c-63375b9b5ca3,baa13863-6091-4872-ba3d-b29a09af0d05,c563e58a-aae7-4ff2-ba87-2fc1035168e2,cbf7c5eb-44e3-4364-b5cd-a0ecd045ee3d,d6d79c59-6338-47b0-b6b4-88bad3f0027e,e558275e-00d8-4958-a134-2ac23927b2da,e725fe78-8e89-45b2-a7a3-e41e7e5f6733,e7a6d63c-b7bf-49ae-8b15-0c9d4eee16f6,e8f7f688-a517-46a5-b708-3413bc78fb57,fea6ed7d-9c6b-44ac-9e8e-198ca9a681a2,ff459b3e-f2aa-4057-b7c2-8a8ddf98bd9d
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
1,,,,1.0,,,,,,,,,,,1.0,1.0,,,,,,1.0,,,,,1.0,,,,,,1.0,,,,,,,,,,
14,,,,,1.0,,1.0,,1.0,1.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,1.0,
19,,,,,,,,,,,,1.0,,,,,1.0,,,,,,,,,1.0,,1.0,,,,,,1.0,,,1.0,,,,,,
9,,1.0,,1.0,,,,,,,,,,,,,1.0,,,,,,,,,,,1.0,,,,,,,,,,,1.0,1.0,,,
4,,,,,,,,,,,,,,1.0,,,1.0,,,,,1.0,,1.0,,,1.0,1.0,,,,1.0,,,,,1.0,,,,,,
15,,1.0,1.0,,,,,,,1.0,,,,,,,,,1.0,,,,1.0,,,,,,,,1.0,,,,1.0,1.0,,1.0,,,,,
3,,,,,,1.0,,,,1.0,,,1.0,,1.0,,,,,,,,,,,1.0,,1.0,,,,,,,,,,,,,,,
17,,,,,,,1.0,,1.0,1.0,1.0,,,1.0,,,,,1.0,,1.0,,,,,,,,,,,,,,1.0,,1.0,,,,,,
6,,,,,,,,,,,,,,,,,,1.0,,,1.0,,,,,,,1.0,,,,,,,,,1.0,,,1.0,1.0,,
16,,1.0,1.0,,,,,,,,,,,,,,,,,,,,1.0,,,1.0,,,,,,,,,,,1.0,1.0,,1.0,,,


In [0]:
posts_df = pull_full_table("posts")
posts_df

Total Rows in Table: 29
Num Batches to get Total Rows 1
For batch: 1, start: 0, end: 29


Unnamed: 0,id,title,location,description,created_at,image,user_id,post_type,interest_id,event_date
0,f177092a-ec2a-4504-a950-7e61442eedc1,Revival at Ridgeway Raceway,Ridgeway Abandoned Raceway,We’re bringing the heat back to the old Ridgew...,2025-04-15T17:54:51.142+00:00,[REDACTED]/stora...,7ca36877-fc2f-478e-951d-b4af878f25ef,event,ba47674f-e078-4769-85d8-8e46e20d39c8,2025-05-03T00:45:00
1,8d3889f3-099e-4abd-b04e-02d0a65ba519,First post!,50,My first post! Happy to be here! Let's make so...,2025-04-10T20:45:50.360114+00:00,,c2c9fd5e-2040-4ee7-92ec-4bd65a467a27,note,fea6ed7d-9c6b-44ac-9e8e-198ca9a681a2,
2,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,Train watching,Train station,Watch the trains go by,2025-04-11T04:21:17.791645+00:00,[REDACTED]/stora...,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,event,96081371-66ce-49ef-adf7-a1f90b47fdb3,2025-04-20T03:19:00
3,8ac2c026-f66d-42f8-8566-9b882a366e68,Need Skateboard Recommendations,50,Hey skate fam! I’ve been riding an old setup f...,2025-04-15T21:32:11.767+00:00,[REDACTED]/stora...,eb88b8b4-bdea-4aa0-af25-ec728d4083e5,note,86b64472-50ea-4d2e-b062-69024291cad4,
4,912663e7-eaed-416c-a6e2-342efcf4cefc,Taking f-350 to the corvette show tonight.,50,I'll be rollin coal in the parking lot,2025-04-10T20:49:12.189124+00:00,[REDACTED]/stora...,ecb95c11-4923-42d0-9a45-01eb27cb832a,note,872755d5-698f-4b66-8e80-098c01ca96eb,
5,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8,What A Rager,50,Such a fun time at gnargaritaville,2025-04-16T02:00:03.95+00:00,[REDACTED]/stora...,45365d21-9977-4505-a0f0-7f090bc33747,note,1888b454-b69e-46bf-bc92-24e6a1085518,
6,7cbac01e-bb20-4a1d-93ed-bd53e2bf8c7a,"Earth, Fire, and Form",50,Just pulled this new piece from the kiln — ins...,2025-04-16T02:15:09.095+00:00,[REDACTED]/stora...,3e6acdeb-aa9a-47f1-879d-d7139eb98e2f,note,06b52f51-32a3-4921-948c-23cb1f474ca3,
7,9380aebc-a2d1-4b9a-b4d7-34a7536e663d,"Home-Cooked, Plant-Powered",50,Tried out this roasted veggie tagine I picked ...,2025-04-16T02:28:09.236+00:00,[REDACTED]/stora...,e13ae6de-0656-493c-9165-80c1b0cd9bf6,note,6663208a-593c-4c58-8df2-1cf2fda3cce6,
8,18432e3c-70ed-4a0c-a5fd-625f769fdd36,Reading Night!,Downtown Tulsa Library,Let’s all get together and read a few chapters...,2025-04-12T00:58:21.134838+00:00,[REDACTED]/stora...,a46c20e6-1c11-4a59-a70c-1cc22862885d,event,b17e02df-e59b-463d-9bb9-17da4029df75,2025-04-13T17:56:00
9,a2f14788-0163-4395-86f1-51df75ea124c,Beans And Reads,Fulton St Books,Coffee and reading with friends.\n,2025-04-12T01:16:07.381792+00:00,[REDACTED]/stora...,a46c20e6-1c11-4a59-a70c-1cc22862885d,event,b17e02df-e59b-463d-9bb9-17da4029df75,2025-04-24T10:00:00


In [0]:
merged_user_interests = pd.merge(users_df, user_interest_df, left_on='id', right_on='user_id')
merged_user_interests = merged_user_interests.drop(['id', "has_interest", "image", "created_at", "email"], axis=1)
merged_user_interests

Unnamed: 0,name,description,user_id,interest_id
0,Sierra Trailson,Weekend wanderer and sunrise seeker. I hike to...,897a4d99-a0d4-474a-b34d-78ab1e062a93,06b52f51-32a3-4921-948c-23cb1f474ca3
1,Sierra Trailson,Weekend wanderer and sunrise seeker. I hike to...,897a4d99-a0d4-474a-b34d-78ab1e062a93,0f790fe0-7859-4408-9b98-c8cee119d659
2,Sierra Trailson,Weekend wanderer and sunrise seeker. I hike to...,897a4d99-a0d4-474a-b34d-78ab1e062a93,1820882b-7561-4f92-9f8d-8c241be21cf6
3,Sierra Trailson,Weekend wanderer and sunrise seeker. I hike to...,897a4d99-a0d4-474a-b34d-78ab1e062a93,1f85d55e-0b0c-4dd7-a9b0-82e4cb699145
4,Sierra Trailson,Weekend wanderer and sunrise seeker. I hike to...,897a4d99-a0d4-474a-b34d-78ab1e062a93,40ddb76d-be61-4b45-9ec1-1d917e3a08c0
...,...,...,...,...
147,Samantha Waters,Im into doing outdoor activities!,355e5f7c-87fa-45c4-a218-ff904b7a1128,6c22dd13-897c-4b98-95d0-32dda931a2d3
148,Samantha Waters,Im into doing outdoor activities!,355e5f7c-87fa-45c4-a218-ff904b7a1128,93c2f1f1-c92f-4a33-af8e-669b40d1b34e
149,Samantha Waters,Im into doing outdoor activities!,355e5f7c-87fa-45c4-a218-ff904b7a1128,98df11b0-8fe1-481f-b6e3-464a9b8c141a
150,Samantha Waters,Im into doing outdoor activities!,355e5f7c-87fa-45c4-a218-ff904b7a1128,baa13863-6091-4872-ba3d-b29a09af0d05


In [0]:
grouped_df = merged_user_interests.groupby("user_id")['interest_id'].apply(list).reset_index()
grouped_df

Unnamed: 0,user_id,interest_id
0,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,"[15bb36f9-7466-4e83-a148-5096114cce9e, 62b8624..."
1,0b7249ca-cc82-4ff1-9d50-d8d61280b717,"[17a74d46-88b0-40a8-afa8-bbca0d79d285, 1888b45..."
2,355e5f7c-87fa-45c4-a218-ff904b7a1128,"[4b57788f-1d57-4286-b760-14e6fbccf2f6, 6c22dd1..."
3,3e6acdeb-aa9a-47f1-879d-d7139eb98e2f,"[06b52f51-32a3-4921-948c-23cb1f474ca3, 15bb36f..."
4,3fb510a5-58a0-4994-81ba-29e69fa79db1,"[6117a58e-f12f-4af1-b031-da9fa32d0742, 6c22dd1..."
5,436c7907-3ded-4313-832a-831fd3259848,"[06b52f51-32a3-4921-948c-23cb1f474ca3, 0f790fe..."
6,449bc0ab-6942-426e-b573-9cb690ffd1b9,"[1820882b-7561-4f92-9f8d-8c241be21cf6, 4388845..."
7,45365d21-9977-4505-a0f0-7f090bc33747,"[1888b454-b69e-46bf-bc92-24e6a1085518, 40ddb76..."
8,740f8541-0638-4348-8fd9-72453613be4e,"[72a885d5-db6e-4148-9d2a-09347d948451, 86b6447..."
9,76e52c42-0d73-42b7-95c4-20aadcd886ae,"[06b52f51-32a3-4921-948c-23cb1f474ca3, 0f790fe..."


In [0]:
post_popularity_df = pull_full_table("post_popularity")
post_popularity_df

Total Rows in Table: 29
Num Batches to get Total Rows 1
For batch: 1, start: 0, end: 29


Unnamed: 0,post_id,likes,comments,reposts,total_engagement
0,e0ee1a26-3671-4d25-b638-a192bc8c1d5c,0,0,0,0
1,2432b903-38f1-4716-af73-cbf25e21a095,0,0,0,0
2,b1b674dd-873a-452f-a7fe-63ce591cb0ab,0,0,0,0
3,2d7e6d6c-b299-4ee0-a6b6-801458dfed4b,0,0,0,0
4,f177092a-ec2a-4504-a950-7e61442eedc1,0,0,0,0
5,8ac2c026-f66d-42f8-8566-9b882a366e68,0,0,0,0
6,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8,0,0,0,0
7,7cbac01e-bb20-4a1d-93ed-bd53e2bf8c7a,0,0,0,0
8,9380aebc-a2d1-4b9a-b4d7-34a7536e663d,0,0,0,0
9,cf083aa2-f768-4283-81b1-1d475e85556f,0,0,0,0


In [0]:
recommendations_df = get_post_recommendations(grouped_df, posts_df)
recommendations_df

Unnamed: 0,similarity_score,has_been_recommended,timestamp,embedding,user_id,recommended_post_id
0,1,True,2025-04-16T23:11:44.917244,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756
1,1,True,2025-04-16T23:11:44.917638,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,912663e7-eaed-416c-a6e2-342efcf4cefc
2,1,True,2025-04-16T23:11:44.917969,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,9380aebc-a2d1-4b9a-b4d7-34a7536e663d
3,1,True,2025-04-16T23:11:44.919562,,0b7249ca-cc82-4ff1-9d50-d8d61280b717,8d3889f3-099e-4abd-b04e-02d0a65ba519
4,1,True,2025-04-16T23:11:44.920010,,0b7249ca-cc82-4ff1-9d50-d8d61280b717,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8
...,...,...,...,...,...,...
117,1,True,2025-04-16T23:11:44.963460,,ecb95c11-4923-42d0-9a45-01eb27cb832a,b1b674dd-873a-452f-a7fe-63ce591cb0ab
118,1,True,2025-04-16T23:11:44.963636,,ecb95c11-4923-42d0-9a45-01eb27cb832a,2d7e6d6c-b299-4ee0-a6b6-801458dfed4b
119,1,True,2025-04-16T23:11:44.964158,,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8
120,1,True,2025-04-16T23:11:44.964801,,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,edad2659-5247-4e10-b792-469b4d4d36d1


In [0]:
old_user_post_recommendations_df = pull_full_table("user_post_recommendations")
old_user_post_recommendations_df

Total Rows in Table: 122
Num Batches to get Total Rows 1
For batch: 1, start: 0, end: 122


Unnamed: 0,id,similarity_score,has_been_recommended,timestamp,embedding,user_id,recommended_post_id
0,14d3ec99-d878-445e-ad43-170c50c520f1,1,True,2025-04-16T21:13:10.49938,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756
1,2a04081a-c0bf-41d1-8d7c-3a5a5daba1a6,1,True,2025-04-16T21:13:10.499652,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,912663e7-eaed-416c-a6e2-342efcf4cefc
2,556353e7-b45f-40df-b80d-72ab4a7c5bc7,1,True,2025-04-16T21:13:10.499931,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,9380aebc-a2d1-4b9a-b4d7-34a7536e663d
3,1c22839f-1f42-4897-99ac-ab155363af7a,1,True,2025-04-16T21:13:10.501307,,0b7249ca-cc82-4ff1-9d50-d8d61280b717,8d3889f3-099e-4abd-b04e-02d0a65ba519
4,35ced054-d1f5-4f54-a647-3a2c438b53a2,1,True,2025-04-16T21:13:10.501576,,0b7249ca-cc82-4ff1-9d50-d8d61280b717,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8
...,...,...,...,...,...,...,...
117,a2ac9e30-eb7f-4227-ac58-2150bf9b8b6a,1,True,2025-04-16T21:13:10.54228,,ecb95c11-4923-42d0-9a45-01eb27cb832a,b1b674dd-873a-452f-a7fe-63ce591cb0ab
118,681ac01d-5ffd-40df-8921-a455c44849cf,1,True,2025-04-16T21:13:10.542437,,ecb95c11-4923-42d0-9a45-01eb27cb832a,2d7e6d6c-b299-4ee0-a6b6-801458dfed4b
119,3941e954-4fdf-468c-a19e-53c09238b755,1,True,2025-04-16T21:13:10.54298,,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8
120,ed18b65b-e939-4a5c-97a6-eac8c5aee84a,1,True,2025-04-16T21:13:10.543474,,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,edad2659-5247-4e10-b792-469b4d4d36d1


In [0]:
# unrealistic threshold to prevent accidental deletion
day_threshold = 100

# if oldest date found in old_user_user_recommendations has a 7 day difference between it and utcnow()
if check_oldest_timestamp(old_user_post_recommendations_df, day_threshold=day_threshold):
    # uncomment when live
    delete_table("user_post_recommendations")
else:
    print("removing duplicates from new batch of recommendations_df")
    recommendations_df = remove_duplicate_post_recommendations(recommendations_df, old_user_post_recommendations_df)
    recommendations_df



old_recs is empty, therefore cannot be too old
removing duplicates from new batch of recommendations_df
old df empty, returning new


In [0]:
recommendations_df

Unnamed: 0,similarity_score,has_been_recommended,timestamp,embedding,user_id,recommended_post_id
0,1,True,2025-04-16T21:13:10.499380,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756
1,1,True,2025-04-16T21:13:10.499652,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,912663e7-eaed-416c-a6e2-342efcf4cefc
2,1,True,2025-04-16T21:13:10.499931,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,9380aebc-a2d1-4b9a-b4d7-34a7536e663d
3,1,True,2025-04-16T21:13:10.501307,,0b7249ca-cc82-4ff1-9d50-d8d61280b717,8d3889f3-099e-4abd-b04e-02d0a65ba519
4,1,True,2025-04-16T21:13:10.501576,,0b7249ca-cc82-4ff1-9d50-d8d61280b717,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8
...,...,...,...,...,...,...
117,1,True,2025-04-16T21:13:10.542280,,ecb95c11-4923-42d0-9a45-01eb27cb832a,b1b674dd-873a-452f-a7fe-63ce591cb0ab
118,1,True,2025-04-16T21:13:10.542437,,ecb95c11-4923-42d0-9a45-01eb27cb832a,2d7e6d6c-b299-4ee0-a6b6-801458dfed4b
119,1,True,2025-04-16T21:13:10.542980,,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,08a9cb3f-c5c6-4c96-81c5-e723c9a39dd8
120,1,True,2025-04-16T21:13:10.543474,,f8f9ec17-fb66-4689-abc2-529d61dfb1f4,edad2659-5247-4e10-b792-469b4d4d36d1


In [0]:
merged_recommendations_df = pd.merge(recommendations_df, grouped_df, on='user_id')
merged_recommendations_df = pd.merge(merged_recommendations_df, posts_df, left_on='recommended_post_id', right_on='id')
merged_recommendations_df

Unnamed: 0,similarity_score,has_been_recommended,timestamp,embedding,user_id_x,recommended_post_id,interest_id_x,id,title,location,description,created_at,image,user_id_y,post_type,interest_id_y,event_date
0,1,True,2025-04-16T23:11:44.917244,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,"[15bb36f9-7466-4e83-a148-5096114cce9e, 62b8624...",610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,Train watching,Train station,Watch the trains go by,2025-04-11T04:21:17.791645+00:00,[REDACTED]/stora...,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,event,96081371-66ce-49ef-adf7-a1f90b47fdb3,2025-04-20T03:19:00
1,1,True,2025-04-16T23:11:44.926070,,3fb510a5-58a0-4994-81ba-29e69fa79db1,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,"[6117a58e-f12f-4af1-b031-da9fa32d0742, 6c22dd1...",610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,Train watching,Train station,Watch the trains go by,2025-04-11T04:21:17.791645+00:00,[REDACTED]/stora...,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,event,96081371-66ce-49ef-adf7-a1f90b47fdb3,2025-04-20T03:19:00
2,1,True,2025-04-16T23:11:44.961591,,ecb95c11-4923-42d0-9a45-01eb27cb832a,610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,"[6c22dd13-897c-4b98-95d0-32dda931a2d3, 872755d...",610e4a9c-c1ab-43bb-9e33-80ed4c0a8756,Train watching,Train station,Watch the trains go by,2025-04-11T04:21:17.791645+00:00,[REDACTED]/stora...,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,event,96081371-66ce-49ef-adf7-a1f90b47fdb3,2025-04-20T03:19:00
3,1,True,2025-04-16T23:11:44.917638,,02c3c862-b8a5-4fde-ac56-e3b5545f18d6,912663e7-eaed-416c-a6e2-342efcf4cefc,"[15bb36f9-7466-4e83-a148-5096114cce9e, 62b8624...",912663e7-eaed-416c-a6e2-342efcf4cefc,Taking f-350 to the corvette show tonight.,50,I'll be rollin coal in the parking lot,2025-04-10T20:49:12.189124+00:00,[REDACTED]/stora...,ecb95c11-4923-42d0-9a45-01eb27cb832a,note,872755d5-698f-4b66-8e80-098c01ca96eb,
4,1,True,2025-04-16T23:11:44.926357,,3fb510a5-58a0-4994-81ba-29e69fa79db1,912663e7-eaed-416c-a6e2-342efcf4cefc,"[6117a58e-f12f-4af1-b031-da9fa32d0742, 6c22dd1...",912663e7-eaed-416c-a6e2-342efcf4cefc,Taking f-350 to the corvette show tonight.,50,I'll be rollin coal in the parking lot,2025-04-10T20:49:12.189124+00:00,[REDACTED]/stora...,ecb95c11-4923-42d0-9a45-01eb27cb832a,note,872755d5-698f-4b66-8e80-098c01ca96eb,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,1,True,2025-04-16T23:11:44.942455,,7ca36877-fc2f-478e-951d-b4af878f25ef,2432b903-38f1-4716-af73-cbf25e21a095,"[872755d5-698f-4b66-8e80-098c01ca96eb, 88d66a6...",2432b903-38f1-4716-af73-cbf25e21a095,Had A Good Time Cleaning PC Fans!,50,Had a little meeting to clean our systems toni...,2025-04-14T02:53:31.408+00:00,,449bc0ab-6942-426e-b573-9cb690ffd1b9,note,ff459b3e-f2aa-4057-b7c2-8a8ddf98bd9d,
118,1,True,2025-04-16T23:11:44.952458,,c2c9fd5e-2040-4ee7-92ec-4bd65a467a27,2432b903-38f1-4716-af73-cbf25e21a095,"[06b52f51-32a3-4921-948c-23cb1f474ca3, 1888b45...",2432b903-38f1-4716-af73-cbf25e21a095,Had A Good Time Cleaning PC Fans!,50,Had a little meeting to clean our systems toni...,2025-04-14T02:53:31.408+00:00,,449bc0ab-6942-426e-b573-9cb690ffd1b9,note,ff459b3e-f2aa-4057-b7c2-8a8ddf98bd9d,
119,1,True,2025-04-16T23:11:44.942734,,7ca36877-fc2f-478e-951d-b4af878f25ef,2d7e6d6c-b299-4ee0-a6b6-801458dfed4b,"[872755d5-698f-4b66-8e80-098c01ca96eb, 88d66a6...",2d7e6d6c-b299-4ee0-a6b6-801458dfed4b,Iron Bros,50,Big day at the gym—me and my boy Cam crushed o...,2025-04-15T17:46:42.555+00:00,[REDACTED]/stora...,7ca36877-fc2f-478e-951d-b4af878f25ef,note,b0d74040-5f4f-4e22-b874-93a61352346f,
120,1,True,2025-04-16T23:11:44.963636,,ecb95c11-4923-42d0-9a45-01eb27cb832a,2d7e6d6c-b299-4ee0-a6b6-801458dfed4b,"[6c22dd13-897c-4b98-95d0-32dda931a2d3, 872755d...",2d7e6d6c-b299-4ee0-a6b6-801458dfed4b,Iron Bros,50,Big day at the gym—me and my boy Cam crushed o...,2025-04-15T17:46:42.555+00:00,[REDACTED]/stora...,7ca36877-fc2f-478e-951d-b4af878f25ef,note,b0d74040-5f4f-4e22-b874-93a61352346f,


In [0]:

validate_post_recommendations(merged_recommendations_df)

recommendations right: 122, recommendations_wrong: 0


In [0]:
# here we filter by various attributes (age, location ?)



In [0]:
# convert to dictionary, this is the format that the supabase client expects
records = recommendations_df.to_dict(orient="records") 
records 

[{'similarity_score': 1,
  'has_been_recommended': True,
  'timestamp': '2025-04-16T21:13:10.499380',
  'embedding': None,
  'user_id': '02c3c862-b8a5-4fde-ac56-e3b5545f18d6',
  'recommended_post_id': '610e4a9c-c1ab-43bb-9e33-80ed4c0a8756'},
 {'similarity_score': 1,
  'has_been_recommended': True,
  'timestamp': '2025-04-16T21:13:10.499652',
  'embedding': None,
  'user_id': '02c3c862-b8a5-4fde-ac56-e3b5545f18d6',
  'recommended_post_id': '912663e7-eaed-416c-a6e2-342efcf4cefc'},
 {'similarity_score': 1,
  'has_been_recommended': True,
  'timestamp': '2025-04-16T21:13:10.499931',
  'embedding': None,
  'user_id': '02c3c862-b8a5-4fde-ac56-e3b5545f18d6',
  'recommended_post_id': '9380aebc-a2d1-4b9a-b4d7-34a7536e663d'},
 {'similarity_score': 1,
  'has_been_recommended': True,
  'timestamp': '2025-04-16T21:13:10.501307',
  'embedding': None,
  'user_id': '0b7249ca-cc82-4ff1-9d50-d8d61280b717',
  'recommended_post_id': '8d3889f3-099e-4abd-b04e-02d0a65ba519'},
 {'similarity_score': 1,
  'has_

In [0]:
# don't uncomment out the push table line until ready

response = supabase.table("user_post_recommendations").insert(records).execute()

In [0]:
# things still to do for this specific script

#2. TODO Add the popularity/age element to the similarity score
#3. TODO Figure out how to do the filtering part 
#4. TODO validate that the script runs when and how I want to when I'm not there
#6. TODO remove unnecessary elements from the script
#7. TODO Optimize time complexity and computational complexity of the script 

