In [2]:
from pprint import pprint
import random

In [2]:
def create_user_interest_dict(*interests):
    interest_count = len(interests)
    interest_weight = (100 - 10) / interest_count  # Subtract 10 for 'Random'
    user_interests = {interest: interest_weight for interest in interests}
    user_interests["Random"] = 10  # Add 'Random' with constant value of 10

    print(
        f"Interest weights: {sum(user_interests.values())}"
    )  # Sum of all weights should be 100

    return user_interests


# New user selects interests (max 5)
# 10 total - 5 start to discover other 5 naturally

carl_interests = create_user_interest_dict(
    "DevOps",
    "Tests (SAT)",
    "Machine Learning",
    "Cats",
    "Cooking"
)

pprint(carl_interests)

Interest weights: 100.0
{'Cats': 18.0,
 'Cooking': 18.0,
 'DevOps': 18.0,
 'Machine Learning': 18.0,
 'Random': 10,
 'Tests (SAT)': 18.0}


In [3]:
# HELPER FUNCTIONS for update_weight


def observe_action_taken(video_tag, liked, shared, watched, loop_count):
    """
    This function calculates the total points based on the user's actions.

    Parameters:
    video_tag (str): The tag of the video.
    liked (bool): Whether the user liked the video.
    shared (bool): Whether the user shared the video.
    watched (bool): Whether the user watched the video.
    loop_count (int): The number of times the user looped the video.

    Returns:
    float: The total points calculated based on the user's actions.
    """
    # Actions and their corresponding points to add
    Actions = {
        "Like": 0.1,
        "Share": 0.2,
        "Watch": 0.1,  # more than 50% of total duration
        # todo: experiment with increasing watch value
        "Loop": 0.05,
    }

    total_points = 0

    if liked:
        total_points += Actions["Like"]
    if shared:
        total_points += Actions["Share"]
    if watched:
        total_points += Actions["Watch"]
    total_points += loop_count * Actions["Loop"]

    print(f"\nUpdating weights for video tag: {video_tag}")
    print(f"Total points to add: {total_points}")

    return total_points


def handle_update_weight(user_dict_interest, video_tag, total_points):
    """
    This function updates the user's interest score based on the total points.

    Parameters:
    user_dict_interest (dict): The user's interest dictionary.
    video_tag (str): The tag of the video.
    total_points (float): The total points calculated based on the user's actions.

    Returns:
    dict: The updated user's interest dictionary.
    """
    # Increase the weight of the interest based on the action points
    user_dict_interest[video_tag] += total_points
    print(f"Updated {video_tag} to {user_dict_interest[video_tag]}\n")

    # Deduct the total points proportionally from existing interests (excluding 'Random' and the video_tag itself)
    total_deductable_weight = sum(
        weight
        for interest, weight in user_dict_interest.items()
        if interest != "Random" and interest != video_tag
    )

    print(
        f"Total weight to deduct from other interests: {total_points}\nTotal deductable weight (excluding 'Random' and {video_tag}): {total_deductable_weight}"
    )

    for interest in user_dict_interest:
        if interest != "Random" and interest != video_tag:
            deduction = (total_points / total_deductable_weight) * user_dict_interest[
                interest
            ]
            user_dict_interest[interest] -= deduction

            print(
                f"Deducted {deduction} from {interest}, new weight: {user_dict_interest[interest]}"
            )

    return user_dict_interest


def handle_add_new_tag(user_dict_interest, video_tag):
    """
    This function adds a new interest to the user's interest dictionary.

    Parameters:
    user_dict_interest (dict): The user's interest dictionary.
    video_tag (str): The tag of the video.

    Returns:
    dict: The updated user's interest dictionary with the new interest added.
    """
    # Add new interest if it doesn't exist and give initial points
    initial_points = 5
    user_dict_interest[video_tag] = initial_points
    print(f"Added new interest {video_tag} with initial weight: {initial_points}")

    # Deduct initial points proportionally from existing interests (excluding 'Random' and current video_tag)
    total_deductable_weight = sum(
        weight
        for interest, weight in user_dict_interest.items()
        if interest != "Random" and interest != video_tag
    )

    print(
        f"Total weight to deduct from other interests: {initial_points}\nTotal deductable weight (excluding 'Random' and {video_tag}): {total_deductable_weight}\n"
    )

    for interest in user_dict_interest:
        if interest != "Random" and interest != video_tag:
            deduction = (initial_points / total_deductable_weight) * user_dict_interest[
                interest
            ]
            user_dict_interest[interest] -= deduction

            pprint(
                f"Deducted {deduction} from {interest}, new weight: {user_dict_interest[interest]}"
            )

    return user_dict_interest

In [4]:
def watch_video_and_update_weight(
    user_dict_interest,
    video_tag,
    liked=False,
    shared=False,
    watched=False,
    loop_count=0,
):

    total_points = observe_action_taken(video_tag, liked, shared, watched, loop_count)

    if video_tag in user_dict_interest:
        handle_update_weight(user_dict_interest, video_tag, total_points)
    else:
        handle_add_new_tag(user_dict_interest, video_tag)

    # Sort the interests ("key=item[1]") by weight in descending order and keep only the top 10
    user_dict_interest = dict(
        sorted(user_dict_interest.items(), key=lambda item: item[1], reverse=True)[:10]
    )

    # Check total weight to ensure it sums to 100 (for debugging purposes)
    total_weight = sum(user_dict_interest.values())
    print(f"Total weight after update: {total_weight} (should be 100)\n")
    # pprint(f"Updated interests: {user_dict_interest}\n")

    return user_dict_interest

In [5]:
def get_random_tag_for_recommendation(user_dict_interest):

    random_value = round(random.uniform(0, 100), 0)

    print("_" * 50)
    print(f"\nRandom value: {random_value}\n")

    cumulative_probability = 0.0
    partitions = []

    for interest, probability in user_dict_interest.items():
        # Save the starting point of the current tag's range (in probability)
        previous_cumulative_probability = cumulative_probability

        # Add the tag's probability to the cumulative total to get the end point of the tag's range
        cumulative_probability += probability

        partitions.append(
            (interest, previous_cumulative_probability, cumulative_probability)
        )

        pprint(
            f"Checking interest: {interest}, cumulative range: {previous_cumulative_probability} - {cumulative_probability}"
        )

        # If the random value is less than or equal to the cumulative probability up to the current interest,
        # select the current interest and stop looking at the rest of the interests.
        if random_value <= cumulative_probability:
            print(f"\nSelected interest: {interest}\n")
            break

    if interest == "Random":
        interest = random.choice(list(user_dict_interest.keys()))
        print(f"Random tag selected: {interest}")
        print("MODIFY THIS LATER TO POINT TO CSV COLUMN")
        return interest

    return interest

In [6]:
# New user selects interests (max 5)
# 10 total - 5 start to discover other 5 naturally

carl_interests = create_user_interest_dict(
    "DevOps",
    "Tests (SAT)",
    "Machine Learning",
    "Cats"
)
pprint(carl_interests)

Interest weights: 100.0
{'Cats': 22.5,
 'DevOps': 22.5,
 'Machine Learning': 22.5,
 'Random': 10,
 'Tests (SAT)': 22.5}


In [7]:
# Simulate user actions
pprint(f"Before update: {carl_interests}")

watch_video_and_update_weight(
    carl_interests, "Cats", liked=True, watched=True, shared=True, loop_count=10
)

pprint(f"After update: {carl_interests}")

("Before update: {'DevOps': 22.5, 'Tests (SAT)': 22.5, 'Machine Learning': "
 "22.5, 'Cats': 22.5, 'Random': 10}")

Updating weights for video tag: Cats
Total points to add: 0.9
Updated Cats to 23.4

Total weight to deduct from other interests: 0.9
Total deductable weight (excluding 'Random' and Cats): 67.5
Deducted 0.30000000000000004 from DevOps, new weight: 22.2
Deducted 0.30000000000000004 from Tests (SAT), new weight: 22.2
Deducted 0.30000000000000004 from Machine Learning, new weight: 22.2
Total weight after update: 100.0 (should be 100)

("After update: {'DevOps': 22.2, 'Tests (SAT)': 22.2, 'Machine Learning': "
 "22.2, 'Cats': 23.4, 'Random': 10}")


In [8]:
# Simulate algorithm picking the next video to recommend via tag
selected_interest = get_random_tag_for_recommendation(carl_interests)

__________________________________________________

Random value: 90.0

'Checking interest: DevOps, cumulative range: 0.0 - 22.2'
'Checking interest: Tests (SAT), cumulative range: 22.2 - 44.4'
'Checking interest: Machine Learning, cumulative range: 44.4 - 66.6'
'Checking interest: Cats, cumulative range: 66.6 - 90.0'

Selected interest: Cats



In [40]:
import pandas as pd
from pprint import pprint
import chromadb

# Set the URL and initialize ChromaDB client
url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSAE2tBAnAdXsxk9a9YClFN7MSEVhzEmJD01ewwtooMLxL-Ilod26EbdD8sZeZk0ybiqD-jqT-9RZbn/pub?gid=497214901&single=true&output=csv"
chroma_client = chromadb.Client()

# Read the CSV file into a DataFrame
df = pd.read_csv(url)
# Limit the DataFrame to the first 10 rows
df = df.head(10)

def prepare_documents(url):
    # Convert the 'tags' column to a list of strings
    documents = df["tags"].apply(lambda x: x.split(",")).tolist()
    documents_str = [", ".join(doc) for doc in documents]

    return documents_str

def get_recommendations(query, documents):
    # Initialize ChromaDB collection
    collection = chroma_client.get_or_create_collection(name="my_collection")

    # Add the documents to the collection
    ids = [str(i+1) for i in range(len(documents))]
    collection.upsert(documents=documents, ids=ids)

    # Query the collection
    results = collection.query(query_texts=[query], n_results=10)

    # Print the results
    pprint(results)

    # Get the ID of the first result
    result_id = int(results['ids'][0][0])

    # Get the title corresponding to the ID
    title = df.loc[df['video_id'] == result_id, 'video_title'].values[0]

    return title

In [41]:
print(f"Selected Interest = {selected_interest}\n")

# Prepare documents and call the function with a query
edgur_test_data = prepare_documents(url)
recommended_video = get_recommendations(selected_interest, edgur_test_data)

print(f"Up next: {recommended_video}")

Selected Interest = Cats



{'data': None,
 'distances': [[1.6762529611587524,
                1.713335633277893,
                1.7914196252822876,
                1.7973018884658813,
                1.8210651874542236,
                1.8309388160705566,
                1.8676985502243042,
                1.8740798234939575,
                1.9057185649871826,
                1.945681095123291]],
 'documents': [['Python,  Dictionaries,  Tutorial,  Data Structures,  '
                'Programming',
                'Python,  Pandas,  Data Analysis,  CSV,  File Parsing',
                'Linear equations,  NumPy,  Mathematics,  Solving,  Array',
                'Python,  Logical Operators,  Basics,  Array,  Video Tutorial',
                'Sorting algorithms,  Quick sort,  Python programming,  '
                'Algorithm explanation',
                'linear regression,  Python,  introduction,  data analysis,  '
                'machine learning',
                'Python,  List Comprehensions,  Quick Guide,  Pr

# User swipes to next video

In [42]:
watch_video_and_update_weight(carl_interests, "Python", liked=True)
selected_interest = get_random_tag_for_recommendation(carl_interests)
recommended_video = get_recommendations(selected_interest, edgur_test_data)

print(f"Up next: {recommended_video}")


Updating weights for video tag: Python
Total points to add: 0.1
Added new interest Python with initial weight: 5
Total weight to deduct from other interests: 5
Total deductable weight (excluding 'Random' and Python): 90.0

'Deducted 1.2333333333333332 from DevOps, new weight: 20.966666666666665'
'Deducted 1.2333333333333332 from Tests (SAT), new weight: 20.966666666666665'
('Deducted 1.2333333333333332 from Machine Learning, new weight: '
 '20.966666666666665')
'Deducted 1.2999999999999998 from Cats, new weight: 22.099999999999998'
Total weight after update: 100.0 (should be 100)

__________________________________________________

Random value: 22.0

'Checking interest: DevOps, cumulative range: 0.0 - 20.966666666666665'
('Checking interest: Tests (SAT), cumulative range: 20.966666666666665 - '
 '41.93333333333333')

Selected interest: Tests (SAT)

{'data': None,
 'distances': [[1.3169050216674805,
                1.3815304040908813,
                1.419775128364563,
               

______

In [12]:
# remove video from queue (or mark as watched)
    # ID not title to handle duplicates

In [13]:
# random.choice tag from selected video
    # run weights thing all over again

In [None]:
# clustering preprocess
    # https://www.datacamp.com/tutorial/introduction-to-text-embeddings-with-the-open-ai-api

In [None]:
# if no categories select
    # random = 10
    # UI for sliders



In [None]:
# decay top 10 interests



In [None]:
# After 10 swipes, show the category selection card