# Main notebook - experimenting with guesses

SPDX-License-Identifier: 0BSD

This notebook performs semantic search based solely on repository names and user input.

Compare to
[`findrepo`](https://github.com/EliahKagan/newrepo-findrepo#using-findrepo).

In [1]:
import logging

import numpy as np
from tabulate import tabulate

import fr2ex

In [2]:
logging.basicConfig(level=logging.INFO)

In [3]:
names = fr2ex.remote.fetch_repo_names()

INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_7.6p1)
INFO:paramiko.transport:Authentication (publickey) successful!
INFO:paramiko.transport.sftp:[chan 0] Opened sftp connection (server version 3)
INFO:paramiko.transport.sftp:[chan 0] sftp session closed.


In [4]:
len(names)

703

In [5]:
moderation = fr2ex.moderation.get_moderation(names)

INFO:root:Querying OpenAI moderation endpoint.
INFO:openai:error_code=None error_message='The input list contains 703 items for moderation, exceeding the maximum allowable limit of 32. Please reduce the number of items.' error_param=input error_type=invalid_request_error message='OpenAI API error received' stream_error=False


InvalidRequestError: The input list contains 703 items for moderation, exceeding the maximum allowable limit of 32. Please reduce the number of items.

In [None]:
len(moderation)

In [None]:
any(any_flagged(result) for result in moderation)

In [None]:
fr2ex.tokens.report_cost(names)

In [None]:
embeddings = fr2ex.embedding.embed_many(names)

In [None]:
def guess(name: str, count: int = 5) -> str:
    """Show top guesses for similarity of name to already embedded names.

    The score of each already embedded name is taken from the matrix product
    of the notebook-level ``embeddings`` with the embedding of ``name``. The
    names are ranked from highest to lowest score, and the first ``count``
    rows are rendered.

    Returns the HTML table built by ``tabulate``, with one (name, score) row
    per guess and scores formatted to six decimal places.
    """
    scores = embeddings @ fr2ex.embedding.embed(name)
    # Sorting (score, name) pairs in reverse puts the highest score first;
    # pairs with equal scores are ordered by name, also descending.
    ranked = sorted(zip(scores, names), reverse=True)
    # Use a distinct loop variable so the ``name`` parameter is not shadowed.
    table = [(repo_name, score) for score, repo_name in ranked[:count]]
    return tabulate(table, tablefmt='html', floatfmt='.6f')

In [None]:
guess('algorithm')

In [None]:
guess('sorting')

In [None]:
guess('algorithm-visualization')

In [None]:
guess('maven')

In [None]:
guess('find-repo')

In [None]:
guess('graph theory', count=10)

In [None]:
guess('shortest paths', count=10)

In [None]:
guess('shortest-paths', count=10)

In [None]:
guess('graph traversal', count=10)

In [None]:
guess('graph-traversal', count=10)

In [None]:
guess('graphics', count=10)

In [None]:
guess('deluge')

In [None]:
guess('source control')

In [None]:
guess('source-control')