# Main notebook - experimenting with guesses

SPDX-License-Identifier: 0BSD

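This notebook performs semantic search based solely on repository names and user input: each repository name and the user's query are embedded, and the names are ranked by how similar their embeddings are to the query's.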

Compare to
[`findrepo`](https://github.com/EliahKagan/newrepo-findrepo#using-findrepo).

In [1]:
import logging

import numpy as np
from tabulate import tabulate

import fr2ex

In [2]:
# Emit INFO-level log records so progress messages show up under the cells below.
logging.basicConfig(level=logging.INFO)

In [3]:
# Fetch the repository names to search over. The log output below shows that
# this opens (and then closes) an SFTP session.
names = fr2ex.remote.fetch_repo_names()

INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_7.6p1)
INFO:paramiko.transport:Authentication (publickey) successful!
INFO:paramiko.transport.sftp:[chan 0] Opened sftp connection (server version 3)
INFO:paramiko.transport.sftp:[chan 0] sftp session closed.


In [4]:
# How many repository names were fetched.
len(names)

712

In [5]:
# Run the names through the moderation check before they are embedded further
# down. The log output below shows that this queries the OpenAI moderation
# endpoint.
moderation = fr2ex.moderation.get_moderation(names)

INFO:root:Querying OpenAI moderation endpoint.


In [6]:
# Sanity check: this should match len(names), presumably one moderation result
# per name -- TODO confirm against fr2ex.moderation.
len(moderation)

712

In [7]:
# True if any_flagged() reports a flag on at least one moderation result.
any(fr2ex.moderation.any_flagged(result) for result in moderation)

False

In [8]:
# Print the token count for the names and the estimated cost of processing them.
fr2ex.tokens.report_cost(names)

It looks like the rate is $0.0001 per 1000 tokens. If so, the cost to
process 2019 tokens is about $0.0002019 (that is, 0.0201900 cents).


In [9]:
# Embed all the names up front. guess() multiplies this by a query embedding
# and pairs the resulting scores with names, so it is presumably a 2-D array
# with one row per name, in the same order as names -- TODO confirm.
embeddings = fr2ex.embedding.embed_many(names)

INFO:root:Querying OpenAI embeddings endpoint.


In [10]:
def guess(name: str, count: int = 5) -> str:
    """
    Show top guesses for similarity of name to already embedded names.

    The query ``name`` is embedded, and the notebook-level ``embeddings`` is
    multiplied by that embedding to get one score per entry of the
    notebook-level ``names`` (presumably ``embeddings`` has one row per name,
    in the same order -- TODO confirm). The ``count`` highest-scoring names
    are kept, best match first.

    Returns the table built by ``tabulate`` with ``tablefmt='html'``: rows of
    (repository name, score), with scores formatted to six decimal places.
    The result is a string that Jupyter renders as an HTML table when it is
    the last expression in a cell.
    """
    scores = embeddings @ fr2ex.embedding.embed(name)

    # Sorting (score, name) pairs in reverse puts the highest score first; any
    # ties on score are broken by name, also in descending order.
    ranked = sorted(zip(scores, names), reverse=True)

    # Use a distinct loop variable so the query parameter `name` is not
    # visually shadowed inside the comprehension.
    table = [(repo_name, score) for score, repo_name in ranked[:count]]
    return tabulate(table, tablefmt='html', floatfmt='.6f')

In [11]:
guess('algorithm')

0,1
Calculator,0.868915
Geometry,0.865605
algorithms-suggestions,0.860135
Alias,0.851912
Benchmark,0.849168


In [12]:
guess('sorting')

0,1
SortingRanges,0.932607
Sorts,0.909015
sortkey,0.859527
PartialSort,0.855033
tsort,0.840436


In [13]:
guess('algorithm-visualization')

0,1
algorithms-suggestions,0.88893
algorithms-python,0.859157
crystal-algo,0.855803
Graph-gist,0.84223
codegraph,0.832933


In [14]:
guess('maven')

0,1
try-maven,0.916247
mavener,0.879933
retry-maven,0.868123
pimpl,0.836178
try-gradle,0.833396


In [15]:
guess('find-repo')

0,1
newrepo-findrepo,0.916264
findrepo2-experiment,0.867342
backup-repos,0.864948
find-articles,0.857491
TestRepo,0.842167


In [16]:
guess('graph theory', count=10)

0,1
codegraph,0.881044
Graph,0.879316
Graph-gist,0.864527
Geometry,0.839677
wpf-graph,0.82909
graphyaml,0.820618
gh-profile,0.81466
c-sketches,0.813741
dart-sketches,0.810654
trig,0.809443


In [17]:
guess('shortest paths', count=10)

0,1
Dijkstra,0.891189
Paths,0.838752
TreeTraversal,0.830606
Kruskal,0.816427
algorithms-suggestions,0.816149
Traverse,0.813109
Permutations,0.80653
treejs,0.801115
DFS,0.800865
kt-short,0.800361


In [18]:
guess('shortest-paths', count=10)

0,1
Dijkstra,0.892154
Paths,0.83621
TreeTraversal,0.836169
Traverse,0.819907
algorithms-suggestions,0.819159
Kruskal,0.814025
type-arrows,0.812837
kt-short,0.811253
sequence-dag,0.809242
dot-and-cross,0.806305


In [19]:
guess('graph traversal', count=10)

0,1
TreeTraversal,0.897458
Traverse,0.870135
Graph,0.86829
codegraph,0.85683
Graph-gist,0.85085
TraverseTree,0.850117
TreeTraversalAnimations,0.845033
Dijkstra,0.824661
DFS,0.823664
sequence-dag,0.819558


In [20]:
guess('graph-traversal', count=10)

0,1
TreeTraversal,0.904155
Traverse,0.879913
Graph,0.878462
Graph-gist,0.875252
codegraph,0.872352
TraverseTree,0.865361
TreeTraversalAnimations,0.851148
Dijkstra,0.842313
wpf-graph,0.841161
sequence-dag,0.837161


In [21]:
guess('graphics', count=10)

0,1
Graph,0.892189
Geometry,0.883836
printing,0.882495
Draw,0.876193
fps,0.859638
works,0.84877
coordinates,0.845267
Components,0.841966
filesystem,0.839756
typevars,0.839745


In [22]:
guess('deluge')

0,1
Flood,0.865127
foobar,0.825995
lambda-demo,0.819211
Dynamo,0.817816
yule,0.813898


In [23]:
guess('source control')

0,1
git,0.82311
Linkage,0.807303
gitscripts,0.806829
capture.net,0.803246
test-site,0.80187


In [24]:
guess('source-control')

0,1
git,0.831503
git-notes,0.830679
dot-imports,0.815899
gitscripts,0.812983
editor-configs,0.812917
