# Main notebook - experimenting with guesses

SPDX-License-Identifier: 0BSD

This does semantic search over repository names: the user's input is embedded and compared with embeddings of the names alone.

Compare to
[`findrepo`](https://github.com/EliahKagan/newrepo-findrepo#using-findrepo).

In [1]:
import logging

import numpy as np
from tabulate import tabulate

import fr2ex

In [2]:
# Show INFO-level log messages in cell output, so the network activity of the
# cells below (SFTP connection, OpenAI API queries) is visible.
logging.basicConfig(level=logging.INFO)

In [3]:
# Fetch the repository names to search over. Per the log output, this opens an
# SFTP session (via paramiko) to retrieve them.
names = fr2ex.remote.fetch_repo_names()

INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_7.6p1)
INFO:paramiko.transport:Authentication (publickey) successful!
INFO:paramiko.transport.sftp:[chan 0] Opened sftp connection (server version 3)
INFO:paramiko.transport.sftp:[chan 0] sftp session closed.


In [4]:
# How many repository names were fetched.
len(names)

743

In [5]:
# Get moderation results for the names. Per the log output, this queries the
# OpenAI moderation endpoint.
moderation = fr2ex.moderation.get_moderation(names)

INFO:root:Querying OpenAI moderation endpoint.


In [6]:
# Sanity check: this should equal len(names), i.e., one result per name.
len(moderation)

743

In [7]:
# True if at least one moderation result was flagged; False means none were.
any(map(fr2ex.moderation.any_flagged, moderation))

False

In [8]:
# Print the token count and estimated cost for the names (see the output),
# ahead of the embedding call in the next cell.
fr2ex.tokens.report_cost(names)

It looks like the rate is $0.00010 per 1000 tokens. If so, the cost to
process 2123 tokens is about $0.0002123 (that is, 0.0212300 cents).


In [9]:
# Embed all the names. Per the log output, this queries the OpenAI embeddings
# endpoint. The guess function defined below scores queries against the result.
embeddings = fr2ex.embedding.embed_many(names)

INFO:root:Querying OpenAI embeddings endpoint.


In [10]:
def guess(name: str, count: int = 5) -> str:
    """
    Build an HTML table of the already embedded names most similar to name.

    The query is embedded with fr2ex.embedding.embed and scored against the
    global embeddings by matrix-vector product. The count highest-scoring
    (name, score) pairs are tabulated, best first, with scores shown to six
    decimal places.

    The table is returned, not printed. Calling this as the last expression of
    a notebook cell is what displays it.
    """
    # One score per entry of `names`, in the same order.
    # NOTE(review): These are cosine similarities only if the embeddings are
    # normalized to unit length -- confirm in fr2ex.embedding.
    scores = embeddings @ fr2ex.embedding.embed(name)

    # Best first. Pairs whose scores are equal fall back to comparing names.
    ranked = sorted(zip(scores, names), reverse=True)

    # Use a distinct variable name, so the `name` parameter is not shadowed.
    rows = [(candidate, score) for score, candidate in ranked[:count]]
    return tabulate(rows, tablefmt='html', floatfmt='.6f')

In [11]:
guess('algorithm')

0,1
Calculator,0.868892
Geometry,0.865443
algorithms-suggestions,0.860237
Alias,0.851718
crystal-algo,0.849856


In [12]:
guess('sorting')

0,1
SortingRanges,0.932489
Sorts,0.909088
sortkey,0.859276
PartialSort,0.854948
tsort,0.840355


In [13]:
guess('algorithm-visualization')

0,1
algorithms-suggestions,0.889123
algorithms-python,0.859457
crystal-algo,0.856151
Graph-gist,0.842437
codegraph,0.832969


In [14]:
guess('maven')

0,1
try-maven,0.916208
mavener,0.880028
retry-maven,0.868239
pimpl,0.836305
try-gradle,0.833471


In [15]:
# Query resembling the name of the findrepo project linked at the top of this
# notebook (newrepo-findrepo).
guess('find-repo')

0,1
newrepo-findrepo,0.916371
findrepo2-experiment,0.867433
backup-repos,0.865052
find-articles,0.857055
TestRepo,0.842136


In [16]:
# Two-word topic query. Ask for 10 results to see further down the ranking.
guess('graph theory', count=10)

0,1
codegraph,0.881131
Graph,0.879394
Graph-gist,0.864719
Geometry,0.83958
wpf-graph,0.829246
graphyaml,0.820538
gh-profile,0.814843
c-sketches,0.813834
sig,0.810508
trig,0.809272


In [17]:
# Two-word query written with a space. The next cell repeats it with a hyphen.
guess('shortest paths', count=10)

0,1
Dijkstra,0.891523
Paths,0.838904
TreeTraversal,0.830625
Kruskal,0.81651
algorithms-suggestions,0.816332
Traverse,0.813229
Permutations,0.806625
treejs,0.801387
DFS,0.801053
kt-short,0.800336


In [18]:
# Same query as the previous cell, but with a hyphen instead of a space.
guess('shortest-paths', count=10)

0,1
Dijkstra,0.892585
Paths,0.836293
TreeTraversal,0.836116
Traverse,0.820015
algorithms-suggestions,0.819172
Kruskal,0.814038
type-arrows,0.812833
kt-short,0.811081
sequence-dag,0.809246
bfsshortreach,0.806233


In [19]:
# Two-word query written with a space. The next cell repeats it with a hyphen.
guess('graph traversal', count=10)

0,1
TreeTraversal,0.897433
Traverse,0.87009
Graph,0.868436
codegraph,0.856972
Graph-gist,0.851107
TraverseTree,0.850101
TreeTraversalAnimations,0.845119
Dijkstra,0.825617
DFS,0.8237
sequence-dag,0.819555


In [20]:
# Same query as the previous cell, but with a hyphen instead of a space.
guess('graph-traversal', count=10)

0,1
TreeTraversal,0.904162
Traverse,0.880023
Graph,0.878427
Graph-gist,0.875338
codegraph,0.872349
TraverseTree,0.865455
TreeTraversalAnimations,0.851364
Dijkstra,0.842983
wpf-graph,0.841249
sequence-dag,0.837076


In [21]:
# "graphics" starts with "graph" but means something different. In the saved
# output, Graph still ranks first.
guess('graphics', count=10)

0,1
Graph,0.89225
Geometry,0.883731
printing,0.882585
Draw,0.876183
fps,0.859684
works,0.849026
coordinates,0.8455
Components,0.84196
filesystem,0.839882
typevars,0.839816


In [22]:
# Presumably a check of whether a synonym can surface for a word that is not
# itself in any name. In the saved output, Flood ranks first.
guess('deluge')

0,1
Flood,0.865004
foobar,0.825877
deltall,0.819861
lambda-demo,0.819041
runsleuthu,0.817375


In [23]:
# Two-word query written with a space. The next cell repeats it with a hyphen.
guess('source control')

0,1
git,0.822995
gitdb,0.807633
Linkage,0.807274
gitscripts,0.806897
edits,0.806733


In [24]:
# Same query as the previous cell, but with a hyphen instead of a space.
guess('source-control')

0,1
git,0.831427
git-notes,0.830786
gitdb,0.826958
example-submodule,0.818399
dot-imports,0.815807
