# Main notebook - experimenting with guesses

SPDX-License-Identifier: 0BSD

This does semantic search based solely on repository name and user input.

Compare to
[`findrepo`](https://github.com/EliahKagan/newrepo-findrepo#using-findrepo).

In [1]:
import logging

import numpy as np
from tabulate import tabulate

import fr2ex

In [2]:
# Emit INFO-level log records so that the network activity of later cells
# (the paramiko SSH/SFTP session and the OpenAI requests) shows up in their output.
logging.basicConfig(level=logging.INFO)

In [3]:
# Fetch the repository names that will be searched. The logged output shows
# this opens an SFTP session via paramiko, so it needs working SSH credentials.
names = fr2ex.remote.fetch_repo_names()

INFO:paramiko.transport:Connected (version 2.0, client OpenSSH_7.6p1)
INFO:paramiko.transport:Authentication (publickey) successful!
INFO:paramiko.transport.sftp:[chan 0] Opened sftp connection (server version 3)
INFO:paramiko.transport.sftp:[chan 0] sftp session closed.


In [4]:
# How many repository names were fetched.
len(names)

687

In [5]:
# Run the names through moderation before they are embedded further down.
# Per the log line, this queries the OpenAI moderation endpoint.
moderation = fr2ex.moderation.get_moderation(names)

INFO:root:Querying OpenAI moderation endpoint.


In [6]:
# Sanity check: this should equal len(names), i.e. one moderation result per
# name (presumably in the same order as names — TODO confirm).
len(moderation)

687

In [7]:
# True if at least one moderation result has a truthy 'flagged' entry.
any(entry['flagged'] for entry in moderation)

False

In [8]:
# Print an estimated token count and price for processing the names
# (presumably the cost of the embedding request made in the next cell — TODO confirm).
fr2ex.tokens.report_cost(names)

It looks like the rate is $0.0004 per 1000 tokens. If so, the cost to
process 1953 tokens is about $0.0007812 (that is, 0.0781200 cents).


In [9]:
# Embed all the repository names; per the log line, this queries the OpenAI
# embeddings endpoint. guess() uses the result as the left operand of `@`, so
# it is presumably a 2-D array with one row per entry of names — TODO confirm.
embeddings = fr2ex.embedding.embed_many(names)

INFO:root:Querying OpenAI embeddings endpoint.


In [10]:
def guess(name: str, count: int = 5) -> str:
    """
    Show top guesses for similarity of name to already embedded names.

    The query is embedded with ``fr2ex.embedding.embed`` and scored against
    the notebook-level ``embeddings`` by a matrix-vector product, so each score
    is the dot product of one stored embedding with the query embedding.
    (That equals cosine similarity only if the embeddings are unit-length,
    which is presumably the case here -- TODO confirm.)

    This reads the globals ``embeddings`` and ``names``, which must be
    parallel: row ``i`` of ``embeddings`` is paired with ``names[i]``.

    Parameters
    ----------
    name
        The query text to compare against the stored repository names.
    count
        Maximum number of rows to show, best match first.

    Returns
    -------
    str
        An HTML table of ``(repository name, score)`` rows, as produced by
        ``tabulate(..., tablefmt='html')``. When it is the last expression in
        a cell, Jupyter renders it as a table rather than as raw markup.
    """
    scores = embeddings @ fr2ex.embedding.embed(name)

    # Sort (score, repo name) pairs in descending order. Putting the score
    # first makes it the primary sort key; equal scores fall back to comparing
    # the names.
    ranked = sorted(zip(scores, names), reverse=True)

    # Use a loop variable that does not reuse the `name` parameter's name, and
    # swap each pair so the repository name is the first column.
    table = [(repo_name, score) for score, repo_name in ranked[:count]]
    return tabulate(table, tablefmt='html', floatfmt='.6f')

In [11]:
# Single generic word as the query, with the default of five results.
guess('algorithm')

0,1
Calculator,0.868882
Geometry,0.865558
algorithms-suggestions,0.860272
Alias,0.851841
crystal-algo,0.849729


In [12]:
# Another single-word query, with the default of five results.
guess('sorting')

0,1
SortingRanges,0.932616
Sorts,0.909054
sortkey,0.859575
PartialSort,0.855047
tsort,0.84036


In [13]:
# Hyphenated multi-word query, written the way a repository name would be.
guess('algorithm-visualization')

0,1
algorithms-suggestions,0.889051
algorithms-python,0.859356
crystal-algo,0.856027
Graph-gist,0.842573
codegraph,0.833195


In [14]:
# Query that is the name of a specific tool rather than a general topic.
guess('maven')

0,1
try-maven,0.916256
mavener,0.88006
retry-maven,0.868456
pimpl,0.836146
try-gradle,0.833637


In [15]:
# Hyphenated query resembling the `findrepo` project linked at the top of
# this notebook.
guess('find-repo')

0,1
newrepo-findrepo,0.916256
findrepo2-experiment,0.867356
backup-repos,0.865059
find-articles,0.856951
TestRepo,0.84228


In [16]:
# Space-separated two-word query. count=10 shows ten rows instead of the
# default five.
guess('graph theory', count=10)

0,1
codegraph,0.881251
Graph,0.879316
Graph-gist,0.86459
Geometry,0.839678
wpf-graph,0.829404
graphyaml,0.820673
gh-profile,0.81476
c-sketches,0.81389
trig,0.80931
Draw,0.808868


In [17]:
# Space-separated form of this query. The 'shortest-paths' query that follows
# is the same phrase hyphenated, for comparison.
guess('shortest paths', count=10)

0,1
Dijkstra,0.891393
Paths,0.838839
TreeTraversal,0.830659
Kruskal,0.816414
algorithms-suggestions,0.816239
Traverse,0.813004
Permutations,0.806583
treejs,0.801292
DFS,0.800829
kt-short,0.800304


In [18]:
# Hyphenated form of the preceding 'shortest paths' query, to see how much
# the separator alone changes the scores and ranking.
guess('shortest-paths', count=10)

0,1
Dijkstra,0.892473
Paths,0.836263
TreeTraversal,0.836177
Traverse,0.81981
algorithms-suggestions,0.819183
Kruskal,0.813991
type-arrows,0.812882
kt-short,0.811265
sequence-dag,0.809265
dot-and-cross,0.806301


In [19]:
# Space-separated form of this query. The 'graph-traversal' query that follows
# is the same phrase hyphenated, for comparison.
guess('graph traversal', count=10)

0,1
TreeTraversal,0.897567
Traverse,0.870028
Graph,0.868365
codegraph,0.857106
Graph-gist,0.85106
TraverseTree,0.850152
TreeTraversalAnimations,0.845057
Dijkstra,0.825758
DFS,0.823796
sequence-dag,0.819746


In [20]:
# Hyphenated form of the preceding 'graph traversal' query, to see how much
# the separator alone changes the scores and ranking.
guess('graph-traversal', count=10)

0,1
TreeTraversal,0.90416
Traverse,0.87986
Graph,0.878282
Graph-gist,0.87518
codegraph,0.872302
TraverseTree,0.865445
TreeTraversalAnimations,0.851086
Dijkstra,0.842921
wpf-graph,0.841239
sequence-dag,0.837043


In [21]:
# Single word that shares the "graph" prefix with the earlier graph queries
# but refers to a different subject.
guess('graphics', count=10)

0,1
Graph,0.892202
Geometry,0.883767
printing,0.882554
Draw,0.876314
fps,0.85979
works,0.848869
coordinates,0.84528
Components,0.84202
filesystem,0.839757
Benchmark,0.839163


In [22]:
# A word that none of the top results shown below contains literally, so any
# sensible match has to come from meaning rather than spelling.
guess('deluge')

0,1
Flood,0.864981
foobar,0.825881
lambda-demo,0.819231
Dynamo,0.816781
yule,0.814027


In [23]:
# Space-separated form of this query. The 'source-control' query that follows
# is the same phrase hyphenated, for comparison.
guess('source control')

0,1
git,0.823033
Linkage,0.807258
gitscripts,0.80684
capture.net,0.803207
test-site,0.801832


In [24]:
# Hyphenated form of the preceding 'source control' query, to see how much
# the separator alone changes the scores and ranking.
guess('source-control')

0,1
git,0.831379
git-notes,0.83044
dot-imports,0.81582
gitscripts,0.812936
editor-configs,0.812632
