In [2]:
import numpy as np
import random
from hashlib import blake2b
from gbytes import *

In [15]:
def okhash64(val,mod=0):
    """Mix ``hash(val)`` with a modifier-derived hash into an 8-byte ``gbytes`` value.

    Parameters:
        val: any hashable object; only its built-in ``hash()`` is used.
        mod: modifier formatted into the string ``'mod:<mod>'``, whose built-in
            hash is multiplied into the result so that different modifiers
            give different outputs for the same ``val``.

    Returns:
        ``gbytes`` built from the 8-byte little-endian encoding of the mixed
        integer.

    Caveats:
        * The modifier is always hashed as a ``str``, and CPython salts ``str``
          hashes per interpreter process unless ``PYTHONHASHSEED`` is fixed, so
          results are generally not reproducible across kernel restarts.
        * The reduction is a modulo by ``2**64 - 1`` (not a 64-bit mask), so the
          all-ones value is never produced.
        * If ``hash(val)`` is 0 the result is all zero bytes for every ``mod``.
    """
    salt = f'mod:{mod}'
    # Multiply the two hashes together with the fixed mixing constant in one go.
    mixed = hash(val) * hash(salt) * 0x0000000001000000000000000000013B
    # Python's % with a positive modulus is non-negative, so this fits in 8 bytes.
    reduced = mixed % 0xffffffffffffffff
    return gbytes(reduced.to_bytes(8, 'little'))

In [120]:
def okhash63(val,mod=0,mod2=0):
    """Like a 64-bit mix of ``hash(val)`` and a modifier hash, with the lowest bit forced on.

    Note: a later cell in this notebook defines another ``okhash63``; once
    both cells have run, that later definition is the one in effect.

    Parameters:
        val: any hashable object; only its built-in ``hash()`` is used.
        mod, mod2: modifiers formatted into the string ``'mod:<mod>,<mod2>'``,
            whose built-in hash is multiplied into the result.

    Returns:
        ``gbytes`` built from the 8-byte little-endian encoding of the mixed
        integer. Because the integer's least-significant bit is always set and
        the encoding is little-endian, bit 0 of the first byte is always 1,
        leaving 63 bits that vary.

    Caveat:
        The modifier is hashed as a ``str``; CPython salts ``str`` hashes per
        interpreter process unless ``PYTHONHASHSEED`` is fixed, so results are
        generally not reproducible across kernel restarts.
    """
    salt = f'mod:{mod},{mod2}'
    product = hash(val) * hash(salt) * 0x0000000001000000000000000000013B
    # Reduce modulo 2**64 - 1, then light the least-significant bit.
    odd = (product % 0xffffffffffffffff) | 0x0000000000000001
    return gbytes(odd.to_bytes(8, 'little'))

In [182]:
def okhash63(val,mod=0,mod2=0):
    """Hash a text value plus two modifiers to 8 bytes using BLAKE2b.

    Unlike a hash built on the built-in ``hash()``, this depends only on the
    input bytes, so it gives the same result in every interpreter session.

    Parameters:
        val: ``str`` to hash (it must provide ``.encode``).
        mod, mod2: modifiers formatted into the suffix ``'mod:<mod>,<mod2>XX'``
            that is appended to ``val`` before hashing, so different modifier
            pairs give different hashes of the same ``val``.

    Returns:
        The first 8 items of the BLAKE2b digest wrapped in ``gbytes``, combined
        via ``gbytes.bitwise_or`` with a mask whose only set bit is the lowest
        bit of the last byte (presumably a byte-wise OR, which would make the
        last byte always odd -- confirm against the gbytes implementation).
    """
    salt = f'mod:{mod},{mod2}XX'
    # UTF-8 produces exactly the same bytes as ASCII for ASCII-only text, so
    # existing hashes are unchanged, while non-ASCII input (accents, curly
    # quotes, ...) no longer raises UnicodeEncodeError.
    payload = val.encode('utf-8') + salt.encode('utf-8')
    digest = gbytes(blake2b(payload).digest())[:8]
    low_bit_mask = gbytes(b'\x00\x00\x00\x00\x00\x00\x00\x01')
    return gbytes.bitwise_or(digest, low_bit_mask)

In [285]:
def b3hash64(seq,rounds=64):
    """Build a ``rounds``-bit signature of ``seq`` by majority vote, one bit per round.

    In each round every element and every adjacent pair of elements casts a
    +1/-1 vote:

    * each element ``e`` votes +1 if ``okhash63(e, mod=round, mod2=0)`` has
      more than ``len(hash) * 4`` bits set (per ``bit_count()``), else -1;
    * each adjacent pair ``(a, b)`` votes +1 if ``gbytes.similarity`` between
      ``okhash63(a, mod=round, mod2=0)`` and ``okhash63(b, round, mod2=1)``
      exceeds ``len(hash) * 4``, else -1.

    The round's output bit is 1 when the vote total is positive. A sequence
    of n elements casts 2*(n-1) + 1 votes, an odd number of +/-1 terms, so the
    total can never be zero and no tie-break is needed.

    Parameters:
        seq: non-empty indexable sequence (e.g. a ``str``) whose items are
            accepted by ``okhash63``.
        rounds: number of signature bits to produce (default 64).

    Returns:
        ``gbytes`` constructed from ``np.packbits`` of the per-round bits
        (``ceil(rounds / 8)`` uint8 values, first round in the most
        significant bit).

    Raises:
        IndexError: if ``seq`` is empty (from ``seq[-1]``).
    """
    # np.bool_ rather than np.bool8: the bool8 alias was deprecated in
    # NumPy 1.24 and removed in NumPy 2.0.
    bits = np.empty(rounds, dtype=np.bool_)
    for round_idx in range(rounds):
        votes = 0
        for i in range(1, len(seq)):
            h1 = okhash63(seq[i-1], mod=round_idx, mod2=0)
            h2 = okhash63(seq[i], round_idx, mod2=1)
            # len(h1) * 4 is half the bit width if len() counts bytes.
            half = len(h1) * 4
            votes += 1 if gbytes.similarity(h1, h2) > half else -1
            votes += 1 if h1.bit_count() > half else -1
        # The last element has no successor, so it only casts its own vote.
        h1 = okhash63(seq[-1], mod=round_idx, mod2=0)
        votes += 1 if h1.bit_count() > len(h1) * 4 else -1
        bits[round_idx] = votes > 0
    return gbytes(np.packbits(bits))


In [286]:
# Signatures for short words and variants with letters repeated, doubled up,
# or prefixed, used as inputs for similarity comparisons.
# NOTE(review): `cats` is computed from 'cat', not 'cats', unlike the other
# names in this cell which match their input exactly -- confirm which was meant.
cats = b3hash64('cat')
caats = b3hash64('caats')
caaaats = b3hash64('caaaats')
catscats = b3hash64('catscats')
bcatscats = b3hash64('bcatscats')
caattaats = b3hash64('caattaats')


In [287]:
# A word and the same word with one letter ('e') duplicated.
elephant = b3hash64('elephant')
eleephant = b3hash64('eleephant')

In [288]:
# Two sentences with the same overall shape but several words substituted.
alice = b3hash64('Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do')
april = b3hash64('April was finally tired of sitting by her brother by the tree, and of having nothing to do')
# The same sentence with and without spaces; the space-free version also has
# 'somemorewordshere' appended.
long_sentence = b3hash64('This is an extremely long sentence which should result in similarity for some reason ok bye')
longsentence = b3hash64('Thisisanextremelylongsentencewhichshouldresultinsimilarityforsomereasonokbyesomemorewordshere')

# A single compound word with no spaces.
shadowsfollow = b3hash64('shadowsfollow')

In [289]:
# A long fixed string of lowercase letters that is not English text,
# presumably a "no structure" control input for the comparisons below.
garbage = b3hash64('qwertysjdfniabuioopkjhgfdfvbsdljfnjkasdnxcvbkxjcvdfkjnndmmbyasdkjfbkhbsdfkzxcvibekxcbvkasdljfnkjksbdf')


In [290]:
# Hash of a randomly generated string.
# NOTE(review): str() of a list yields its repr, e.g. '[101, 95, 110, 88, 120]',
# so the hashed text is digits, commas, spaces and brackets rather than five
# random characters; if characters with codes 87..122 were intended, each value
# would need chr() and the results joined -- confirm intent.
# NOTE(review): `random` is not seeded in the visible cells, so this value (and
# every similarity computed from it) differs on each run.
garbage2 = b3hash64(str([random.randint(87,122) for _ in range(5)]))

In [291]:
# Compare the two similarly shaped sentences. Dividing by 64 (b3hash64's
# default `rounds`) presumably normalises the score to a 0..1 fraction of
# matching signature bits -- TODO confirm what gbytes.similarity returns.
gbytes.similarity(alice,april) / 64

0.90625

In [292]:
# English sentence vs. the randomly generated input (value varies per run
# because garbage2 is random).
gbytes.similarity(april,garbage2) / 64

0.546875

In [294]:
# Two different English sentences, both written with spaces.
gbytes.similarity(long_sentence,april) / 64

0.6875

In [295]:
# Space-free sentence vs. a different sentence written with spaces.
gbytes.similarity(longsentence,april) / 64

0.5625

In [296]:
# The same sentence without and with spaces (the space-free input also has
# extra text appended).
gbytes.similarity(longsentence,long_sentence) / 64

0.78125

In [298]:
# Fixed non-English letter string vs. the space-free English sentence.
gbytes.similarity(garbage,longsentence) / 64

0.4375

In [299]:
# Two unrelated short strings.
gbytes.similarity(caattaats,shadowsfollow) / 64

0.640625

In [300]:
# Same pair as the earlier april/garbage2 cell with the arguments swapped;
# the recorded output is identical (0.546875) in both orders.
gbytes.similarity(garbage2,april) / 64

0.546875