In [527]:
from tree_sitter import Language, Parser
from ranking.util import json_lines as jl
from ranking.util import dataset_paths as dp
from tqdm import tqdm
import pandas as pd

Language.build_library(
  # Store the library in the `build` directory
  'build/my-languages.so',

  # Include one or more languages
  [
    'bindings\\tree-sitter-haskell'
  ]
)

HS_LANGUAGE = Language('build\\my-languages.so', 'haskell')
parser = Parser()
parser.set_language(HS_LANGUAGE)

In [528]:
def contains_sig(text: str) -> bool:
    tree = parser.parse(bytes(text, "utf8"))
    return not tree.root_node.has_error and tree.root_node.children[0].type == 'signature'

In [594]:
def get_sig_type(sig: str, query) -> str:
    tree = parser.parse(bytes(sig, "utf8"))
    assert(tree.root_node.children[0].type == 'signature')
    captures = query.captures(tree.root_node)
    return str(captures[0][0].text, 'UTF-8')

In [582]:
dump = jl.read_jsonl('no-sig-check-raw.complete.dump.jsonl')
# dump = jl.read_jsonl(dp.unique_functions_corpus)

In [583]:
tqdm.pandas(desc='Progress')
dump['is_signature'] = dump['docItem'].progress_apply(contains_sig)

Progress: 100%|██████████| 805414/805414 [00:31<00:00, 25553.28it/s]


In [584]:
signatures = dump[dump['is_signature'] == True]
signatures
# print(len(signatures))
# print(signatures['docItem'].str.cat(sep='\n'))
# signatures[signatures['docItem'].str.split('() =>', regex=False).str.len() > 2]
# tqdm.pandas(desc='Progress')
# signatures['docType'] = ':: ' + signatures['docItem'].progress_apply(get_sig_type)

Unnamed: 0,docId,docContent,docItem,docType,docPackage,is_signature
6,978,Convert from radians to degrees.\n,degrees :: Floating x => Radians x -> Degrees x,,AC-Angle,True
7,1155,Convert from degrees to radians.\n,radians :: Floating x => Degrees x -> Radians x,,AC-Angle,True
9,1480,,"sine :: (Angle a, Floating x) => a x -> x",,AC-Angle,True
10,1634,,"cosine :: (Angle a, Floating x) => a x -> x",,AC-Angle,True
11,1789,,"tangent :: (Angle a, Floating x) => a x -> x",,AC-Angle,True
...,...,...,...,...,...,...
793115,176219740,Show all variables and equations. Useful in co...,"showVars :: (Show n, Show v, Ord n, Ord v, Flo...",,mfsolve,True
793117,176220217,,runSolverT :: MFSolverT v n m a -> Dependencie...,,mfsolve,True
793118,176220406,Return the result of solving the equations or ...,evalSolverT :: Functor f => MFSolverT v n f b ...,,mfsolve,True
793119,176220645,Run the solver and return the dependencies or ...,execSolverT :: Functor m => MFSolverT v n m a ...,,mfsolve,True


In [595]:
# tree = parser.parse(bytes('(#%%=) :: MonadState s m => ALens s s a b -> (a -> (r, b)) -> m r', "utf8"))
print(len(signatures))
query = HS_LANGUAGE.query('(signature _ type: _ _ @type)')
tqdm.pandas(desc='Progress')
signatures['docType'] = signatures['docItem'].progress_apply(lambda item: get_sig_type(item, query))
signatures
# tree = parser.parse(bytes(all_sigs, "utf8"))
# query = HS_LANGUAGE.query('(signature _ type: _ _ @type)')
# captures = query.captures(tree.root_node)
# types = pd.Series([str(ty[0].text, 'UTF-8') for ty in captures])
# signatures['docType'] = types
# signatures

# tree.root_node.children[0].children[3].text
# print(tree.root_node.sexp())

470571


Progress:  20%|██        | 95104/470571 [00:05<00:19, 18825.04it/s]


KeyboardInterrupt: 

In [590]:
signatures[signatures['docType'].str.len() == 1]

Unnamed: 0,docId,docContent,docItem,docType,docPackage,is_signature
13037,2648492,,bankSelect :: T,T,midi,True
13038,2648643,,modulation :: T,T,midi,True
13039,2648794,,breathControl :: T,T,midi,True
13040,2648946,,footControl :: T,T,midi,True
13041,2649096,,portamentoTime :: T,T,midi,True
...,...,...,...,...,...,...
654090,146307709,,empty :: T,T,battleship-combinatorics,True
654099,146309293,The main configuration given in\n<a>https://de...,german :: T,T,battleship-combinatorics,True
654100,146309505,The main configuration given in\n<a>https://en...,english :: T,T,battleship-combinatorics,True
659799,147622282,A special error value. If a property evaluates...,discard :: a,a,tasty-quickcheck,True


In [438]:
signatures[6]

KeyError: 6

In [427]:
tree = parser.parse(bytes(""":: Radians x => Radians x -> Degrees x""", "utf8"))
print(tree.root_node.sexp())
tree.root_node.children[0].children[2].text
# query = HS_LANGUAGE.query("(signature _ _ _ @type)")
# cap = query.captures(tree.root_node)
# cap[0][0].text
# print(len(cap[0]))
# print(cap[0])


Radians x => Radians x -> Degrees x
(haskell (signature name: (variable (MISSING _varid)) type: (context (constraint class: (class_name (type)) (type_name (type_variable))) (fun (type_apply (type_name (type)) (type_name (type_variable))) (type_apply (type_name (type)) (type_name (type_variable)))))))


b'Radians x => Radians x -> Degrees x'

In [605]:
tree = parser.parse(bytes("""( ## ) :: Diagram -> Diagram -> Diagram""", "utf8"))
print(tree.root_node.sexp())
q = HS_LANGUAGE.query('(signature name: )')
cap = q.captures(tree.root_node)
print(cap[0][0].text)

(haskell (signature name: (operator) type: (fun (type_name (type)) (fun (type_name (type)) (type_name (type))))))
b')'
