## Regex time

In [None]:
from typing import Generator
import tree_sitter_java as tsjava
from tree_sitter import Language, Parser, Tree, Node
import re # Regular expressions module here

In [2]:
# Build a parser for the Java grammar and parse Ivy.java into a syntax tree.
parser = Parser(Language(tsjava.language()))
# Read the file as UTF-8 explicitly: without `encoding`, open() falls back to
# the platform's locale encoding, which may not agree with the UTF-8 bytes
# handed to the parser on the next line.
with open('Ivy.java', encoding='utf-8') as f:
    source_code = f.read()
tree = parser.parse(bytes(source_code, 'utf8'))

def traverse_tree(tree: Tree) -> Generator[Node, None, None]:
    """Yield every node of *tree* in pre-order (a node before its children).

    The walk is driven by a single cursor obtained from ``tree.walk()``, so
    no recursion and no explicit stack are needed.
    """
    cursor = tree.walk()

    while True:
        # The cursor has just arrived on a node it has not reported yet.
        yield cursor.node

        # Descend whenever possible; the child is reported next.
        if cursor.goto_first_child():
            continue

        # Leaf reached: move to the next sibling, climbing towards the root
        # until some ancestor has one. Running out of parents means the
        # whole tree has been visited.
        while not cursor.goto_next_sibling():
            if not cursor.goto_parent():
                return

In [3]:
# Sort the syntax nodes of interest into two lists, keyed by node type.
class_nodes = []
method_nodes = []
buckets = {
    'class_declaration': class_nodes,
    'method_declaration': method_nodes,
}
for node in traverse_tree(tree):
    bucket = buckets.get(node.type)
    # Compare against None: an empty list is falsy but is still a valid bucket.
    if bucket is not None:
        bucket.append(node)

In [14]:
# start_byte/end_byte are offsets into the UTF-8 bytes that were parsed, not
# into the decoded str. Slice the encoded source and decode the slice so the
# extracted text stays correct when the file contains non-ASCII characters.
first_class = class_nodes[0]
text = bytes(source_code, 'utf8')[first_class.start_byte:first_class.end_byte].decode('utf8')
print(text[:500])

public class Ivy {
    /**
     * Callback used to execute a set of Ivy related methods within an {@link IvyContext}.
     *
     * @see Ivy#execute(org.apache.ivy.Ivy.IvyCallback)
     */
    public interface IvyCallback {
        /**
         * Executes Ivy related job within an {@link IvyContext}
         *
         * @param ivy
         *            the {@link Ivy} instance to which this callback is related
         * @param context
         *            the {@link IvyContext} in which this 


In [5]:
# Get class declaration
# Source: https://www.reddit.com/r/regex/comments/31sdor/trying_to_match_class_names_in_java/
# Adjustments to the sourced pattern:
#   * `\b` before `class` so an identifier that merely ends in "class" is not
#     taken for the keyword;
#   * the name group (group 1) also accepts `_` and `$`, which are legal in
#     Java identifiers; previously `My_Class` was captured as `My`;
#   * re.MULTILINE lets `^` anchor at any line start, so the declaration is
#     still found when annotations occupy the lines before the keyword.
regex = r'^.*\bclass\s+([A-Za-z_$][\w$]*)(\s*\{|)'
match = re.search(regex, text, re.MULTILINE) # Use search() to get the first instance of a class declaration in case there are nested classes
match

<re.Match object; span=(0, 18), match='public class Ivy {'>

In [6]:
# Entire matched text (group 0) of the class-declaration match.
match[0]

'public class Ivy {'

In [7]:
# The class name is already captured as group 1 of the declaration match.
# Deleting `.*class`, the trailing `{` and the spaces from group 0 instead is
# fragile: the greedy `.*class` also swallows a name that itself contains
# "class" (e.g. "Subclass"), leaving an empty string.
match.group(1)

'Ivy'

In [8]:
# Find all method declarations in the class (because why not)
# Source: https://stackoverflow.com/questions/68633/regex-that-will-match-a-java-method-declaration
# The pattern is purely textual: two words, a parenthesised list without ')',
# then an opening brace. Anything shaped like that is reported.
regex = r'\w+ +\w+ *\([^\)]*\) *\{'
signature_pattern = re.compile(regex)
signature_pattern.findall(text)

['String getIvyVersion() {',
 'String getIvyDate() {',
 'String getIvyHomeURL() {',
 'Ivy newInstance() {',
 'Ivy newInstance(IvySettings settings) {',
 'public Ivy() {',
 'void bind() {',
 'new TransferListener() {',
 'void transferProgress(TransferEvent evt) {',
 'Object execute(IvyCallback callback) {',
 'void pushContext() {',
 'void popContext() {',
 'boolean check(URL ivyFile, String resolvername) {',
 'ResolvedModuleRevision findModule(ModuleRevisionId mrid) {',
 'void interrupt() {',
 'void interrupt(Thread operatingThread) {',
 'boolean isInterrupted() {',
 'void checkInterrupted() {',
 'String getWorkingRevision() {',
 'ResolutionCacheManager getResolutionCacheManager() {',
 'void assertBound() {',
 'void postConfigure() {',
 'String getVariable(String name) {',
 'String substitute(String str) {',
 'void setVariable(String varName, String value) {',
 'IvySettings getSettings() {',
 'EventManager getEventManager() {',
 'CheckEngine getCheckEngine() {',
 'void setCheckEngine(Ch

In [9]:
# Now find method names
idx = 4
method_node = method_nodes[idx]
# start_byte/end_byte are offsets into the UTF-8 bytes that were parsed, not
# into the decoded str. Slice the encoded source and decode the slice so the
# extracted text stays correct when the file contains non-ASCII characters.
text = bytes(source_code, 'utf8')[method_node.start_byte:method_node.end_byte].decode('utf8')
print(text)

public static Ivy newInstance() {
        Ivy ivy = new Ivy();
        ivy.bind();
        return ivy;
    }


In [10]:
# Same signature pattern as the class-wide scan, now applied to the text of a
# single method.
regex = r'\w+ +\w+ *\([^\)]*\) *\{'
signature_pattern = re.compile(regex)
# search() stops at the first signature-shaped span in the method text.
match = signature_pattern.search(text)
match

<re.Match object; span=(14, 33), match='Ivy newInstance() {'>

In [11]:
# Isolate "<name>(<parameters>)" from the matched signature.
# The parameter list may be non-empty: requiring a literal "()" only matched
# zero-argument methods and left `match` as None for every other method.
# Optional whitespace between the name and "(" is tolerated as well, since
# the signature pattern allows it.
regex = r'[^\s(]+\s*\([^)]*\)'
match = re.search(regex, match.group(0))
match

<re.Match object; span=(4, 17), match='newInstance()'>

In [12]:
# Keep only what precedes the first "(": this yields the method name whether
# or not the parentheses enclose parameters, instead of assuming the match
# ends in exactly "()".
match.group(0).partition('(')[0].rstrip()

'newInstance'