In [1]:
from collections import Counter
import os
from typing import Any, Dict, List, Tuple

import pygments
from pygments.lexers.javascript import JavascriptLexer, TypeScriptLexer
from pygments.lexers.python import PythonLexer
from pygments.lexers.jvm import JavaLexer, ScalaLexer, KotlinLexer
from pygments.lexers.go import GoLexer
from pygments.lexers.c_cpp import CppLexer, CLexer
from pygments.lexers.ruby import RubyLexer
from pygments.lexers.php import PhpLexer
from pygments.lexers.dotnet import CSharpLexer
from pygments.lexers.shell import BashLexer
from pygments.lexers.rust import RustLexer
from pygments.lexers.objective import SwiftLexer
from pygments.lexers.haskell import HaskellLexer
from tree_sitter import Language, Parser
import tree_sitter

In [2]:
def get_tree_sitter_dir() -> str:
    """
    Locate the directory that holds the tree-sitter grammars and build output.
    :return: absolute form of the relative `../parsers/` path.
    """
    relative_location = '../parsers/'
    return os.path.abspath(relative_location)

In [3]:
def get_tree_sitter_so() -> str:
    """
    Compute where the compiled tree-sitter shared library is stored.
    :return: path of `build/langs.so` inside the tree-sitter directory.
    """
    return os.path.join(get_tree_sitter_dir(), "build/langs.so")

In [4]:
def main() -> None:
    """
    Initialize tree-sitter library.

    Compiles every vendored grammar listed below into the single shared library
    whose path is given by `get_tree_sitter_so()`, then prints a confirmation.
    :return: None.
    """
    # root directory for tree-sitter
    tree_sitter_dir = get_tree_sitter_dir()
    # grammar locations, relative to the root directory
    grammar_dirs = [
        "vendor/tree-sitter-go",
        "vendor/tree-sitter-cpp",
        "vendor/tree-sitter-java",
        "vendor/tree-sitter-python",
        "vendor/tree-sitter-javascript",
        "vendor/tree-sitter-ruby",
        "vendor/tree-sitter-typescript/typescript",
        # The notebook also parses with the "tsx" language (see the
        # get_tokens(content, "tsx") cell), so that grammar has to be compiled
        # into the library as well.
        # NOTE(review): assumes the vendored tree-sitter-typescript checkout ships
        # the usual `tsx` directory next to `typescript` -- confirm.
        "vendor/tree-sitter-typescript/tsx",
        "vendor/tree-sitter-php",
        "vendor/tree-sitter-c-sharp",
        "vendor/tree-sitter-c",
        "vendor/tree-sitter-bash",
        "vendor/tree-sitter-rust",
        "vendor/tree-sitter-swift",
    ]
    grammar_locs = [os.path.join(tree_sitter_dir, grammar_dir) for grammar_dir in grammar_dirs]
    # build everything, storing the library at the shared `.so` location
    Language.build_library(get_tree_sitter_so(), grammar_locs)
    print("Parser successfully initialized.")

In [7]:
# Build the shared grammar library at the path that get_parser() later loads languages from.
main()

Parser successfully initialized.


In [5]:
# Cache of already-initialized parsers, keyed by language name (filled in by get_parser).
PARSERS = dict()

In [5]:
def get_parser(lang: str) -> Parser:
    """
    Return the parser for a language, creating and caching it on first use.
    :param lang: language to use.
    :return: parser.
    """
    # Reuse the parser built by an earlier call for the same language.
    if lang in PARSERS:
        return PARSERS[lang]
    # First request for this language: load it from the compiled library and cache it.
    new_parser = Parser()
    new_parser.set_language(Language(get_tree_sitter_so(), lang))
    PARSERS[lang] = new_parser
    return new_parser

In [6]:
def get_positional_bytes(node: tree_sitter.Node) -> Tuple[int, int]:
    """
    Read the byte span covered by a tree-sitter Node.
    :param node: node on the AST.
    :return: (start byte, end byte).
    """
    return node.start_byte, node.end_byte

def get_tokens(content: str, lang: str) -> None:
    """
    Print the identifier node types and the identifier texts found in a piece of code.

    The code is parsed with the tree-sitter parser for `lang` and the AST is walked in
    pre-order (the root node itself is not inspected). Every node whose type contains
    "identifier", but neither "scoped" nor "nested", contributes its type to the printed
    set and its source text to the printed list.
    :param content: the source code to analyse.
    :param lang: the language of the code, as accepted by `get_parser`.
    :return: None; the set of node types and the list of identifiers are printed.
    """
    source = bytes(content, "utf-8")
    tree = get_parser(lang).parse(source)
    types = set()
    identifiers = []
    # An explicit stack replaces recursion so that deeply nested ASTs cannot hit the
    # interpreter's recursion limit and silently lose tokens. Children are pushed in
    # reverse so that they are popped left-to-right, which keeps the pre-order.
    pending = list(reversed(tree.root_node.children))
    while pending:
        node = pending.pop()
        node_type = node.type
        if "identifier" in node_type and "scoped" not in node_type and "nested" not in node_type:
            # Offsets are byte positions, hence the slice on the encoded source.
            start, end = get_positional_bytes(node)
            types.add(node_type)
            identifiers.append(source[start:end].decode("utf-8"))
        pending.extend(reversed(node.children))
    print(types)
    print(identifiers)

In [12]:
def get_tokens_pygments(content: str, lang: str) -> None:
    """
    Print the name-like token types and token texts that Pygments finds in a piece of code.

    The code is lexed with the Pygments lexer registered for `lang`. Every token whose
    type falls under `Token.Name` or `Token.Comment.PreprocFile` contributes its type to
    the printed set and its text to the printed list.
    :param content: the source code to analyse.
    :param lang: the language of the code; C# is accepted both as "c-sharp" and as
                 "c_sharp" (the spelling used with `get_tokens`).
    :return: None; the set of token types and the list of token texts are printed.
    :raises KeyError: if no lexer is registered for `lang`.
    """
    # Map to lexer classes and instantiate only the one that is needed, instead of
    # constructing every lexer on each call.
    lexer_classes = {"javascript": JavascriptLexer,
                     "python": PythonLexer,
                     "java": JavaLexer,
                     "go": GoLexer,
                     "cpp": CppLexer,
                     "ruby": RubyLexer,
                     "typescript": TypeScriptLexer,
                     "php": PhpLexer,
                     "c-sharp": CSharpLexer,
                     "c_sharp": CSharpLexer,
                     "c": CLexer,
                     "scala": ScalaLexer,
                     "bash": BashLexer,
                     "rust": RustLexer,
                     "swift": SwiftLexer,
                     "kotlin": KotlinLexer,
                     "haskell": HaskellLexer}
    lexer = lexer_classes[lang]()
    # Token families that are treated as identifiers.
    wanted_families = (pygments.token.Name, pygments.token.Comment.PreprocFile)
    identifiers = []
    types = set()
    for token_type, text in pygments.lex(content, lexer):
        # `token_type in family` is true for the family itself and for its subtypes.
        if any(token_type in family for family in wanted_families):
            types.add(token_type)
            identifiers.append(text)
    print(types)
    print(identifiers)

In [142]:
content = """
'use strict';

const path = require('path');
const { Metadata } = require('../../packages/icon-build-helpers');

const ICONS_PACKAGE_DIR = path.resolve(__dirname, '../../packages/icons');
const sizes = [16, 20, 24, 32];

describe('@carbon/icons', () => {
  let metadata;

  beforeAll(async () => {
    metadata = await Metadata.load({
      input: ICONS_PACKAGE_DIR,
      extensions: [
        Metadata.extensions.icons,
        Metadata.extensions.deprecated,
        Metadata.extensions.moduleName,
      ],
    });
  });

  it('should export each SVG asset', async () => {
    const CarbonIconsCommonJS = require('@carbon/icons');
    const CarbonIconsESM = await import('@carbon/icons');

    for (const icon of metadata.icons) {
      const { moduleName } = icon;
      for (const size of sizes) {
        const exportName = `${moduleName}${size}`;
        expect(CarbonIconsCommonJS[exportName]).toBeDefined();
        expect(CarbonIconsESM[exportName]).toBeDefined();
      }
    }
  });

  it('should export each SVG asset as a direct path', async () => {
    for (const icon of metadata.icons) {
      const esm = path.join(
        ICONS_PACKAGE_DIR,
        'es',
        ...icon.namespace,
        icon.name
      );
      const commonjs = path.join(
        ICONS_PACKAGE_DIR,
        'lib',
        ...icon.namespace,
        icon.name
      );

      for (const size of sizes) {
        const es = path.join(esm, `${size}.js`);
        const lib = path.join(commonjs, `${size}.js`);
        expect(() => {
          require(lib);
        }).not.toThrow();
        await expect(import(es)).resolves.toBeDefined();
      }
    }
  });
});
"""

In [143]:
get_tokens(content, "javascript")

{'property_identifier', 'shorthand_property_identifier', 'identifier'}
['path', 'require', 'Metadata', 'require', 'ICONS_PACKAGE_DIR', 'path', 'resolve', '__dirname', 'sizes', 'describe', 'metadata', 'beforeAll', 'metadata', 'Metadata', 'load', 'input', 'ICONS_PACKAGE_DIR', 'extensions', 'Metadata', 'extensions', 'icons', 'Metadata', 'extensions', 'deprecated', 'Metadata', 'extensions', 'moduleName', 'it', 'CarbonIconsCommonJS', 'require', 'CarbonIconsESM', 'icon', 'metadata', 'icons', 'moduleName', 'icon', 'size', 'sizes', 'exportName', 'moduleName', 'size', 'expect', 'CarbonIconsCommonJS', 'exportName', 'toBeDefined', 'expect', 'CarbonIconsESM', 'exportName', 'toBeDefined', 'it', 'icon', 'metadata', 'icons', 'esm', 'path', 'join', 'ICONS_PACKAGE_DIR', 'icon', 'namespace', 'icon', 'name', 'commonjs', 'path', 'join', 'ICONS_PACKAGE_DIR', 'icon', 'namespace', 'icon', 'name', 'size', 'sizes', 'es', 'path', 'join', 'esm', 'size', 'lib', 'path', 'join', 'commonjs', 'size', 'expect', 'requi

In [14]:
get_tokens_pygments(content, "javascript")

{Token.Name.Other}
['path', 'require', 'Metadata', 'require', 'ICONS_PACKAGE_DIR', 'path', 'resolve', '__dirname', 'sizes', 'describe', 'metadata', 'beforeAll', 'async', 'metadata', 'await', 'Metadata', 'load', 'input', 'ICONS_PACKAGE_DIR', 'extensions', 'Metadata', 'extensions', 'icons', 'Metadata', 'extensions', 'deprecated', 'Metadata', 'extensions', 'moduleName', 'it', 'async', 'CarbonIconsCommonJS', 'require', 'CarbonIconsESM', 'await', 'icon', 'metadata', 'icons', 'moduleName', 'icon', 'size', 'sizes', 'exportName', 'moduleName', 'size', 'expect', 'CarbonIconsCommonJS', 'exportName', 'toBeDefined', 'expect', 'CarbonIconsESM', 'exportName', 'toBeDefined', 'it', 'async', 'icon', 'metadata', 'icons', 'esm', 'path', 'join', 'ICONS_PACKAGE_DIR', 'icon', 'namespace', 'icon', 'name', 'commonjs', 'path', 'join', 'ICONS_PACKAGE_DIR', 'icon', 'namespace', 'icon', 'name', 'size', 'sizes', 'es', 'path', 'join', 'esm', 'size', 'lib', 'path', 'join', 'commonjs', 'size', 'expect', 'require', 'l

In [188]:
content = """
def clone_repository(repository: str, directory: str) -> None:
    repository = repository[:8] + "user:password@" + repository[8:]
    os.system("git clone --quiet --depth 1 {repository} {directory}".format(repository=repository,
                                                                            directory=directory))
        """

In [54]:
get_tokens(content, "python")

{'identifier'}
['clone_repository', 'repository', 'str', 'directory', 'str', 'repository', 'repository', 'repository', 'os', 'system', 'format', 'repository', 'repository', 'directory', 'directory']


In [189]:
get_tokens_pygments(content, "python")

{Token.Name.Builtin, Token.Name.Function, Token.Name}
['clone_repository', 'repository', 'str', 'directory', 'str', 'repository', 'repository', 'repository', 'os', 'system', 'format', 'repository', 'repository', 'directory', 'directory']


In [56]:
content = """
package com.company.project;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class Application {
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }
}"""

In [57]:
# NOTE(review): the recorded output below contains 'scoped_identifier' entries, which the
# filter in get_tokens() excludes -- it appears to come from an earlier version of the
# function; re-run this cell to refresh it.
get_tokens(content, "java")

{'scoped_identifier', 'type_identifier', 'identifier'}
['com.company.project', 'com.company', 'com', 'company', 'project', 'org', 'springframework', 'boot', 'SpringApplication', 'org', 'springframework', 'boot', 'autoconfigure', 'SpringBootApplication', 'SpringBootApplication', 'Application', 'main', 'String', 'args', 'SpringApplication', 'run', 'Application', 'args']


In [58]:
get_tokens_pygments(content, "java")

{Token.Name.Attribute, Token.Name.Namespace, Token.Name, Token.Name.Function, Token.Name.Class, Token.Name.Decorator}
['com.company.project', 'org.springframework.boot.SpringApplication', 'org.springframework.boot.autoconfigure.SpringBootApplication', '@SpringBootApplication', 'Application', 'main', 'String', 'args', 'SpringApplication', 'run', 'Application', 'class', 'args']


In [59]:
content = """
func makeRequestLoop(url string, jsonBytes []byte) {
	var i int
	for true {
		if _numRequestsPerThread > 0 {
			if i >= _numRequestsPerThread {
				return
			}
			i++
		}

		response, _, err := makeRequest(url, jsonBytes)

		if err != nil {
			fmt.Print(err.Error())
			continue
		}

		if response.StatusCode != 200 {
			fmt.Print(response.StatusCode)
			fmt.Print(" ")
			continue
		}

		// fmt.Print(".")

		if _requestDelay != 0 {
			time.Sleep(_requestDelay)
		}
	}
}
"""

In [60]:
get_tokens(content, 'go')

{'type_identifier', 'identifier', 'field_identifier'}
['makeRequestLoop', 'url', 'string', 'jsonBytes', 'byte', 'i', 'int', '_numRequestsPerThread', 'i', '_numRequestsPerThread', 'i', 'response', '_', 'err', 'makeRequest', 'url', 'jsonBytes', 'err', 'fmt', 'Print', 'err', 'Error', 'response', 'StatusCode', 'fmt', 'Print', 'response', 'StatusCode', 'fmt', 'Print', '_requestDelay', 'time', 'Sleep', '_requestDelay']


In [61]:
get_tokens_pygments(content, "go")

{Token.Name.Other}
['makeRequestLoop', 'url', 'jsonBytes', 'i', '_numRequestsPerThread', 'i', '_numRequestsPerThread', 'i', 'response', '_', 'err', 'makeRequest', 'url', 'jsonBytes', 'err', 'fmt', 'Print', 'err', 'Error', 'response', 'StatusCode', 'fmt', 'Print', 'response', 'StatusCode', 'fmt', 'Print', '_requestDelay', 'time', 'Sleep', '_requestDelay']


In [98]:
content = """
int x = (a) * (b);
"""

In [101]:
get_tokens(content, "cpp")

{'identifier'}
['x', 'a', 'b']


In [83]:
# NOTE(review): the recorded output below lists names ('main', 'string', 'foo', ...) that do
# not occur in the current `content`, so it looks stale; re-run this cell to refresh it.
get_tokens_pygments(content, "cpp")

{Token.Name.Function, Token.Name}
['main', 'x', 'string', 'c', 'foo', 'x']


In [65]:
content = """
# frozen_string_literal: true

module BbbApi
  RETURNCODE_SUCCESS = "SUCCESS"

  def bbb_endpoint
    Rails.configuration.bigbluebutton_endpoint
  end

  def bbb_secret
    Rails.configuration.bigbluebutton_secret
  end

  # Sets a BigBlueButtonApi object for interacting with the API.
  def bbb(user_provider)
    if Rails.configuration.loadbalanced_configuration
      user_domain = retrieve_provider_info(user_provider)

      BigBlueButton::BigBlueButtonApi.new(remove_slash(user_domain["apiURL"]), user_domain["secret"], "0.8")
    else
      BigBlueButton::BigBlueButtonApi.new(remove_slash(bbb_endpoint), bbb_secret, "0.8")
    end
  end

  # Rereives info from the loadbalanced in regards to a Provider (or tenant).
  def retrieve_provider_info(provider, api = 'api', route = 'getUser')
    # Include Omniauth accounts under the Greenlight provider.
    raise "Provider not included." if !provider || provider.empty?

    cached_provider = Rails.cache.fetch("#{provider}/#{route}")
    # Return cached result if the value exists and cache is enabled
    return cached_provider if !cached_provider.nil? && Rails.configuration.enable_cache

    # Build the URI.
    uri = encode_bbb_url(
      Rails.configuration.loadbalancer_endpoint + api + '/',
      Rails.configuration.loadbalancer_secret,
      { name: provider },
      route
    )

    logger.info uri

    # Make the request.
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == 'https')
    response = http.get(uri.request_uri)

    # Parse XML.
    doc = XmlSimple.xml_in(response.body, 'ForceArray' => false)

    raise doc['message'] unless response.is_a?(Net::HTTPSuccess)

    # Return the user credentials if the request succeeded on the loadbalancer.
    Rails.cache.fetch("#{provider}/#{route}", expires_in: 1.hours) do
      doc['user']
    end

    return doc['user'] if doc['returncode'] == 'SUCCESS'

    raise "User with provider #{provider} does not exist." if doc['messageKey'] == 'noSuchUser'
    raise "API call #{url} failed with #{doc['messageKey']}."
  end

  # Builds a request to retrieve credentials from the load balancer.
  def encode_bbb_url(base_url, secret, params, route = 'getUser')
    encoded_params = params.to_param
    string = route + encoded_params + secret
    checksum = OpenSSL::Digest.digest('sha1', string).unpack1('H*')

    URI.parse("#{base_url}#{route}?#{encoded_params}&checksum=#{checksum}")
  end

  # Removes trailing forward slash from a URL.
  def remove_slash(s)
    s.nil? ? nil : s.chomp("/")
  end
end
"""

In [66]:
# NOTE(review): the recorded output below reports the node types 'symbol' and 'constant',
# which cannot pass the "identifier" filter in get_tokens() -- it appears to come from an
# earlier version of the function; re-run this cell to refresh it.
get_tokens(content, "ruby")

{'symbol', 'identifier', 'constant'}
['BbbApi', 'RETURNCODE_SUCCESS', 'bbb_endpoint', 'Rails', 'configuration', 'bigbluebutton_endpoint', 'bbb_secret', 'Rails', 'configuration', 'bigbluebutton_secret', 'bbb', 'user_provider', 'Rails', 'configuration', 'loadbalanced_configuration', 'user_domain', 'retrieve_provider_info', 'user_provider', 'BigBlueButton', 'BigBlueButtonApi', 'new', 'remove_slash', 'user_domain', 'user_domain', 'BigBlueButton', 'BigBlueButtonApi', 'new', 'remove_slash', 'bbb_endpoint', 'bbb_secret', 'retrieve_provider_info', 'provider', 'api', 'route', 'raise', 'provider', 'provider', 'empty?', 'cached_provider', 'Rails', 'cache', 'fetch', 'provider', 'route', 'cached_provider', 'cached_provider', 'nil?', 'Rails', 'configuration', 'enable_cache', 'uri', 'encode_bbb_url', 'Rails', 'configuration', 'loadbalancer_endpoint', 'api', 'Rails', 'configuration', 'loadbalancer_secret', 'name', 'provider', 'route', 'logger', 'info', 'uri', 'http', 'Net', 'HTTP', 'new', 'uri', 'host

In [67]:
get_tokens_pygments(content, "ruby")

{Token.Name.Namespace, Token.Name, Token.Name.Builtin, Token.Name.Function, Token.Name.Constant}
['BbbApi', 'RETURNCODE_SUCCESS', 'bbb_endpoint', 'Rails', 'configuration', 'bigbluebutton_endpoint', 'bbb_secret', 'Rails', 'configuration', 'bigbluebutton_secret', 'bbb', 'user_provider', 'Rails', 'configuration', 'loadbalanced_configuration', 'user_domain', 'retrieve_provider_info', 'user_provider', 'BigBlueButton', 'BigBlueButtonApi', 'new', 'remove_slash', 'user_domain', 'user_domain', 'BigBlueButton', 'BigBlueButtonApi', 'new', 'remove_slash', 'bbb_endpoint', 'bbb_secret', 'retrieve_provider_info', 'provider', 'api', 'route', 'provider', 'provider', 'empty?', 'cached_provider', 'Rails', 'cache', 'fetch', 'provider', 'route', 'cached_provider', 'cached_provider', 'nil?', 'Rails', 'configuration', 'enable_cache', 'uri', 'encode_bbb_url', 'Rails', 'configuration', 'loadbalancer_endpoint', 'api', 'Rails', 'configuration', 'loadbalancer_secret', 'name', 'provider', 'route', 'logger', 'info'

In [24]:
content = """
@NgModule({
  declarations: DECLARATIONS,
  exports: DECLARATIONS,
  providers: [AngularDelegate, ModalController, PopoverController],
  imports: [CommonModule]
})
export class IonicModule {
  static forRoot(config?: IonicConfig): ModuleWithProviders<IonicModule> {
    return {
      ngModule: IonicModule,
      providers: [
        {
          provide: ConfigToken,
          useValue: config
        },
        {
          provide: APP_INITIALIZER,
          useFactory: appInitialize,
          multi: true,
          deps: [
            ConfigToken,
            DOCUMENT,
            NgZone
          ]
        }
      ]
    };
  }
}
"""

In [25]:
get_tokens(content, "typescript")

{'identifier', 'type_identifier', 'property_identifier'}
['NgModule', 'declarations', 'DECLARATIONS', 'exports', 'DECLARATIONS', 'providers', 'AngularDelegate', 'ModalController', 'PopoverController', 'imports', 'CommonModule', 'IonicModule', 'forRoot', 'config', 'IonicConfig', 'ModuleWithProviders', 'IonicModule', 'ngModule', 'IonicModule', 'providers', 'provide', 'ConfigToken', 'useValue', 'config', 'provide', 'APP_INITIALIZER', 'useFactory', 'appInitialize', 'multi', 'deps', 'ConfigToken', 'DOCUMENT', 'NgZone']


In [26]:
# Needs the "tsx" grammar to be present in the compiled library that get_parser() loads.
get_tokens(content, "tsx")

{'identifier', 'type_identifier', 'property_identifier'}
['NgModule', 'declarations', 'DECLARATIONS', 'exports', 'DECLARATIONS', 'providers', 'AngularDelegate', 'ModalController', 'PopoverController', 'imports', 'CommonModule', 'IonicModule', 'forRoot', 'config', 'IonicConfig', 'ModuleWithProviders', 'IonicModule', 'ngModule', 'IonicModule', 'providers', 'provide', 'ConfigToken', 'useValue', 'config', 'provide', 'APP_INITIALIZER', 'useFactory', 'appInitialize', 'multi', 'deps', 'ConfigToken', 'DOCUMENT', 'NgZone']


In [27]:
get_tokens_pygments(content, "typescript")

{Token.Name.Other}
['declarations', 'exports', 'providers', 'AngularDelegate', 'ModalController', 'PopoverController', 'imports', 'CommonModule', 'IonicModule', 'forRoot', 'config?', 'ModuleWithProviders', 'IonicModule', 'ngModule', 'providers', 'provide', 'useValue', 'provide', 'useFactory', 'multi', 'deps', 'ConfigToken', 'DOCUMENT', 'NgZone']


In [120]:
content = """
<?php

$factory->define(Form::class, function (Faker $faker) {
    return [
        'name'      => $faker->name,
        'file_path' => $faker->url,
        'school_id' => factory(School::class)->create()->id,
        'user_id'   => function() use ($faker) {
            if (User::count())
                return $faker->randomElement(User::pluck('id')->toArray());
            else return factory(User::class)->create()->id;
        },
    ];
});
"""

In [121]:
# NOTE(review): the recorded output below reports the node type 'name', which cannot pass
# the "identifier" filter in get_tokens() -- it appears to come from an earlier version of
# the function; re-run this cell to refresh it.
get_tokens(content, "php")

{'name'}
['factory', 'define', 'Form', 'class', 'Faker', 'faker', 'faker', 'name', 'faker', 'url', 'factory', 'School', 'class', 'create', 'id', 'faker', 'User', 'count', 'faker', 'randomElement', 'User', 'pluck', 'toArray', 'factory', 'User', 'class', 'create', 'id']


In [122]:
get_tokens_pygments(content, "php")

{Token.Name.Attribute, Token.Name.Other, Token.Name.Variable}
['$factory', 'define', 'Form', 'class', 'Faker', '$faker', '$faker', 'name', '$faker', 'url', 'factory', 'School', 'class', 'create', 'id', '$faker', 'User', 'count', '$faker', 'randomElement', 'User', 'pluck', 'toArray', 'factory', 'User', 'class', 'create', 'id']


In [144]:
content = """
public async Task RenewJobRequestAsync(int poolId, long requestId, Guid lockToken, string orchestrationId, TaskCompletionSource<int> firstJobRequestRenewed, CancellationToken token)
        {
            var runnerServer = HostContext.GetService<IRunnerServer>();
            TaskAgentJobRequest request = null;
            int firstRenewRetryLimit = 5;
            int encounteringError = 0;

            // renew lock during job running.
            // stop renew only if cancellation token for lock renew task been signal or exception still happen after retry.
            while (!token.IsCancellationRequested)
            {
                try
                {
                    request = await runnerServer.RenewAgentRequestAsync(poolId, requestId, lockToken, orchestrationId, token);

                    Trace.Info($"Successfully renew job request {requestId}, job is valid till {request.LockedUntil.Value}");

                    if (!firstJobRequestRenewed.Task.IsCompleted)
                    {
                        // fire first renew succeed event.
                        firstJobRequestRenewed.TrySetResult(0);
                    }

                    if (encounteringError > 0)
                    {
                        encounteringError = 0;
                        runnerServer.SetConnectionTimeout(RunnerConnectionType.JobRequest, TimeSpan.FromSeconds(60));
                        HostContext.WritePerfCounter("JobRenewRecovered");
                    }

                    // renew again after 60 sec delay
                    await HostContext.Delay(TimeSpan.FromSeconds(60), token);
                }
                catch (TaskAgentJobNotFoundException)
                {
                    // no need for retry. the job is not valid anymore.
                    Trace.Info($"TaskAgentJobNotFoundException received when renew job request {requestId}, job is no longer valid, stop renew job request.");
                    return;
                }
                catch (TaskAgentJobTokenExpiredException)
                {
                    // no need for retry. the job is not valid anymore.
                    Trace.Info($"TaskAgentJobTokenExpiredException received renew job request {requestId}, job is no longer valid, stop renew job request.");
                    return;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    // OperationCanceledException may caused by http timeout or _lockRenewalTokenSource.Cance();
                    // Stop renew only on cancellation token fired.
                    Trace.Info($"job renew has been canceled, stop renew job request {requestId}.");
                    return;
                }
                catch (Exception ex)
                {
                    Trace.Error($"Catch exception during renew runner jobrequest {requestId}.");
                    Trace.Error(ex);
                    encounteringError++;

                    // retry
                    TimeSpan remainingTime = TimeSpan.Zero;
                    if (!firstJobRequestRenewed.Task.IsCompleted)
                    {
                        // retry 5 times every 10 sec for the first renew
                        if (firstRenewRetryLimit-- > 0)
                        {
                            remainingTime = TimeSpan.FromSeconds(10);
                        }
                    }
                    else
                    {
                        // retry till reach lockeduntil + 5 mins extra buffer.
                        remainingTime = request.LockedUntil.Value + TimeSpan.FromMinutes(5) - DateTime.UtcNow;
                    }

                    if (remainingTime > TimeSpan.Zero)
                    {
                        TimeSpan delayTime;
                        if (!firstJobRequestRenewed.Task.IsCompleted)
                        {
                            Trace.Info($"Retrying lock renewal for jobrequest {requestId}. The first job renew request has failed.");
                            delayTime = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(1), TimeSpan.FromSeconds(10));
                        }
                        else
                        {
                            Trace.Info($"Retrying lock renewal for jobrequest {requestId}. Job is valid until {request.LockedUntil.Value}.");
                            if (encounteringError > 5)
                            {
                                delayTime = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(15), TimeSpan.FromSeconds(30));
                            }
                            else
                            {
                                delayTime = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(15));
                            }
                        }

                        // Re-establish connection to server in order to avoid affinity with server.
                        // Reduce connection timeout to 30 seconds (from 60s)
                        HostContext.WritePerfCounter("ResetJobRenewConnection");
                        await runnerServer.RefreshConnectionAsync(RunnerConnectionType.JobRequest, TimeSpan.FromSeconds(30));

                        try
                        {
                            // back-off before next retry.
                            await HostContext.Delay(delayTime, token);
                        }
                        catch (OperationCanceledException) when (token.IsCancellationRequested)
                        {
                            Trace.Info($"job renew has been canceled, stop renew job request {requestId}.");
                        }
                    }
                    else
                    {
                        Trace.Info($"Lock renewal has run out of retry, stop renew lock for jobrequest {requestId}.");
                        HostContext.WritePerfCounter("JobRenewReachLimit");
                        return;
                    }
                }
            }
        }"""

In [145]:
get_tokens(content, "c_sharp")

{'identifier'}
['Task', 'RenewJobRequestAsync', 'poolId', 'requestId', 'Guid', 'lockToken', 'orchestrationId', 'TaskCompletionSource', 'firstJobRequestRenewed', 'CancellationToken', 'token', 'runnerServer', 'HostContext', 'GetService', 'IRunnerServer', 'TaskAgentJobRequest', 'request', 'firstRenewRetryLimit', 'encounteringError', 'token', 'IsCancellationRequested', 'request', 'runnerServer', 'RenewAgentRequestAsync', 'poolId', 'requestId', 'lockToken', 'orchestrationId', 'token', 'Trace', 'Info', 'requestId', 'request', 'LockedUntil', 'Value', 'firstJobRequestRenewed', 'Task', 'IsCompleted', 'firstJobRequestRenewed', 'TrySetResult', 'encounteringError', 'encounteringError', 'runnerServer', 'SetConnectionTimeout', 'RunnerConnectionType', 'JobRequest', 'TimeSpan', 'FromSeconds', 'HostContext', 'WritePerfCounter', 'HostContext', 'Delay', 'TimeSpan', 'FromSeconds', 'token', 'TaskAgentJobNotFoundException', 'Trace', 'Info', 'requestId', 'TaskAgentJobTokenExpiredException', 'Trace', 'Info', 

In [146]:
get_tokens_pygments(content, "c-sharp")

{Token.Name.Function, Token.Name}
['Task', 'RenewJobRequestAsync', 'poolId', 'requestId', 'Guid', 'lockToken', 'orchestrationId', 'TaskCompletionSource', 'firstJobRequestRenewed', 'CancellationToken', 'token', 'runnerServer', 'HostContext', 'GetService', 'IRunnerServer', 'TaskAgentJobRequest', 'request', 'firstRenewRetryLimit', 'encounteringError', 'token', 'IsCancellationRequested', 'request', 'runnerServer', 'RenewAgentRequestAsync', 'poolId', 'requestId', 'lockToken', 'orchestrationId', 'token', 'Trace', 'Info', 'firstJobRequestRenewed', 'Task', 'IsCompleted', 'firstJobRequestRenewed', 'TrySetResult', 'encounteringError', 'encounteringError', 'runnerServer', 'SetConnectionTimeout', 'RunnerConnectionType', 'JobRequest', 'TimeSpan', 'FromSeconds', 'HostContext', 'WritePerfCounter', 'HostContext', 'Delay', 'TimeSpan', 'FromSeconds', 'token', 'TaskAgentJobNotFoundException', 'Trace', 'Info', 'TaskAgentJobTokenExpiredException', 'Trace', 'Info', 'OperationCanceledException', 'when', 'tok

In [153]:
content = """
#include "memdebug.h" /* keep this as LAST include */

static void dump(const char *timebuf, const char *text,
                 FILE *stream, const unsigned char *ptr, size_t size,
                 trace tracetype, curl_infotype infotype);

/*
** callback for CURLOPT_DEBUGFUNCTION
*/

int tool_debug_cb(CURL *handle, curl_infotype type,
                  char *data, size_t size,
                  void *userdata)
{
  struct OperationConfig *operation = userdata;
  struct GlobalConfig *config = operation->global;
  FILE *output = config->errors;
  const char *text;
  struct timeval tv;
  char timebuf[20];
  time_t secs;

  (void)handle; /* not used */

  if(config->tracetime) {
    struct tm *now;
    static time_t epoch_offset;
    static int    known_offset;
    tv = tvnow();
    if(!known_offset) {
      epoch_offset = time(NULL) - tv.tv_sec;
      known_offset = 1;
    }
    secs = epoch_offset + tv.tv_sec;
    now = localtime(&secs);  /* not thread safe but we don't care */
    msnprintf(timebuf, sizeof(timebuf), "%02d:%02d:%02d.%06ld ",
              now->tm_hour, now->tm_min, now->tm_sec, (long)tv.tv_usec);
  }
  else
    timebuf[0] = 0;

  if(!config->trace_stream) {
    /* open for append */
    if(!strcmp("-", config->trace_dump))
      config->trace_stream = stdout;
    else if(!strcmp("%", config->trace_dump))
      /* Ok, this is somewhat hackish but we do it undocumented for now */
      config->trace_stream = config->errors;  /* aka stderr */
    else {
      config->trace_stream = fopen(config->trace_dump, FOPEN_WRITETEXT);
      config->trace_fopened = TRUE;
    }
  }"""

In [154]:
get_tokens(content, "c")

{'type_identifier', 'identifier', 'field_identifier'}
['dump', 'timebuf', 'text', 'FILE', 'stream', 'ptr', 'size', 'trace', 'tracetype', 'curl_infotype', 'infotype', 'tool_debug_cb', 'CURL', 'handle', 'curl_infotype', 'type', 'data', 'size', 'userdata', 'OperationConfig', 'operation', 'userdata', 'GlobalConfig', 'config', 'operation', 'global', 'FILE', 'output', 'config', 'errors', 'text', 'timeval', 'tv', 'timebuf', 'time_t', 'secs', 'handle', 'config', 'tracetime', 'tm', 'now', 'time_t', 'epoch_offset', 'known_offset', 'tv', 'tvnow', 'known_offset', 'epoch_offset', 'time', 'tv', 'tv_sec', 'known_offset', 'secs', 'epoch_offset', 'tv', 'tv_sec', 'now', 'localtime', 'secs', 'msnprintf', 'timebuf', 'timebuf', 'now', 'tm_hour', 'now', 'tm_min', 'now', 'tm_sec', 'tv', 'tv_usec', 'timebuf', 'config', 'trace_stream', 'strcmp', 'config', 'trace_dump', 'config', 'trace_stream', 'stdout', 'strcmp', 'config', 'trace_dump', 'config', 'trace_stream', 'config', 'errors', 'config', 'trace_stream', '

In [155]:
get_tokens_pygments(content, "c")

{Token.Name.Builtin, Token.Name.Function, Token.Name, Token.Comment.PreprocFile}
['"memdebug.h" /* keep this as LAST include */', 'dump', 'timebuf', 'text', 'stream', 'ptr', 'size', 'trace', 'tracetype', 'curl_infotype', 'infotype', 'tool_debug_cb', 'CURL', 'handle', 'curl_infotype', 'type', 'data', 'size', 'userdata', 'OperationConfig', 'operation', 'userdata', 'GlobalConfig', 'config', 'operation', 'global', 'output', 'config', 'errors', 'text', 'timeval', 'tv', 'timebuf', 'secs', 'handle', 'config', 'tracetime', 'tm', 'now', 'epoch_offset', 'known_offset', 'tv', 'tvnow', 'known_offset', 'epoch_offset', 'time', 'NULL', 'tv', 'tv_sec', 'known_offset', 'secs', 'epoch_offset', 'tv', 'tv_sec', 'now', 'localtime', 'secs', 'msnprintf', 'timebuf', 'timebuf', 'now', 'tm_hour', 'now', 'tm_min', 'now', 'tm_sec', 'tv', 'tv_usec', 'timebuf', 'config', 'trace_stream', 'strcmp', 'config', 'trace_dump', 'config', 'trace_stream', 'stdout', 'strcmp', 'config', 'trace_dump', 'config', 'trace_stream', 

In [191]:
# Scala sample (object `DefaultBucket` in package com.spotify.scio.testing.util) used as
# tokenizer input by the cells that follow. The text is data: keep it verbatim.
content = """
package com.spotify.scio.testing.util

import java.io.IOException
import java.nio.file.FileAlreadyExistsException

import com.google.api.client.util.Sleeper
import com.google.api.services.cloudresourcemanager.CloudResourceManager
import com.google.api.services.storage.model.Bucket
import com.google.cloud.hadoop.util.{ResilientOperation, RetryDeterminer}
import org.apache.beam.sdk.extensions.gcp.options.{GcpOptions, GcsOptions}
import org.apache.beam.sdk.options.PipelineOptions
import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath
import org.apache.beam.sdk.extensions.gcp.util.BackOffAdapter
import org.apache.beam.sdk.util.{BackOff, FluentBackoff}
import org.joda.time.Duration
import org.slf4j.{Logger, LoggerFactory}

private object DefaultBucket {
  private[this] val isNullOrEmpty: String => Boolean = s => !Option(s).exists(_.nonEmpty)

  def tryCreateDefaultBucket(options: PipelineOptions, crmClient: CloudResourceManager): String = {
    val gcpOptions = options.as(classOf[GcsOptions])
    val projectId = gcpOptions.getProject
    require(!isNullOrEmpty(projectId), "--project is a required option.")
    // Look up the project number, to create a default bucket with a stable
    // name with no special characters.
    var projectNumber = 0L
    try projectNumber = getProjectNumber(projectId, crmClient)
    catch {
      case e: IOException =>
        throw new RuntimeException("Unable to verify project with ID " + projectId, e)
    }
    var region = DEFAULT_REGION
    if (!isNullOrEmpty(gcpOptions.getZone)) region = getRegionFromZone(gcpOptions.getZone)
    val bucketName = "dataflow-staging-" + region + "-" + projectNumber
    LOG.info("No staging location provided, attempting to use default bucket: {}", bucketName)
    val bucket = new Bucket().setName(bucketName).setLocation(region)
    // Always try to create the bucket before checking access, so that we do not
    // race with other pipelines that may be attempting to do the same thing.
    try gcpOptions.getGcsUtil.createBucket(projectId, bucket)
    catch {
      case e: FileAlreadyExistsException =>
        LOG.debug("Bucket '{}'' already exists, verifying access.", bucketName)
      case e: IOException =>
        throw new RuntimeException("Unable create default bucket.", e)
    }
    // Once the bucket is expected to exist, verify that it is correctly owned
    // by the project executing the job.
    try {
      val owner = gcpOptions.getGcsUtil.bucketOwner(GcsPath.fromComponents(bucketName, ""))
      require(
        owner == projectNumber,
        s"Bucket owner does not match the project from --project: $owner vs. $projectNumber"
      )
    } catch {
      case e: IOException =>
        throw new RuntimeException(
          "Unable to determine the owner of the default bucket at gs://" +
            bucketName,
          e
        )
    }
    "gs://" + bucketName
  }

  private val BACKOFF_FACTORY = FluentBackoff.DEFAULT
    .withMaxRetries(3)
    .withInitialBackoff(Duration.millis(200))

  private val DEFAULT_REGION = "us-central1"
  private val LOG: Logger = LoggerFactory.getLogger(classOf[GcpOptions.GcpTempLocationFactory])

  private def getProjectNumber(projectId: String, crmClient: CloudResourceManager): Long =
    getProjectNumber(projectId, crmClient, BACKOFF_FACTORY.backoff(), Sleeper.DEFAULT)

  private def getProjectNumber(
    projectId: String,
    crmClient: CloudResourceManager,
    backoff: BackOff,
    sleeper: Sleeper
  ): Long = {
    val getProject = crmClient.projects.get(projectId)
    try {
      val project = ResilientOperation.retry(
        ResilientOperation.getGoogleRequestCallable(getProject),
        BackOffAdapter.toGcpBackOff(backoff),
        RetryDeterminer.SOCKET_ERRORS,
        classOf[IOException],
        sleeper
      )
      project.getProjectNumber
    } catch {
      case e: Exception =>
        throw new IOException("Unable to get project number", e)
    }
  }

  private def getRegionFromZone(zone: String): String = {
    val zoneParts = zone.split("-")
    require(zoneParts.length >= 2, s"Invalid zone provided: $zone")
    zoneParts(0) + "-" + zoneParts(1)
  }
}"""

In [192]:
# Extract tokens from the Scala sample with get_tokens (defined outside this chunk;
# presumably the tree-sitter based extractor, judging by the node-type names it prints — TODO confirm).
get_tokens(content, "scala")

{'package_identifier', 'type_identifier', 'identifier'}
['com.spotify.scio.testing.util', 'com', 'spotify', 'scio', 'testing', 'util', 'java', 'io', 'IOException', 'java', 'nio', 'file', 'FileAlreadyExistsException', 'com', 'google', 'api', 'client', 'util', 'Sleeper', 'com', 'google', 'api', 'services', 'cloudresourcemanager', 'CloudResourceManager', 'com', 'google', 'api', 'services', 'storage', 'model', 'Bucket', 'com', 'google', 'cloud', 'hadoop', 'util', 'ResilientOperation', 'RetryDeterminer', 'org', 'apache', 'beam', 'sdk', 'extensions', 'gcp', 'options', 'GcpOptions', 'GcsOptions', 'org', 'apache', 'beam', 'sdk', 'options', 'PipelineOptions', 'org', 'apache', 'beam', 'sdk', 'extensions', 'gcp', 'util', 'gcsfs', 'GcsPath', 'org', 'apache', 'beam', 'sdk', 'extensions', 'gcp', 'util', 'BackOffAdapter', 'org', 'apache', 'beam', 'sdk', 'util', 'BackOff', 'FluentBackoff', 'org', 'joda', 'time', 'Duration', 'org', 'slf4j', 'Logger', 'LoggerFactory', 'DefaultBucket', 'this', 'isNullOrE

In [193]:
# Same Scala sample through get_tokens_pygments, for side-by-side comparison with the
# get_tokens result in the previous cell.
get_tokens_pygments(content, "scala")

{Token.Name.Namespace, Token.Name, Token.Name.Class}
['com.spotify.scio.testing.util', 'java.io.IOException', 'java.nio.file.FileAlreadyExistsException', 'com.google.api.client.util.Sleeper', 'com.google.api.services.cloudresourcemanager.CloudResourceManager', 'com.google.api.services.storage.model.Bucket', 'com.google.cloud.hadoop.util.', 'ResilientOperation', 'RetryDeterminer', 'org.apache.beam.sdk.extensions.gcp.options.', 'GcpOptions', 'GcsOptions', 'org.apache.beam.sdk.options.PipelineOptions', 'org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath', 'org.apache.beam.sdk.extensions.gcp.util.BackOffAdapter', 'org.apache.beam.sdk.util.', 'BackOff', 'FluentBackoff', 'org.joda.time.Duration', 'org.slf4j.', 'Logger', 'LoggerFactory', 'DefaultBucket', 'isNullOrEmpty', 'Boolean', 's', 'Option', 's', 'exists', 'nonEmpty', 'tryCreateDefaultBucket', 'options', 'crmClient', 'gcpOptions', 'options', 'as', 'classOf', 'projectId', 'gcpOptions', 'getProject', 'require', 'isNullOrEmpty', 'project

In [212]:
# Shell sample (a git-flow installer script, per its own echo messages) used as tokenizer
# input by the cells that follow. Indentation inside the literal is tab-based; keep it verbatim.
content = """
EXEC_FILES="git-flow"
SCRIPT_FILES="git-flow-init git-flow-feature git-flow-hotfix git-flow-release git-flow-support git-flow-version gitflow-common gitflow-shFlags"
SUBMODULE_FILE="gitflow-shFlags"

echo "### gitflow no-make installer ###"

case "$1" in
	uninstall)
		echo "Uninstalling git-flow from $INSTALL_PREFIX"
		if [ -d "$INSTALL_PREFIX" ] ; then
			for script_file in $SCRIPT_FILES $EXEC_FILES ; do
				echo "rm -vf $INSTALL_PREFIX/$script_file"
				rm -vf "$INSTALL_PREFIX/$script_file"
			done
		else
			echo "The '$INSTALL_PREFIX' directory was not found."
			echo "Do you need to set INSTALL_PREFIX ?"
		fi
		exit
		;;
	help)
		echo "Usage: [environment] gitflow-installer.sh [install|uninstall]"
		echo "Environment:"
		echo "   INSTALL_PREFIX=$INSTALL_PREFIX"
		echo "   REPO_HOME=$REPO_HOME"
		echo "   REPO_NAME=$REPO_NAME"
		exit
		;;
	*)
		echo "Installing git-flow to $INSTALL_PREFIX"
		if [ -d "$REPO_NAME" -a -d "$REPO_NAME/.git" ] ; then
			echo "Using existing repo: $REPO_NAME"
		else
			echo "Cloning repo from GitHub to $REPO_NAME"
			git clone "$REPO_HOME" "$REPO_NAME"
		fi
		if [ -f "$REPO_NAME/$SUBMODULE_FILE" ] ; then
			echo "Submodules look up to date"
		else
			echo "Updating submodules"
			lastcwd=$PWD
			cd "$REPO_NAME"
			git submodule init
			git submodule update
			cd "$lastcwd"
		fi
		install -v -d -m 0755 "$INSTALL_PREFIX"
		for exec_file in $EXEC_FILES ; do
			install -v -m 0755 "$REPO_NAME/$exec_file" "$INSTALL_PREFIX"
		done
		for script_file in $SCRIPT_FILES ; do
			install -v -m 0644 "$REPO_NAME/$script_file" "$INSTALL_PREFIX"
		done
		exit
		;;
esac"""

In [213]:
# Extract tokens from the shell sample with get_tokens (defined outside this chunk;
# presumably the tree-sitter based extractor, judging by the node-type names it prints — TODO confirm).
get_tokens(content, "bash")

{'variable_name', 'command_name'}
['EXEC_FILES', 'SCRIPT_FILES', 'SUBMODULE_FILE', 'echo', '1', 'echo', 'INSTALL_PREFIX', 'INSTALL_PREFIX', 'script_file', 'SCRIPT_FILES', 'EXEC_FILES', 'echo', 'INSTALL_PREFIX', 'script_file', 'rm', 'INSTALL_PREFIX', 'script_file', 'echo', 'INSTALL_PREFIX', 'echo', 'exit', 'echo', 'echo', 'echo', 'INSTALL_PREFIX', 'echo', 'REPO_HOME', 'echo', 'REPO_NAME', 'exit', 'echo', 'INSTALL_PREFIX', 'REPO_NAME', 'REPO_NAME', 'echo', 'REPO_NAME', 'echo', 'REPO_NAME', 'git', 'REPO_HOME', 'REPO_NAME', 'REPO_NAME', 'SUBMODULE_FILE', 'echo', 'echo', 'lastcwd', 'PWD', 'cd', 'REPO_NAME', 'git', 'git', 'cd', 'lastcwd', 'install', 'INSTALL_PREFIX', 'exec_file', 'EXEC_FILES', 'install', 'REPO_NAME', 'exec_file', 'INSTALL_PREFIX', 'script_file', 'SCRIPT_FILES', 'install', 'REPO_NAME', 'script_file', 'INSTALL_PREFIX', 'exit']


In [214]:
# Same shell sample through get_tokens_pygments, for side-by-side comparison with the
# get_tokens result in the previous cell.
get_tokens_pygments(content, "bash")

{Token.Name.Builtin, Token.Name.Variable}
['EXEC_FILES', 'SCRIPT_FILES', 'SUBMODULE_FILE', 'echo', '$1', 'echo', '$INSTALL_PREFIX', '$INSTALL_PREFIX', '$SCRIPT_FILES', '$EXEC_FILES', 'echo', '$INSTALL_PREFIX', '$script_file', '$INSTALL_PREFIX', '$script_file', 'echo', '$INSTALL_PREFIX', 'echo', 'exit', 'help', 'echo', 'echo', 'echo', '$INSTALL_PREFIX', 'echo', '$REPO_HOME', 'echo', '$REPO_NAME', 'exit', 'echo', '$INSTALL_PREFIX', '$REPO_NAME', '$REPO_NAME', 'echo', '$REPO_NAME', 'echo', '$REPO_NAME', '$REPO_HOME', '$REPO_NAME', '$REPO_NAME', '$SUBMODULE_FILE', 'echo', 'echo', 'lastcwd', '$PWD', 'cd', '$REPO_NAME', 'cd', '$lastcwd', '$INSTALL_PREFIX', '$EXEC_FILES', '$REPO_NAME', '$exec_file', '$INSTALL_PREFIX', '$SCRIPT_FILES', '$REPO_NAME', '$script_file', '$INSTALL_PREFIX', 'exit']


In [217]:
# Rust sample (a structopt-driven `main` that renders a scene through the `ray` crate) used
# as tokenizer input by the cells that follow. The text is data: keep it verbatim.
content = """
#[macro_use]
extern crate structopt;

use std::fs::File;
use std::io;
use std::io::Write;
use std::path::Path;

use ray;

#[derive(StructOpt)]
struct Args {
    #[structopt(short = "f")]
    /// The file to output the image to
    file_out: Option<String>,

    #[structopt(short = "m", long = "height", default_value = "200")]
    /// The height
    height: i32,

    #[structopt(short = "n", long = "width", default_value = "200")]
    /// The width
    width: i32,

    #[structopt(short = "s", long = "scene", default_value = "rgbbox")]
    /// The scene to show. Possible values are 'rgbbox' and 'irreg'
    scene_name: String,
}

#[paw::main]
fn main(args: Args) -> Result<(), Box<dyn std::error::Error>> {
    let mut scene = match args.scene_name.as_ref() {
        "irreg" => (*ray::sample_scenes::IRREG).clone(),
        "rgbbox" => (*ray::sample_scenes::RGBBOX).clone(),
        s => panic!("Invalid scene: {}", s),
    };

    let (objs, cam) = ray::from_scene(args.width, args.height, &mut scene);

    let result = ray::render(&objs, args.width, args.height, &cam);

    let out_writer = match args.file_out {
        Some(x) => {
            let path = Path::new(&x);
            Box::new(File::create(&path).unwrap()) as Box<dyn Write>
        }
        None => Box::new(io::stdout()) as Box<dyn Write>,
    };

    ray::image2ppm(out_writer, result)?;

    Ok(())
}"""

In [220]:
# Extract tokens from the Rust sample with get_tokens (defined outside this chunk;
# presumably the tree-sitter based extractor, judging by the node-type names it prints — TODO confirm).
get_tokens(content, "rust")

{'type_identifier', 'identifier', 'field_identifier'}
['macro_use', 'structopt', 'std', 'fs', 'File', 'std', 'io', 'std', 'io', 'Write', 'std', 'path', 'Path', 'ray', 'derive', 'StructOpt', 'Args', 'structopt', 'short', 'file_out', 'Option', 'String', 'structopt', 'short', 'long', 'default_value', 'height', 'structopt', 'short', 'long', 'default_value', 'width', 'structopt', 'short', 'long', 'default_value', 'scene_name', 'String', 'paw', 'main', 'main', 'args', 'Args', 'Result', 'Box', 'std', 'error', 'Error', 'scene', 'args', 'scene_name', 'as_ref', 'ray', 'sample_scenes', 'IRREG', 'clone', 'ray', 'sample_scenes', 'RGBBOX', 'clone', 's', 'panic', 's', 'objs', 'cam', 'ray', 'from_scene', 'args', 'width', 'args', 'height', 'scene', 'result', 'ray', 'render', 'objs', 'args', 'width', 'args', 'height', 'cam', 'out_writer', 'args', 'file_out', 'Some', 'x', 'path', 'Path', 'new', 'x', 'Box', 'new', 'File', 'create', 'path', 'unwrap', 'Box', 'Write', 'None', 'Box', 'new', 'io', 'stdout', 'B

In [221]:
# Same Rust sample through get_tokens_pygments, for side-by-side comparison with the
# get_tokens result in the previous cell.
get_tokens_pygments(content, "rust")

{Token.Name.Builtin, Token.Name.Function, Token.Name, Token.Name.Class}
['structopt', 'std', 'fs', 'File', 'std', 'io', 'std', 'io', 'Write', 'std', 'path', 'Path', 'ray', 'Args', 'file_out', 'Option', 'String', 'height', 'width', 'scene_name', 'String', 'main', 'args', 'Args', 'Result', 'Box', 'dyn', 'std', 'error', 'Error', 'scene', 'args', 'scene_name', 'as_ref', 'ray', 'sample_scenes', 'IRREG', 'clone', 'ray', 'sample_scenes', 'RGBBOX', 'clone', 's', 'panic', 's', 'objs', 'cam', 'ray', 'from_scene', 'args', 'width', 'args', 'height', 'scene', 'result', 'ray', 'render', 'objs', 'args', 'width', 'args', 'height', 'cam', 'out_writer', 'args', 'file_out', 'Some', 'x', 'path', 'Path', 'new', 'x', 'Box', 'new', 'File', 'create', 'path', 'unwrap', 'Box', 'dyn', 'Write', 'None', 'Box', 'new', 'io', 'stdout', 'Box', 'dyn', 'Write', 'ray', 'image2ppm', 'out_writer', 'result', 'Ok']


In [239]:
# Swift sample (an extension of `DataStreamRequest` with three `validate` overloads) used as
# tokenizer input by the cells that follow. The text is data: keep it verbatim.
content = """
extension DataStreamRequest {
    /// A closure used to validate a request that takes a `URLRequest` and `HTTPURLResponse` and returns whether the
    /// request was valid.
    public typealias Validation = (_ request: URLRequest?, _ response: HTTPURLResponse) -> ValidationResult

    /// Validates that the response has a status code in the specified sequence.
    ///
    /// If validation fails, subsequent calls to response handlers will have an associated error.
    ///
    /// - Parameter statusCode: `Sequence` of acceptable response status codes.
    ///
    /// - Returns:              The instance.
    @discardableResult
    public func validate<S: Sequence>(statusCode acceptableStatusCodes: S) -> Self where S.Iterator.Element == Int {
        return validate { [unowned self] _, response in
            self.validate(statusCode: acceptableStatusCodes, response: response)
        }
    }

    /// Validates that the response has a content type in the specified sequence.
    ///
    /// If validation fails, subsequent calls to response handlers will have an associated error.
    ///
    /// - parameter contentType: The acceptable content types, which may specify wildcard types and/or subtypes.
    ///
    /// - returns: The request.
    @discardableResult
    public func validate<S: Sequence>(contentType acceptableContentTypes: @escaping @autoclosure () -> S) -> Self where S.Iterator.Element == String {
        return validate { [unowned self] _, response in
            self.validate(contentType: acceptableContentTypes(), response: response)
        }
    }

    /// Validates that the response has a status code in the default acceptable range of 200...299, and that the content
    /// type matches any specified in the Accept HTTP header field.
    ///
    /// If validation fails, subsequent calls to response handlers will have an associated error.
    ///
    /// - Returns: The instance.
    @discardableResult
    public func validate() -> Self {
        validate(statusCode: acceptableStatusCodes).validate(contentType: self.acceptableContentTypes)
    }
}"""

In [240]:
# Extract tokens from the Swift sample with get_tokens (defined outside this chunk;
# presumably the tree-sitter based extractor, judging by the node-type names it prints — TODO confirm).
get_tokens(content, "swift")

{'type_identifier', 'identifier'}
['DataStreamRequest', 'Validation', '_', 'request', 'URLRequest', '_', 'response', 'HTTPURLResponse', 'ValidationResult', 'discardableResult', 'validate', 'S', 'Sequence', 'statusCode', 'acceptableStatusCodes', 'S', 'Self', 'where', 'S', 'Iterator', 'Element', 'validate', 'unowned', 'self', 'response', 'in', 'self', 'validate', 'statusCode', 'acceptableStatusCodes', 'response', 'response', 'discardableResult', 'validate', 'S', 'Sequence', 'contentType', 'acceptableContentTypes', 'escaping', 'autoclosure', 'S', 'Self', 'where', 'S', 'Iterator', 'Element', 'validate', 'unowned', 'self', 'response', 'in', 'self', 'validate', 'contentType', 'acceptableContentTypes', 'response', 'response', 'discardableResult', 'validate', 'Self', 'validate', 'statusCode', 'acceptableStatusCodes', 'validate', 'contentType', 'self', 'acceptableContentTypes']


In [241]:
# Same Swift sample through get_tokens_pygments, for side-by-side comparison with the
# get_tokens result in the previous cell.
get_tokens_pygments(content, "swift")

{Token.Name.Builtin, Token.Name.Function, Token.Name, Token.Name.Class}
['DataStreamRequest', 'Validation', 'request', 'URLRequest', 'response', 'HTTPURLResponse', 'ValidationResult', 'discardableResult', 'validate', 'S', 'Sequence', 'statusCode', 'acceptableStatusCodes', 'S', 'S', 'Iterator', 'Element', 'Int', 'validate', 'response', 'validate', 'statusCode', 'acceptableStatusCodes', 'response', 'response', 'discardableResult', 'validate', 'S', 'Sequence', 'contentType', 'acceptableContentTypes', 'escaping', 'S', 'S', 'Iterator', 'Element', 'String', 'validate', 'response', 'validate', 'contentType', 'acceptableContentTypes', 'response', 'response', 'discardableResult', 'validate', 'validate', 'statusCode', 'acceptableStatusCodes', 'validate', 'contentType', 'acceptableContentTypes']


In [253]:
# Kotlin sample (class `JdbcConnectionImpl` from org.jetbrains.exposed.sql.statements.jdbc)
# used as tokenizer input by the cell that follows.
# Kept as a RAW string: the Kotlin source contains the escapes '\n' and "\n". In a plain
# triple-quoted literal Python would turn them into real line breaks, splitting the Kotlin
# char/string literals across lines and handing the tokenizer corrupted source.
content = r"""
package org.jetbrains.exposed.sql.statements.jdbc

import org.jetbrains.exposed.sql.ColumnType
import org.jetbrains.exposed.sql.Transaction
import org.jetbrains.exposed.sql.statements.Statement
import org.jetbrains.exposed.sql.statements.StatementType
import org.jetbrains.exposed.sql.statements.api.ExposedConnection
import org.jetbrains.exposed.sql.statements.api.ExposedDatabaseMetadata
import org.jetbrains.exposed.sql.statements.api.ExposedSavepoint
import org.jetbrains.exposed.sql.statements.api.PreparedStatementApi
import org.jetbrains.exposed.sql.transactions.TransactionManager
import java.sql.Connection
import java.sql.PreparedStatement

class JdbcConnectionImpl(override val connection: Connection) : ExposedConnection<Connection> {

    // Oracle driver could throw excpection on catalog
    override var catalog: String
        get() =  try { connection.catalog } catch (_: Exception) { null } ?: connection.metaData.userName ?: ""
        set(value) { try { connection.catalog = value } catch (_: Exception) {} }

    override var schema: String
        get() =  try { connection.schema } catch (_: Exception) { "" }
        set(value) { try { connection.schema = value } catch (_: Exception) {} }

    override fun commit() {
        connection.commit()
    }

    override fun rollback() {
        connection.rollback()
    }

    override val isClosed get() = connection.isClosed
    override fun close() {
        connection.close()
    }

    override var autoCommit: Boolean
        get() = connection.autoCommit
        set(value) { connection.autoCommit = value }

    override var transactionIsolation: Int
        get() = connection.transactionIsolation
        set(value) { connection.transactionIsolation = value }

    private val metadata by lazy {
        JdbcDatabaseMetadataImpl(catalog, connection.metaData)
    }

    override fun <T> metadata(body: ExposedDatabaseMetadata.() -> T): T = metadata.body()

    override fun prepareStatement(sql: String, returnKeys: Boolean) : PreparedStatementApi {
        val generated = if (returnKeys)
            PreparedStatement.RETURN_GENERATED_KEYS
        else
            PreparedStatement.NO_GENERATED_KEYS
        return JdbcPreparedStatementImpl(connection.prepareStatement(sql, generated), returnKeys)
    }

    override fun prepareStatement(sql: String, columns: Array<String>): PreparedStatementApi {
        return JdbcPreparedStatementImpl(connection.prepareStatement(sql, columns), true)
    }

    override fun executeInBatch(sqls: List<String>) {
        val types = sqls.map { stmt ->
            StatementType.values().find {
                stmt.startsWith(it.name, true)
            } ?: StatementType.OTHER
        }

        check(types.none { it == StatementType.SELECT }) {
            "SELECT statements are unsupported in batch execution"
        }

        val type = types.distinct().singleOrNull() ?: StatementType.OTHER
        val prepStatement = object : Statement<Unit>(type, emptyList()) {

            override fun prepared(transaction: Transaction, sql: String): PreparedStatementApi {
                val originalStatement = super.prepared(transaction, sql.substringBefore('\n'))
                val batchStatement = connection.createStatement().apply {
                    sqls.forEach {
                        addBatch(it)
                    }
                }
                return object : PreparedStatementApi by originalStatement {
                    override fun closeIfPossible() {
                        batchStatement.close()
                        originalStatement.closeIfPossible()
                    }

                    override fun executeUpdate(): Int {
                        batchStatement.executeBatch()
                        return 0
                    }
                }
            }

            override fun PreparedStatementApi.executeInternal(transaction: Transaction) {
                executeUpdate()
            }

            override fun prepareSQL(transaction: Transaction): String = sqls.joinToString("\n")

            override fun arguments(): Iterable<Iterable<Pair<ColumnType, Any?>>> = emptyList()
        }

        prepStatement.execute(TransactionManager.current())
    }

    override fun setSavepoint(name: String): ExposedSavepoint {
        return JdbcSavepoint(name, connection.setSavepoint(name))
    }

    override fun releaseSavepoint(savepoint: ExposedSavepoint) {
        connection.releaseSavepoint((savepoint as JdbcSavepoint).savepoint)
    }

    override fun rollback(savepoint: ExposedSavepoint) {
        connection.rollback((savepoint as JdbcSavepoint).savepoint)
    }
}
"""

In [254]:
# Run get_tokens_pygments (defined outside this chunk) on the Kotlin sample; no matching
# get_tokens call is made for Kotlin in this part of the notebook.
get_tokens_pygments(content, "kotlin")

{Token.Name.Property, Token.Name.Namespace, Token.Name, Token.Name.Function, Token.Name.Class}
['org.jetbrains.exposed.sql.statements.jdbc', 'org.jetbrains.exposed.sql.ColumnType', 'org.jetbrains.exposed.sql.Transaction', 'org.jetbrains.exposed.sql.statements.Statement', 'org.jetbrains.exposed.sql.statements.StatementType', 'org.jetbrains.exposed.sql.statements.api.ExposedConnection', 'org.jetbrains.exposed.sql.statements.api.ExposedDatabaseMetadata', 'org.jetbrains.exposed.sql.statements.api.ExposedSavepoint', 'org.jetbrains.exposed.sql.statements.api.PreparedStatementApi', 'org.jetbrains.exposed.sql.transactions.TransactionManager', 'java.sql.Connection', 'java.sql.PreparedStatement', 'JdbcConnectionImpl', 'connection', 'Connection', 'ExposedConnection', 'Connection', 'catalog', 'String', 'connection', 'catalog', '_', 'Exception', 'connection', 'metaData', 'userName', 'value', 'connection', 'catalog', 'value', '_', 'Exception', 'schema', 'String', 'connection', 'schema', '_', 'Except

In [8]:
# Haskell sample (module Math.Grads.Algo.Isomorphism.Types: type aliases plus the
# `GComparable` type class) used as tokenizer input by the cell that follows. Keep it verbatim.
content = """
{-# LANGUAGE MultiParamTypeClasses #-}
module Math.Grads.Algo.Isomorphism.Types
  ( VertexIndex
  , VComparator
  , EComparator
  , GComparable(..)
  ) where

import           Math.Grads.Graph (Graph, GraphEdge)


-- | Type alias for 'Int'.
--
type VertexIndex = Int

-- | Function that checks whether two vertices are identical.
-- Due to properties related to index of vertex,
-- like number of neighbors, we consider vertex indices instead of vertices.
--
type VComparator v1 v2 = VertexIndex -> VertexIndex -> Bool

-- | Function that checks whether two edges are identical.
-- Due to properties related to index of vertex,
-- like belonging to a cycle, we consider GraphEdge (Int, Int, e) instead of e.
--
type EComparator e1 e2 = GraphEdge e1 -> GraphEdge e2 -> Bool

-- | Type class for graphs that could be checked for isomorphism.
--
class (Graph g1, Graph g2) => GComparable g1 v1 e1 g2 v2 e2 where
  vComparator :: g1 v1 e1 -> g2 v2 e2 -> VComparator v1 v2
  eComparator :: g1 v1 e1 -> g2 v2 e2 -> EComparator e1 e2"""

In [13]:
# Run get_tokens_pygments (defined outside this chunk) on the Haskell sample; no matching
# get_tokens call is made for Haskell in this part of the notebook.
get_tokens_pygments(content, "haskell")

{Token.Name, Token.Name.Namespace}
['Math.Grads.Algo.Isomorphism.Types', 'Math.Grads.Graph', 'v1', 'v2', 'e1', 'e2', 'e1', 'e2', 'g1', 'g2', 'g1', 'v1', 'e1', 'g2', 'v2', 'e2', 'vComparator', 'g1', 'v1', 'e1', 'g2', 'v2', 'e2', 'v1', 'v2', 'eComparator', 'g1', 'v1', 'e1', 'g2', 'v2', 'e2', 'e1', 'e2']
