-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultiplex_constants.rb
138 lines (113 loc) · 4.09 KB
/
multiplex_constants.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# This script indexes the classes and modules within a set of files using
# Prism's saved-location functionality (save_location and Prism::Relocation).
require "prism"
require "etc"
require "tempfile"
module Indexer
# Collects the entries produced while an AST is being walked. It implements
# #enter so that it can be passed to the various save* APIs; every node_id and
# field_name pair handed to #enter is bundled up, together with the scope that
# was active at the time, so that it can be written back to the parent
# process.
class Repository
  # scope is the Array of name parts for the namespace currently being
  # visited; entries is the Array of [qualified_scope, node_id, field_name]
  # triples recorded so far.
  attr_reader :scope, :entries

  def initialize
    @scope = []
    @entries = []
  end

  # Runs the given block with next_scope (an Array of name parts) appended to
  # the current scope. The previous scope is restored once the block finishes,
  # including when the block raises, so a failure while visiting one
  # definition cannot leave later entries recorded under the wrong namespace.
  #
  # Returns the value of the block.
  def with(next_scope)
    previous_scope = scope
    @scope = previous_scope + next_scope

    begin
      yield
    ensure
      @scope = previous_scope
    end
  end

  # Returns true when no entries have been recorded.
  def empty?
    entries.empty?
  end

  # Records a node_id/field_name pair under the current scope, with the scope
  # parts joined by "::". Returns the entries Array.
  def enter(node_id, field_name)
    entries << [scope.join("::"), node_id, field_name]
  end
end
# Walks the AST looking for class and module definitions. For each one, the
# location of its constant path is saved into the repository while the
# repository's scope is extended with the parts of that constant path, and the
# body is then visited so that nested definitions are recorded as well.
class Visitor < Prism::Visitor
  attr_reader :repository

  def initialize(repository)
    @repository = repository
  end

  def visit_class_node(node)
    record_definition(node)
  end

  def visit_module_node(node)
    record_definition(node)
  end

  private

  # Shared handling for class and module nodes, which are treated the same
  # way: extend the scope, save the constant path's location, visit the body.
  def record_definition(node)
    constant_path = node.constant_path

    repository.with(constant_path.full_name_parts) do
      constant_path.save_location(repository)
      visit(node.body)
    end
  end
end
# Parse the file at filepath and record its classes and modules in a fresh
# repository. Returns nil when nothing was recorded; otherwise returns a
# single string holding the filepath followed by every field of every entry,
# all separated by "|", ready to be sent to the parent process.
def self.index(filepath)
  repository = Repository.new

  syntax_tree = Prism.parse_file(filepath).value
  syntax_tree.accept(Visitor.new(repository))
  return if repository.empty?

  serialized_entries = repository.entries.join("|")
  "#{filepath}|#{serialized_entries}"
end
end
# Indexes every file matched by glob using a pool of forked worker processes.
#
# glob  - A pattern understood by Dir[] that selects the files to index.
# count - The number of worker processes to fork. Defaults to one fewer than
#         the number of processors. Values below one are raised to one.
#
# Returns a Hash that maps each scope name reported by the workers to an Array
# of the values returned by the relocation repository's #enter for that name.
def index_glob(glob, count = Etc.nprocessors - 1)
  # Always run at least one worker. With zero workers (the default on a
  # single-processor machine) the round-robin below would divide by zero and
  # no file would ever be indexed.
  count = [count, 1].max

  process_ids = []
  filepath_writers = []
  index_reader, index_writer = IO.pipe

  # For each number in count, fork off a worker that has access to two pipes.
  # The first pipe is the index_writer, to which it writes all of the results
  # of indexing the various files. The second pipe is the filepath_reader,
  # from which it reads the filepaths that it needs to index.
  count.times do
    filepath_reader, filepath_writer = IO.pipe

    process_ids << fork do
      # The child inherits copies of every pipe end the parent has open at
      # fork time. Close the ones it does not use, including the write ends
      # that feed the previously forked workers; otherwise those workers would
      # not see end-of-file on their filepath pipe until this worker exits.
      filepath_writers.each(&:close)
      filepath_writer.close
      index_reader.close

      while (filepath = filepath_reader.gets(chomp: true))
        results = Indexer.index(filepath)
        index_writer.puts(results) if results
      end
    end

    filepath_reader.close
    filepath_writers << filepath_writer
  end

  # Only the workers write results; the parent keeps just the read end so
  # that it sees end-of-file once every worker has finished.
  index_writer.close

  # In a separate thread, write all of the filepaths to the various worker
  # processes. This is done in a separate thread since puts will eventually
  # block when each of the pipe buffers fills up. We write in a round-robin
  # fashion to the various workers. This could be improved using a
  # work-stealing algorithm, but is fine if you don't end up having a ton of
  # variety in the size of your files.
  writer_thread =
    Thread.new do
      Dir[glob].each_with_index do |filepath, position|
        filepath_writers[position % count].puts(filepath)
      end
    end

  index = Hash.new { |hash, key| hash[key] = [] }

  # In a separate thread, read all of the results from the various worker
  # processes and store them in the index. This is done in a separate thread
  # so that reads and writes can be interleaved. This is important so that the
  # index pipe doesn't fill up and block the writer.
  reader_thread =
    Thread.new do
      while (line = index_reader.gets(chomp: true))
        # Each line is the filepath followed by flattened
        # (name, node_id, field_name) triples, all separated by "|".
        filepath, *entries = line.split("|")

        repository =
          Prism::Relocation
            .filepath(filepath)
            .filepath
            .lines
            .code_unit_columns(Encoding::UTF_16LE)
            .leading_comments

        entries.each_slice(3) do |(name, node_id, field_name)|
          index[name] << repository.enter(Integer(node_id), field_name.to_sym)
        end
      end
    end

  writer_thread.join

  # Closing the write ends tells the workers that no more filepaths are
  # coming, which lets them finish and in turn ends the reader thread.
  filepath_writers.each(&:close)
  reader_thread.join
  index_reader.close

  process_ids.each { |process_id| Process.wait(process_id) }
  index
end
# Index every Ruby file under lib, resolved relative to this script's
# directory.
lib_glob = File.expand_path("../../lib/**/*.rb", __dir__)
index_glob(lib_glob)