Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes search index format to fasten pod search --full command. #4249

Merged
merged 12 commits into from Oct 28, 2015
4 changes: 4 additions & 0 deletions CHANGELOG.md
Expand Up @@ -8,6 +8,10 @@ To install release candidates run `[sudo] gem install cocoapods --pre`

##### Enhancements

* Improve `pod search` performance while using _`--full`_ flag
[Muhammed Yavuz Nuzumlalı](https://github.com/manuyavuz)
[cocoapods-search#8](https://github.com/CocoaPods/cocoapods-search/issues/8)

* Improve message when there is no spec in repos for dependency set in Podfile.
[Muhammed Yavuz Nuzumlalı](https://github.com/manuyavuz)
[#4430](https://github.com/CocoaPods/CocoaPods/issues/4430)
Expand Down
4 changes: 2 additions & 2 deletions Gemfile.lock
Expand Up @@ -7,7 +7,7 @@ GIT

GIT
remote: https://github.com/CocoaPods/Core.git
revision: 8dcb8cbb3694102b1b391d4c0dd483339f9e5064
revision: d66fcb7160f060198c3808eb2ff3f51edf212687
branch: master
specs:
cocoapods-core (0.39.0)
Expand Down Expand Up @@ -49,7 +49,7 @@ GIT

GIT
remote: https://github.com/CocoaPods/cocoapods-search.git
revision: b85d8a86a08b9d30cd6d4b2c156bae5215f74c12
revision: 4e7de92e477f47918d869a7c4819251633efca0d
branch: master
specs:
cocoapods-search (0.1.0)
Expand Down
2 changes: 1 addition & 1 deletion lib/cocoapods/config.rb
Expand Up @@ -274,7 +274,7 @@ def default_test_podfile_path
# @return [Pathname] The file to use to cache the search data.
#
def search_index_file
cache_root + 'search_index.yaml'
cache_root + 'search_index.json'
end

private
Expand Down
159 changes: 115 additions & 44 deletions lib/cocoapods/sources_manager.rb
Expand Up @@ -110,27 +110,26 @@ def search(dependency)
#
# @raise If no source including the set can be found.
#
# @note Full text search requires to load the specification for each
# pod, hence is considerably slower.
#
# @return [Array<Set>] The sets that contain the search term.
#
def search_by_name(query, full_text_search = false)
if full_text_search
set_names = []
query_regexp = /#{query}/i
updated_search_index.each do |name, set_data|
texts = [name]
if full_text_search
texts << set_data['authors'].to_s if set_data['authors']
texts << set_data['summary'] if set_data['summary']
texts << set_data['description'] if set_data['description']
query_word_regexps = query.split.map { |word| /#{word}/i }
query_word_results_hash = {}
updated_search_index.each_value do |word_spec_hash|
word_spec_hash.each_pair do |word, spec_symbols|
query_word_regexps.each do |query_word_regexp|
set = (query_word_results_hash[query_word_regexp] ||= Set.new)
set.merge(spec_symbols) if word =~ query_word_regexp
end
end
set_names << name unless texts.grep(query_regexp).empty?
end
sets = set_names.sort.map do |name|
aggregate.representative_set(name)
found_set_symbols = query_word_results_hash.values.reduce(:&)
sets = found_set_symbols.map do |symbol|
aggregate.representative_set(symbol.to_s)
end
# Remove nil values because representative_set return nil if no pod is found in any of the sources.
sets.compact!
else
sets = aggregate.search_by_name(query, false)
end
Expand All @@ -142,41 +141,62 @@ def search_by_name(query, full_text_search = false)
sets
end

# Creates or updates the search data and returns it. The search data
# groups by name the following information for each set:
# Returns the search data. If a saved search data exists, retrieves it from file and returns it.
# Else, creates the search data from scratch, saves it to file system, and returns it.
# Search data is grouped by source repos. For each source, it contains a hash where keys are words
# and values are the pod names containing corresponding word.
#
# For each source, list of unique words are generated from the following spec information.
# - version
# - summary
# - description
# - authors
#
# @note This operation is fairly expensive, because of the YAML
# conversion.
#
# @return [Hash{String => String}] The up to date search data.
# @return [Hash{String => Hash{String => Array<String>}}] The up to date search data.
#
def updated_search_index
index = stored_search_index || {}
all.each do |source|
source_name = source.name
unless index[source_name]
UI.print "Creating search index for spec repo '#{source_name}'.."
index[source_name] = aggregate.generate_search_index_for_source(source)
UI.puts ' Done!'
end
end
save_search_index(index)
index
end

# Returns the search data stored in the file system.
# If existing data in the file system is not valid, returns nil.
#
def stored_search_index
unless @updated_search_index
if search_index_path.exist?
require 'yaml'
stored_index = YAML.load(search_index_path.read)
if stored_index && stored_index.is_a?(Hash)
search_index = aggregate.update_search_index(stored_index)
else
search_index = aggregate.generate_search_index
require 'json'
index = JSON.parse(search_index_path.read)
if index && index.is_a?(Hash) # TODO: should we also check if hash has correct hierarchy?
return @updated_search_index = index
end
else
search_index = aggregate.generate_search_index
end

File.open(search_index_path, 'w') do |file|
file.write(search_index.to_yaml)
end
@updated_search_index = search_index
@updated_search_index = nil
end
@updated_search_index
end

# Stores given search data in the file system.
# @param [Hash] index
# Index to be saved in file system
#
def save_search_index(index)
require 'json'
@updated_search_index = index
search_index_path.open('w') do |io|
io.write(@updated_search_index.to_json)
end
end

# Allows to clear the search index.
#
attr_writer :updated_search_index
Expand All @@ -192,6 +212,48 @@ def search_index_path
extend Executable
executable :git

# Updates the stored search index if there are changes in spec repos while updating them.
# Update is performed incrementally. Only the changed pods' search data is re-generated and updated.
# @param [Hash{Source => Array<String>}] changed_spec_paths
# A hash containing changed specification paths for each source.
#
def update_search_index_if_needed(changed_spec_paths)
search_index = stored_search_index
return unless search_index
changed_spec_paths.each_pair do |source, spec_paths|
index_for_source = search_index[source.name]
next unless index_for_source && spec_paths.length > 0
updated_pods = source.pods_for_specification_paths(spec_paths)

new_index = aggregate.generate_search_index_for_changes_in_source(source, spec_paths)
# First traverse search_index and update existing words
# Removed traversed words from new_index after adding to search_index,
# so that only non existing words will remain in new_index after enumeration completes.
index_for_source.each_pair do |word, _|
if new_index[word]
index_for_source[word] |= new_index[word]
else
index_for_source[word] -= updated_pods
end
end
# Now add non existing words remained in new_index to search_index
index_for_source.merge!(new_index)
end
save_search_index(search_index)
end

# Updates search index for changed pods in background
# @param [Hash{Source => Array<String>}] changed_spec_paths
# A hash containing changed specification paths for each source.
#
def update_search_index_if_needed_in_background(changed_spec_paths)
Process.fork do
Process.daemon
update_search_index_if_needed(changed_spec_paths)
exit
end
end

# Updates the local clone of the spec-repo with the given name or of all
# the git repos if the name is omitted.
#
Expand All @@ -208,22 +270,16 @@ def update(source_name = nil, show_output = false)
sources = git_sources
end

changed_spec_paths = {}
sources.each do |source|
UI.section "Updating spec repo `#{source.name}`" do
Dir.chdir(source.repo) do
begin
output = git! %w(pull --ff-only)
UI.puts output if show_output && !config.verbose?
rescue Informative
UI.warn 'CocoaPods was not able to update the ' \
"`#{source.name}` repo. If this is an unexpected issue " \
'and persists you can inspect it running ' \
'`pod repo update --verbose`'
end
end
changed_source_paths = source.update(show_output && !config.verbose?)
changed_spec_paths[source] = changed_source_paths if changed_source_paths.count > 0
check_version_information(source.repo)
end
end
# Perform search index update operation in background.
update_search_index_if_needed_in_background(changed_spec_paths)
end

# Returns whether a source is a GIT repo.
Expand Down Expand Up @@ -482,4 +538,19 @@ def name_for_url(url)
end
end
end

class Source
extend Executable
executable :git

def update_git_repo(show_output = false)
output = git! %w(pull --ff-only)
UI.puts output if show_output
rescue
UI.warn 'CocoaPods was not able to update the ' \
"`#{name}` repo. If this is an unexpected issue " \
'and persists you can inspect it running ' \
'`pod repo update --verbose`'
end
end
end
7 changes: 7 additions & 0 deletions spec/functional/command/repo/update_spec.rb
Expand Up @@ -19,6 +19,7 @@ module Pod
`git fetch -q`
`git branch --set-upstream-to=origin/master master`
end
SourcesManager.expects(:update_search_index_if_needed_in_background).with({}).returns(nil)
lambda { command('repo', 'update').run }.should.not.raise
end

Expand All @@ -27,6 +28,12 @@ module Pod
repo2 = repo_clone('repo1', 'repo2')
repo_make_readme_change(repo1, 'Updated')
Dir.chdir(repo1) { `git commit -a -m "Update"` }
SourcesManager.expects(:update_search_index_if_needed_in_background).with do |value|
value.each_pair do |source, paths|
source.name.should == 'repo2'
paths.should == ['README']
end
end
run_command('repo', 'update', 'repo2')
(repo2 + 'README').read.should.include 'Updated'
end
Expand Down
2 changes: 1 addition & 1 deletion spec/spec_helper/pre_flight.rb
Expand Up @@ -28,7 +28,7 @@ class Context
SpecHelper.temporary_directory.mkpath

# TODO
::Pod::SourcesManager.stubs(:search_index_path).returns(temporary_directory + 'search_index.yaml')
::Pod::SourcesManager.stubs(:search_index_path).returns(temporary_directory + 'search_index.json')

old_run_requirement.bind(self).call(description, spec)
end
Expand Down
2 changes: 1 addition & 1 deletion spec/unit/config_spec.rb
Expand Up @@ -161,7 +161,7 @@ module Pod
end

it 'returns the search index file' do
@config.search_index_file.to_s.should.end_with?('search_index.yaml')
@config.search_index_file.to_s.should.end_with?('search_index.json')
end
end

Expand Down
47 changes: 28 additions & 19 deletions spec/unit/sources_manager_spec.rb
Expand Up @@ -86,17 +86,7 @@ module Pod

it "generates the search index before performing a search if it doesn't exits" do
SourcesManager.stubs(:all).returns([@test_source])
Source::Aggregate.any_instance.expects(:generate_search_index).returns('BananaLib' => {})
Source::Aggregate.any_instance.expects(:update_search_index).never
SourcesManager.updated_search_index = nil
SourcesManager.search_by_name('BananaLib', true)
end

it 'updates the search index before performing a search if it exits' do
File.open(SourcesManager.search_index_path, 'w') { |file| file.write("---\nBananaLib:\n version: 0.0.1") }
SourcesManager.stubs(:all).returns([@test_source])
Source::Aggregate.any_instance.expects(:generate_search_index).never
Source::Aggregate.any_instance.expects(:update_search_index).returns('BananaLib' => {})
Source::Aggregate.any_instance.expects(:generate_search_index_for_source).with(@test_source).returns('BananaLib' => ['BananaLib'])
SourcesManager.updated_search_index = nil
SourcesManager.search_by_name('BananaLib', true)
end
Expand All @@ -105,7 +95,7 @@ module Pod
SourcesManager.unstub(:search_index_path)
config.cache_root = Config::DEFAULTS[:cache_root]
path = SourcesManager.search_index_path.to_s
path.should.match %r{Library/Caches/CocoaPods/search_index.yaml}
path.should.match %r{Library/Caches/CocoaPods/search_index.json}
end

describe 'managing sources by URL' do
Expand Down Expand Up @@ -235,28 +225,47 @@ module Pod
describe 'Updating Sources' do
extend SpecHelper::TemporaryRepos

it 'update source backed by a git repository' do
it 'updates source backed by a git repository' do
set_up_test_repo_for_update
SourcesManager.expects(:update_search_index_if_needed_in_background).with({}).returns(nil)
SourcesManager.update(test_repo_path.basename.to_s, true)
UI.output.should.match /is up to date/
end

it 'uses the only fast forward git option' do
set_up_test_repo_for_update
SourcesManager.expects(:git!).with { |options| options.should.include? '--ff-only' }
SourcesManager.update(test_repo_path.basename.to_s, true)
end

it 'prints a warning if the update failed' do
UI.warnings = ''
set_up_test_repo_for_update
Dir.chdir(test_repo_path) do
`git remote set-url origin file:///dev/null`
end
SourcesManager.expects(:update_search_index_if_needed_in_background).with({}).returns(nil)
SourcesManager.update(test_repo_path.basename.to_s, true)
UI.warnings.should.include('not able to update the `master` repo')
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test can come back


it 'updates search index for changed paths if source is updated' do
prev_index = { @test_source.name => {} }
SourcesManager.expects(:stored_search_index).returns(prev_index)

SourcesManager.expects(:save_search_index).with do |value|
value[@test_source.name]['BananaLib'].should.include(:BananaLib)
value[@test_source.name]['JSONKit'].should.include(:JSONKit)
end
changed_paths = { @test_source => %w(BananaLib/1.0/BananaLib.podspec JSONKit/1.4/JSONKit.podspec) }
SourcesManager.update_search_index_if_needed(changed_paths)
end

it 'does not update search index if it does not contain source even if there are changes in source' do
prev_index = {}
SourcesManager.expects(:stored_search_index).returns(prev_index)

SourcesManager.expects(:save_search_index).with do |value|
value[@test_source.name].should.be.nil
end
changed_paths = { @test_source => %w(BananaLib/1.0/BananaLib.podspec JSONKit/1.4/JSONKit.podspec) }
SourcesManager.update_search_index_if_needed(changed_paths)
end

it 'returns whether a source is backed by a git repo' do
SourcesManager.git_repo?(SourcesManager.master_repo_dir).should.be.true
SourcesManager.git_repo?(Pathname.new('/tmp')).should.be.false
Expand Down