public
Description: A language detection library for Ruby that uses bloom filters for speed.
Homepage:
Clone URL: git://github.com/peterc/whatlanguage.git
whatlanguage / build_lang_from_wordlists.rb
100644 14 lines (10 sloc) 0.52 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Builds all of the word lists in ./wordlists/ into filter files in ./lang/
#
# Every regular, non-hidden file in ./wordlists/ is passed to
# WhatLanguage.filter_from_dictionary and the dumped filter is written to
# ./lang/<wordlist name>.lang. Paths are resolved relative to this script,
# not the current working directory.

base_folder = File.expand_path(File.dirname(__FILE__))

# Load the library by absolute path: a bare 'lib/whatlanguage' only resolves
# when the current directory is on the load path, which is no longer the case
# from Ruby 1.9.2 onwards.
require File.join(base_folder, 'lib', 'whatlanguage')

languages_folder = File.join(base_folder, "lang")
wordlists_folder = File.join(base_folder, "wordlists")

# Make sure the output folder exists before any filter is written into it.
Dir.mkdir(languages_folder) unless File.directory?(languages_folder)

# Sorted so the build order (and the progress output) is the same on every
# filesystem.
Dir.entries(wordlists_folder).grep(/\w/).sort.each do |lang|
  next if lang == 'generators'
  # /\w/ alone lets hidden files such as .DS_Store or .gitignore through;
  # they are not word lists.
  next if lang.start_with?('.')

  wordlist = File.join(wordlists_folder, lang)
  # Skip sub-directories; only plain files are treated as word lists.
  next unless File.file?(wordlist)

  puts "Doing #{lang}"
  filter = WhatLanguage.filter_from_dictionary(wordlist)
  File.open(File.join(languages_folder, lang + ".lang"), 'wb') { |f| f.write filter.dump }
end