Skip to content

Commit

Permalink
Internal encoding can only be UTF-8.
Browse files Browse the repository at this point in the history
  • Loading branch information
brixen committed May 16, 2020
1 parent 1a13240 commit a0a9ed9
Show file tree
Hide file tree
Showing 74 changed files with 267 additions and 253 deletions.
8 changes: 2 additions & 6 deletions core/encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -517,15 +517,11 @@ def self.default_external=(enc)
end

def self.default_internal
if undefined.equal? @default_internal
@default_internal = find "internal"
end
@default_internal
# Rubinius internal encoding is always UTF-8
end

def self.default_internal=(enc)
set_alias_index "internal", enc
@default_internal = undefined
Rubinius::Logger.system.warn "Encoding.default_internal= is deprecated. Rubinius internal encoding is always UTF-8"
end

def self.find(name)
Expand Down
32 changes: 17 additions & 15 deletions core/loader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -296,15 +296,27 @@ def options(argv=ARGV)
@load_paths << dir
end

options.on "-K", "Ignored $KCODE option for compatibility"
options.on "-K", "Ignored $KCODE option for compatibility" do
msg = "The -K option is deprecated. Rubinius internal encoding is always UTF-8"
STDERR.puts msg
Rubinius::Logger.system.warn msg
end

options.on "-U", "Set Encoding.default_internal to UTF-8" do
set_default_internal_encoding('UTF-8')
msg = "The -U option is deprecated. Rubinius internal encoding is always UTF-8"
STDERR.puts msg
Rubinius::Logger.system.warn msg
end

options.on "-E", "ENC", "Set external:internal character encoding to ENC" do |enc|
ext, int = enc.split(":")
Encoding.default_external = ext if ext and !ext.empty?
set_default_internal_encoding(int) if int and !int.empty?
external, internal = enc.split(":")
Encoding.default_external = external if external and !external.empty?

if internal and !internal.empty?
msg = "The -E option setting internal encoding is deprecated. Rubinius internal encoding is always UTF-8"
STDERR.puts msg
Rubinius::Logger.system.warn msg
end
end

options.on "--main", "PATH", "Load PATH directly from CodeDB" do |path|
Expand Down Expand Up @@ -495,16 +507,6 @@ def set_program_name(name)
end
private :set_program_name

def set_default_internal_encoding(encoding)
if @default_internal_encoding_set && Encoding.default_internal.name != encoding
raise RuntimeError, "Default internal encoding already set to '#{Encoding.default_internal.name}'."
else
@default_internal_encoding_set = true
Encoding.default_internal = encoding
end
end
private :set_default_internal_encoding

def handle_rubyopt(options)
if env_opts = ENV['RUBYOPT']
Rubinius::Logger.system.write "RUBYOPT: #{env_opts}"
Expand Down
9 changes: 9 additions & 0 deletions core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,10 @@ def encode!(to=undefined, from=undefined, options=undefined)
options = Rubinius::Type.coerce_to options, Hash, :to_hash
end

# TODO: Only UTF-8 is allowed for internal encoding. This is a preliminary
# step to fixing all encoding-related interfaces.
to_enc = Encoding::UTF_8 unless to_enc == Encoding::UTF_8

if ascii_only? and from_enc.ascii_compatible? and to_enc and to_enc.ascii_compatible?
force_encoding to_enc
elsif to_enc and from_enc != to_enc
Expand Down Expand Up @@ -1152,6 +1156,10 @@ def end_with?(*suffixes)

def force_encoding(enc)
@encoding = Rubinius::Type.coerce_to_encoding enc

# TODO: Only UTF-8 encodings are supported internally.
return self unless @encoding.equal?(Encoding::UTF_8) or @encoding.equal?(Encoding::BINARY)

unless @ascii_only && @encoding.ascii_compatible?
@ascii_only = @valid_encoding = @num_chars = nil
end
Expand All @@ -1160,6 +1168,7 @@ def force_encoding(enc)
@valid_encoding = true
@num_chars = 0
end

self
end

Expand Down
9 changes: 2 additions & 7 deletions machine/capi/encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,8 @@ extern "C" {
}

rb_encoding *rb_default_internal_encoding(void) {
NativeMethodEnvironment* env = NativeMethodEnvironment::get();
Encoding* enc = Encoding::find(env->state(), "internal");
if(enc->nil_p()) {
return 0;
} else {
return enc->encoding();
}
// Rubinius internal encoding is always UTF-8
return 0;
}

rb_encoding *rb_default_external_encoding(void) {
Expand Down
13 changes: 2 additions & 11 deletions machine/class/encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ namespace rubinius {
G(encoding)->set_const(state, "EncodingList", G(encoding_list));

G(encoding)->set_ivar(state, state->symbol("@default_external"), G(undefined));
G(encoding)->set_ivar(state, state->symbol("@default_internal"), G(undefined));
G(encoding)->set_ivar(state, state->symbol("@filesystem_encoding"), G(undefined));

Encoding* binary = create_bootstrap(state, "ASCII-8BIT", eBinary, ONIG_ENCODING_ASCII);
Expand Down Expand Up @@ -229,16 +228,8 @@ namespace rubinius {
}

Encoding* Encoding::default_internal(STATE) {
Encoding* enc;
Symbol* default_internal = state->symbol("default_internal");
Object* obj = G(encoding)->get_ivar(state, default_internal);

if(!(enc = try_as<Encoding>(obj))) {
enc = Encoding::find(state, "internal");
G(encoding)->set_ivar(state, default_internal, enc);
}

return enc;
// Rubinius internal encoding is always UTF-8
return nil<Encoding>();
}

Encoding* Encoding::filesystem_encoding(STATE) {
Expand Down
7 changes: 0 additions & 7 deletions spec/core/mirror/string/byte_index_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,5 @@
it "returns nil if the search String is not a complete character" do
string_mirror("あそこ").byte_index("\xe3\x82").should be_nil
end

it "raises an ArgumentError if the encoding of String and pattern are incompatible" do
pattern = "こ".encode Encoding::EUC_JP
lambda do
string_mirror("あそこ").byte_index pattern
end.should raise_error(ArgumentError)
end
end
end
6 changes: 0 additions & 6 deletions spec/core/string/byte_to_character_index_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,14 @@

before :each do
@utf8 = Rubinius::Mirror.reflect("hello 椎名深夏" * 1000)
@utf32 = Rubinius::Mirror.reflect(@utf8.object.encode(Encoding::UTF_32BE))
@shift_jis = Rubinius::Mirror.reflect(@utf8.object.encode(Encoding::SHIFT_JIS))
end

it "returns the character index for the character containing the byte at the given byte index" do
@utf8.byte_to_character_index(358).should == 200
@utf32.byte_to_character_index(358).should == 89
@shift_jis.byte_to_character_index(358).should == 257
end

it "returns the character index offset for the character containing the byte at the given byte index searching from given byte index" do
@utf8.byte_to_character_index(234, 642).should == 130
@utf32.byte_to_character_index(234, 1432).should == 58
@shift_jis.byte_to_character_index(234, 500).should == 168
end

end
Expand Down
6 changes: 0 additions & 6 deletions spec/core/string/character_to_byte_index_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,14 @@

before :each do
@utf8 = Rubinius::Mirror.reflect("hello 椎名深夏" * 1000)
@utf32 = Rubinius::Mirror.reflect(@utf8.object.encode(Encoding::UTF_32BE))
@shift_jis = Rubinius::Mirror.reflect(@utf8.object.encode(Encoding::SHIFT_JIS))
end

it "returns the byte index for the given character index" do
@utf8.character_to_byte_index(358).should == 642
@utf32.character_to_byte_index(358).should == 1432
@shift_jis.character_to_byte_index(358).should == 500
end

it "returns the byte index for the given character index searching from given byte index" do
@utf8.character_to_byte_index(234, 642).should == 1064
@utf32.character_to_byte_index(234, 1432).should == 2368
@shift_jis.character_to_byte_index(234, 500).should == 828
end

end
Expand Down
5 changes: 2 additions & 3 deletions spec/ruby/command_line/dash_upper_e_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
describe "ruby -E" do
it "warns that both options are deprecated if used with -U" do
ruby_exe("p 1",
:options => '-Eascii:ascii -U',
:args => '2>&1').should =~ /RuntimeError/
ruby_exe('', :options => '-Eascii:ascii -U', :args => '2>&1').should == \
"The -E option setting internal encoding is deprecated. Rubinius internal encoding is always UTF-8\nThe -U option is deprecated. Rubinius internal encoding is always UTF-8\n"
end
end
33 changes: 3 additions & 30 deletions spec/ruby/command_line/dash_upper_k_spec.rb
Original file line number Diff line number Diff line change
@@ -1,33 +1,6 @@
describe 'The -K command line option sets __ENCODING__' do
it "to Encoding::ASCII_8BIT with -Ka" do
ruby_exe("print __ENCODING__", :options => '-Ka').should == Encoding::ASCII_8BIT.to_s
end

it "to Encoding::ASCII_8BIT with -KA" do
ruby_exe("print __ENCODING__", :options => '-KA').should == Encoding::ASCII_8BIT.to_s
end

it "to Encoding::EUC_JP with -Ke" do
ruby_exe("print __ENCODING__", :options => '-Ke').should == Encoding::EUC_JP.to_s
end

it "to Encoding::EUC_JP with -KE" do
ruby_exe("print __ENCODING__", :options => '-KE').should == Encoding::EUC_JP.to_s
end

it "to Encoding::UTF_8 with -Ku" do
ruby_exe("print __ENCODING__", :options => '-Ku').should == Encoding::UTF_8.to_s
end

it "to Encoding::UTF_8 with -KU" do
ruby_exe("print __ENCODING__", :options => '-KU').should == Encoding::UTF_8.to_s
end

it "to Encoding::Windows_31J with -Ks" do
ruby_exe("print __ENCODING__", :options => '-Ks').should == Encoding::Windows_31J.to_s
end

it "to Encoding::Windows_31J with -KS" do
ruby_exe("print __ENCODING__", :options => '-KS').should == Encoding::Windows_31J.to_s
it "warns that -K is deprecated" do
ruby_exe('', :options => '-K', :args => '2>&1').should == \
"The -K option is deprecated. Rubinius internal encoding is always UTF-8\n"
end
end
41 changes: 3 additions & 38 deletions spec/ruby/command_line/dash_upper_u_spec.rb
Original file line number Diff line number Diff line change
@@ -1,41 +1,6 @@
describe "ruby -U" do
it "sets Encoding.default_internal to UTF-8" do
ruby_exe('print Encoding.default_internal.name',
:options => '-U').should == 'UTF-8'
end

it "does nothing different if specified multiple times" do
ruby_exe('print Encoding.default_internal.name',
:options => '-U -U').should == 'UTF-8'
end

it "is overruled by Encoding.default_internal=" do
ruby_exe('Encoding.default_internal="ascii"; print Encoding.default_internal.name',
:options => '-U').should == 'US-ASCII'
end

it "does not affect the default external encoding" do
ruby_exe('Encoding.default_external="ascii"; print Encoding.default_external.name',
:options => '-U').should == 'US-ASCII'
end

it "does not affect the source encoding" do
ruby_exe("print __ENCODING__.name",
:options => '-U -KE').should == 'EUC-JP'
ruby_exe("print __ENCODING__.name",
:options => '-KE -U').should == 'EUC-JP'
end

# I assume IO redirection will break on Windows...
it "raises a RuntimeError if used with -Eext:int" do
ruby_exe("p 1",
:options => '-U -Eascii:ascii',
:args => '2>&1').should =~ /RuntimeError/
end

it "raises a RuntimeError if used with -E:int" do
ruby_exe("p 1",
:options => '-U -E:ascii',
:args => '2>&1').should =~ /RuntimeError/
it "warns that -U is deprecated" do
ruby_exe('', :options => '-U', :args => '2>&1').should == \
"The -U option is deprecated. Rubinius internal encoding is always UTF-8\n"
end
end
10 changes: 0 additions & 10 deletions spec/ruby/core/argf/binmode_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,4 @@
end
end
end

it "sets the file's encoding to ASCII-8BIT" do
argv [@bin_file, @file1] do
ARGF.binmode
ARGF.binmode?.should be_true
ARGF.gets.encoding.should == Encoding::ASCII_8BIT
ARGF.skip
ARGF.read.encoding.should == Encoding::ASCII_8BIT
end
end
end
8 changes: 0 additions & 8 deletions spec/ruby/core/argf/shared/read.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,5 @@
Encoding.default_external = @external
Encoding.default_internal = @internal
end

it "reads the contents of the file with default encoding" do
Encoding.default_external = Encoding::ASCII_8BIT

argv [@file1_name, @file2_name] do
ARGF.send(@method, 4).encoding.should == Encoding::ASCII_8BIT
end
end
end
end
7 changes: 0 additions & 7 deletions spec/ruby/core/array/shared/inspect.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,5 @@

array.send(@method).encoding.name.should == "US-ASCII"
end

it "raises if inspected result is not default external encoding" do
utf_16be = mock("utf_16be")
utf_16be.should_receive(:inspect).and_return("utf_16be".encode!(Encoding::UTF_16BE))

lambda { [utf_16be].send(@method) }.should raise_error(Encoding::CompatibilityError)
end
end
end
6 changes: 0 additions & 6 deletions spec/ruby/core/dir/entries_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,6 @@
entries.first.encoding.should equal(Encoding::EUC_JP)
end

it "returns entries transcoded to the default internal encoding" do
Encoding.default_internal = Encoding::EUC_KR
entries = Dir.entries File.join(DirSpecs.mock_dir, 'special')
entries.first.encoding.should equal(Encoding::EUC_KR)
end

it "raises a SystemCallError if called with a nonexistent directory" do
lambda { Dir.entries DirSpecs.nonexistent }.should raise_error(SystemCallError)
end
Expand Down
24 changes: 12 additions & 12 deletions spec/ruby/core/encoding/compatible_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,18 @@
end

it "returns the String's Encoding if it is not US-ASCII but both are ASCII only" do
[ [Encoding, "abc", Encoding::ASCII_8BIT],
[ [Encoding, "abc", Encoding::UTF_8],
[Encoding, "abc".encode("utf-8"), Encoding::UTF_8],
[Encoding, "abc".encode("euc-jp"), Encoding::EUC_JP],
[Encoding, "abc".encode("shift_jis"), Encoding::Shift_JIS],
[Encoding, "abc".encode("euc-jp"), Encoding::UTF_8],
[Encoding, "abc".encode("shift_jis"), Encoding::UTF_8],
].should be_computed_by(:compatible?, /abc/)
end

it "returns the String's Encoding if the String is not ASCII only" do
[ [Encoding, "\xff", Encoding::ASCII_8BIT],
[ [Encoding, "\xff", Encoding::UTF_8],
[Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8],
[Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP],
[Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS],
[Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::UTF_8],
[Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::UTF_8],
].should be_computed_by(:compatible?, /abc/)
end
end
Expand All @@ -161,18 +161,18 @@
end

it "returns the String's Encoding if it is not US-ASCII but both are ASCII only" do
[ [Encoding, "abc", Encoding::ASCII_8BIT],
[ [Encoding, "abc", Encoding::UTF_8],
[Encoding, "abc".encode("utf-8"), Encoding::UTF_8],
[Encoding, "abc".encode("euc-jp"), Encoding::EUC_JP],
[Encoding, "abc".encode("shift_jis"), Encoding::Shift_JIS],
[Encoding, "abc".encode("euc-jp"), Encoding::UTF_8],
[Encoding, "abc".encode("shift_jis"), Encoding::UTF_8],
].should be_computed_by(:compatible?, :abc)
end

it "returns the String's Encoding if the String is not ASCII only" do
[ [Encoding, "\xff", Encoding::ASCII_8BIT],
[ [Encoding, "\xff", Encoding::UTF_8],
[Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8],
[Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP],
[Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS],
[Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::UTF_8],
[Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::UTF_8],
].should be_computed_by(:compatible?, :abc)
end
end
Expand Down

0 comments on commit a0a9ed9

Please sign in to comment.