Skip to content

Commit

Permalink
Merge pull request #69 from BallAerospace/bugfix/66-improve-graceful-…
Browse files Browse the repository at this point in the history
…shutdown-on-linux

closes #66. Improve graceful shutdown on linux
  • Loading branch information
ryanmelt committed Feb 20, 2015
2 parents 524c080 + d71c4d8 commit 4f4a132
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 40 deletions.
20 changes: 4 additions & 16 deletions lib/cosmos/interfaces/udp_interface.rb
Expand Up @@ -91,22 +91,10 @@ def connected?

# Close the active ports (read and/or write) and set the sockets to nil.
def disconnect
# NOTE(review): this is a rendered diff hunk, so the old and the new method
# bodies appear back to back. Per the file stats (4 additions & 16 deletions)
# the two begin/rescue blocks below are the removed implementation.
#
# Removed: close the write socket by hand unless it is already closed, and
# clear @write_socket even when close raises IOError.
begin
if @write_socket
@write_socket.close unless @write_socket.closed?
@write_socket = nil
end
rescue IOError
@write_socket = nil
end
# Removed: the same hand-rolled close for the read socket.
begin
if @read_socket
@read_socket.close unless @read_socket.closed?
@read_socket = nil
end
rescue IOError
@read_socket = nil
end
# Added: delegate closing to Cosmos.close_socket, then clear each instance
# variable unconditionally.
Cosmos.close_socket(@write_socket)
@write_socket = nil
Cosmos.close_socket(@read_socket)
@read_socket = nil
end

# If the read port was given, the read_socket is read and the data returned
Expand Down
6 changes: 3 additions & 3 deletions lib/cosmos/io/json_drb.rb
Expand Up @@ -55,7 +55,7 @@ def initialize
def stop_service
# Hand the service thread to Cosmos.kill_thread and forget the reference.
Cosmos.kill_thread(self, @thread)
@thread = nil
# NOTE(review): rendered diff hunk - the next line is the removed version
# (direct close guarded by nil and closed? checks) ...
@listen_socket.close if @listen_socket and !@listen_socket.closed?
# ... and this line is its replacement, routing the close through
# Cosmos.close_socket.
Cosmos.close_socket(@listen_socket)
@listen_socket = nil
end

Expand Down Expand Up @@ -116,7 +116,7 @@ def start_service(hostname = nil, port = nil, object = nil)
end

if @acl and !@acl.allow_socket?(socket)
socket.close
Cosmos.close_socket(socket)
next
end
# Create new thread for new connection
Expand Down Expand Up @@ -270,7 +270,7 @@ def create_client_thread(socket)
break unless process_request(request_data, my_socket, start_time)
else
# Socket was closed by client
my_socket.close unless my_socket.closed?
Cosmos.close_socket(my_socket)
break
end
end
Expand Down
17 changes: 13 additions & 4 deletions lib/cosmos/io/json_drb_object.rb
Expand Up @@ -57,8 +57,7 @@ def initialize(hostname, port, connect_timeout = 1.0)

# Disconnects from the JSON server
def disconnect
# NOTE(review): rendered diff hunk. The next two lines are the removed
# implementation: copy @socket into a local, then close it if it is set and
# not already closed.
socket = @socket
socket.close if socket and !socket.closed?
# Added replacement: pass @socket straight to Cosmos.close_socket. @socket
# itself is not cleared here.
Cosmos.close_socket(@socket)
end

# Permanently disconnects from the JSON server
Expand Down Expand Up @@ -92,21 +91,31 @@ def method_missing(method_name, *method_params)
rescue IO::WaitWritable
begin
_, sockets, _ = IO.select(nil, [@socket], nil, @connect_timeout) # wait 3-way handshake completion
rescue Errno::ENOTSOCK
rescue IOError, Errno::ENOTSOCK
disconnect()
@socket = nil
raise "Connect canceled"
end
if sockets and !sockets.empty?
begin
@socket.connect_nonblock(addr) # check connection failure
rescue Errno::EISCONN
rescue IOError, Errno::ENOTSOCK
disconnect()
@socket = nil
raise "Connect canceled"
rescue Errno::EINPROGRESS
retry
rescue Errno::EISCONN, Errno::EALREADY
end
else
disconnect()
@socket = nil
raise "Connect timeout"
end
rescue IOError, Errno::ENOTSOCK
disconnect()
@socket = nil
raise "Connect canceled"
end
rescue => e
raise DRb::DRbConnError, e.message
Expand Down
6 changes: 2 additions & 4 deletions lib/cosmos/io/tcpip_server.rb
Expand Up @@ -187,7 +187,7 @@ def disconnect
# Shutdown Listen Socket(s)
@listen_sockets.each do |listen_socket|
begin
listen_socket.close unless listen_socket.closed?
Cosmos.close_socket(listen_socket)
rescue IOError
# Ok may have been closed by the thread
end
Expand Down Expand Up @@ -374,9 +374,7 @@ def listen_thread_body(listen_socket, listen_write, listen_read, thread_reader)
addr = ["AF_INET", 10, "lc630", host_ip.to_s]
if not System.instance.acl.allow_addr?(addr)
# Reject connection
if not socket.closed?
socket.close()
end
Cosmos.close_socket(socket)
Logger.instance.info "Tcpip server rejected connection from #{hostname}(#{host_ip}):#{port}"
return
end
Expand Down
10 changes: 8 additions & 2 deletions lib/cosmos/streams/tcpip_client_stream.rb
Expand Up @@ -88,17 +88,23 @@ def connect_nonblock(socket, addr)
rescue IO::WaitWritable
begin
_, sockets, _ = IO.select(nil, [socket], nil, @connect_timeout) # wait 3-way handshake completion
rescue Errno::ENOTSOCK
rescue IOError, Errno::ENOTSOCK
raise "Connect canceled"
end
if sockets and !sockets.empty?
begin
socket.connect_nonblock(addr) # check connection failure
rescue Errno::EISCONN
rescue IOError, Errno::ENOTSOCK
raise "Connect canceled"
rescue Errno::EINPROGRESS
retry
rescue Errno::EISCONN, Errno::EALREADY
end
else
raise "Connect timeout"
end
rescue IOError, Errno::ENOTSOCK
raise "Connect canceled"
end
end

Expand Down
6 changes: 3 additions & 3 deletions lib/cosmos/streams/tcpip_socket_stream.rb
Expand Up @@ -66,7 +66,7 @@ def read
# These can happen with the socket being closed while waiting on select
data = ''
end
rescue Errno::ECONNRESET, Errno::ECONNABORTED
rescue Errno::ECONNRESET, Errno::ECONNABORTED, IOError, Errno::ENOTSOCK
data = ''
end

Expand Down Expand Up @@ -132,8 +132,8 @@ def connected?

# Disconnect by closing the sockets
def disconnect
# NOTE(review): rendered diff hunk. The next two lines are the removed
# implementation: close each socket directly if it is set and still open.
@write_socket.close if @write_socket and !@write_socket.closed?
@read_socket.close if @read_socket and !@read_socket.closed?
# Added replacement: close both sockets through Cosmos.close_socket.
Cosmos.close_socket(@write_socket)
Cosmos.close_socket(@read_socket)
# Record the disconnected state; the socket instance variables are not
# set to nil in this method.
@connected = false
end

Expand Down
22 changes: 16 additions & 6 deletions lib/cosmos/tools/cmd_tlm_server/interface_thread.rb
Expand Up @@ -44,6 +44,8 @@ def initialize(interface)
@fatal_exception_callback = nil
@thread = nil
@thread_sleeper = Sleeper.new
@connection_failed_messages = []
@connection_lost_messages = []
end

# Create and start the Ruby thread that will encapsulate the interface.
Expand Down Expand Up @@ -183,15 +185,19 @@ def handle_connection_failed(connect_error)
if @connection_failed_callback
@connection_failed_callback.call(connect_error)
else
Logger.error "#{@interface.name} Connection Failed: #{connect_error.class}:#{connect_error.message}"
Logger.error "#{@interface.name} Connection Failed: #{connect_error.formatted(false, false)}"
case connect_error
when Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::ENOTSOCK
when Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::ENOTSOCK, Errno::EHOSTUNREACH
# Do not write an exception file for these extremely common cases
else
if RuntimeError === connect_error and (connect_error.message =~ /canceled/ or connect_error.message =~ /timeout/)
# Do not write an exception file for these extremely common cases
else
Cosmos.write_exception_file(connect_error)
Logger.error connect_error.formatted
unless @connection_failed_messages.include?(connect_error.message)
Cosmos.write_exception_file(connect_error)
@connection_failed_messages << connect_error.message
end
end
end
end
Expand All @@ -202,16 +208,20 @@ def handle_connection_lost(err)
if @connection_lost_callback
@connection_lost_callback.call(err)
else
Logger.info "Connection Lost for #{@interface.name}"
if err
Logger.info "Connection Lost for #{@interface.name}: #{err.formatted(false, false)}"
case err
when Errno::ECONNABORTED, Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EBADF
# Do not write an exception file for these extremely common cases
Logger.error err.formatted(false, false)
else
Cosmos.write_exception_file(err)
Logger.error err.formatted
unless @connection_lost_messages.include?(err.message)
Cosmos.write_exception_file(err)
@connection_lost_messages << err.message
end
end
else
Logger.info "Connection Lost for #{@interface.name}"
end
end
disconnect()
Expand Down
24 changes: 23 additions & 1 deletion lib/cosmos/top_level.rb
Expand Up @@ -16,6 +16,7 @@
require 'cosmos/core_ext'
require 'cosmos/version'
require 'cosmos/utilities/logger'
require 'socket'

# If a hazardous command is sent through the {Cosmos::Api} this error is raised.
# {Cosmos::Script} rescues the error and prompts the user to continue.
Expand Down Expand Up @@ -642,7 +643,7 @@ def self.kill_thread(owner, thread, graceful_timeout = 1, timeout_interval = 0.0
end
if thread.alive?
# Graceful failed
Logger.warn "Failed to gracefully kill thread:\n #{thread.backtrace.join("\n ")}\n"
Logger.warn "Failed to gracefully kill thread:\n Caller Backtrace:\n #{caller().join("\n ")}\n \n Thread Backtrace:\n #{thread.backtrace.join("\n ")}\n\n"
thread.kill
end_time = Time.now + hard_timeout
while thread.alive? && ((end_time - Time.now) > 0)
Expand All @@ -655,4 +656,25 @@ def self.kill_thread(owner, thread, graceful_timeout = 1, timeout_interval = 0.0
end
end

# Close a socket in a manner that ensures that any reads blocked in select
# will unblock across platforms.
#
# This is a best-effort operation: ordinary errors (StandardError and its
# subclasses such as IOError, SystemCallError or NoMethodError) raised by
# shutdown or close are ignored. Non-standard exceptions such as Interrupt
# or SystemExit are NOT swallowed, but the socket is still closed before
# they propagate.
#
# @param socket The socket to close. May be nil, in which case nothing
#   is done.
# @return [nil]
def self.close_socket(socket)
  return unless socket

  begin
    # Calling shutdown and then sleep seems to be required
    # to get select to reliably unblock on linux
    socket.shutdown(:RDWR)
    sleep(0)
  rescue StandardError
    # Oh well we tried (already closed, not connected, not a socket, ...)
  ensure
    # Always release the descriptor, even when shutdown failed or a
    # signal-style exception is on its way up the stack.
    begin
      socket.close unless socket.closed?
    rescue StandardError
      # Oh well we tried
    end
  end
  nil
end

end
2 changes: 1 addition & 1 deletion spec/tools/cmd_tlm_server/interface_thread_spec.rb
Expand Up @@ -64,7 +64,7 @@ module Cosmos
sleep 0.2
Thread.list.length.should eql(1)

stdout.string.should match "Connection Failed: RuntimeError:ConnectError"
stdout.string.should match "Connection Failed: RuntimeError : ConnectError"
end
end

Expand Down

0 comments on commit 4f4a132

Please sign in to comment.