diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 88892ce31a..388f4823d2 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -20,27 +20,26 @@ # Imports for AppController libraries $:.unshift File.join(File.dirname(__FILE__), "lib") -require 'helperfunctions' +require 'app_controller_client' +require 'blobstore' +require 'custom_exceptions' +require 'ejabberd' +require 'error_app' +require 'collectd' require 'cron_helper' +require 'godinterface' require 'haproxy' -require 'collectd' -require 'nginx' +require 'helperfunctions' +require 'infrastructure_manager_client' +require 'neptune_manager_client' require 'pbserver' -require 'blobstore' +require 'nginx' require 'rabbitmq' -require 'app_controller_client' -require 'user_app_client' -require 'ejabberd' require 'repo' +require 'user_app_client' require 'zkinterface' -require 'godinterface' -require 'infrastructure_manager_client' -require 'neptune_manager_client' -class AppScaleException < Exception -end - WANT_OUTPUT = true @@ -1543,7 +1542,6 @@ def write_zookeeper_locations def update_api_status() - return if my_node.is_appengine? repo_host = my_node.private_ip else @@ -2129,7 +2127,7 @@ def change_job() retval = 0 while retries > 0 replication = @creds["replication"] - Djinn.log_run("MASTER_IP='localhost' LOCAL_DB_IP='localhost' python2.6 #{prime_script} #{replication}; echo $? > /tmp/retval") + Djinn.log_run("APPSCALE_HOME='#{APPSCALE_HOME}' MASTER_IP='localhost' LOCAL_DB_IP='localhost' python2.6 #{prime_script} #{replication}; echo $? > /tmp/retval") retval = `cat /tmp/retval`.to_i break if retval == 0 Djinn.log_debug("Fail to create initial table. Retry #{retries} times.") @@ -2245,7 +2243,7 @@ def start_pbserver zoo_connection = get_zk_connection_string(@nodes) PbServer.start(db_master_ip, @userappserver_private_ip, my_ip, table, zoo_connection) HAProxy.create_pbserver_config(my_node.private_ip, PbServer::PROXY_PORT, table) - Nginx.create_pbserver_config(my_ip, PbServer::PROXY_PORT) + Nginx.create_pbserver_config(my_node.private_ip, PbServer::PROXY_PORT) Nginx.restart() # TODO check the return value @@ -2741,6 +2739,26 @@ def stop_shadow() Djinn.log_debug("Stopping Shadow role") end + # + # Swaps out an application with one that relays an error message to the + # developer. It will take the application that currently exists in the + # application folder, deletes it, and places a templated app that prints out the + # given error message. + # + # Args: + # app_name: Name of application to construct an error application for + # err_msg: A String message that will be displayed as + # the reason why we couldn't start their application. + # + # Returns: + # Returns: Nothing + # + def place_error_app(app_name, err_msg) + Djinn.log_debug("Placing error application for #{app_name} because of: #{err_msg}") + ea = ErrorApp.new(app_name, err_msg) + ea.generate() + end + def start_appengine() @state = "Preparing to run AppEngine apps if needed" Djinn.log_debug("Starting appengine - pbserver is at [#{@userappserver_private_ip}]") @@ -2803,7 +2821,9 @@ def start_appengine() app_path = "#{app_dir}/#{app}.tar.gz" FileUtils.mkdir_p(app_dir) - copy_app_to_local(app) + if !copy_app_to_local(app) + place_error_app(app, "ERROR: Failed to copy app: #{app}") + end HelperFunctions.setup_app(app) @@ -2813,15 +2833,16 @@ def start_appengine() end app_number = @nginx_port - Nginx::START_PORT proxy_port = HAProxy.app_listen_port(app_number) - login_ip = get_login.public_ip + login_ip = get_login.private_ip if my_node.is_login? and !my_node.is_appengine? success = Nginx.write_fullproxy_app_config(app, app_number, my_public, my_private, proxy_port, login_ip, get_all_appengine_nodes()) if success Nginx.reload else - Djinn.log_debug("ERROR: Failure to create valid nginx config file for application #{app} full proxy.") - next + err_msg = "ERROR: Failure to create valid nginx config file" + \ + " for application #{app} full proxy." + place_error_app(app, err_msg) end @nginx_port += 1 @haproxy_port += 1 @@ -2831,14 +2852,22 @@ def start_appengine() if my_node.is_appengine? app_number = @nginx_port - Nginx::START_PORT start_port = HelperFunctions::APP_START_PORT - static_handlers = HelperFunctions.parse_static_data(app) + begin + static_handlers = HelperFunctions.parse_static_data(app) + rescue Exception => e + # This specific exception may be a json parse error + error_msg = "ERROR: Unable to parse app.yaml file for #{app}." + \ + " Exception of #{e.class} with message #{e.message}" + place_error_app(app, error_msg) + end proxy_port = HAProxy.app_listen_port(app_number) - login_ip = get_login.public_ip - success = Nginx.write_app_config(app, app_number, my_public, + login_ip = get_login.private_ip + success = Nginx.write_app_config(app, app_number, my_public, my_private, proxy_port, static_handlers, login_ip) - if not success - Djinn.log_debug("ERROR: Failure to create valid nginx config file for application #{app}.") - next + if !success + error_msg = "ERROR: Failure to create valid nginx config file " + \ + "for application #{app}." + place_error_app(app, error_msg) end Collectd.write_app_config(app) @@ -2859,14 +2888,14 @@ def start_appengine() @userappserver_private_ip, get_load_balancer_ip(), my_private, app_version, app_language, @nginx_port, xmpp_ip) if pid == -1 - Djinn.log_debug("ERROR: Unable to start application #{app}.") - next + place_error_app(app, "ERROR: Unable to start application " + \ + "#{app}. Please check the application logs.") end pid_file_name = "/etc/appscale/#{app}-#{@appengine_port}.pid" HelperFunctions.write_file(pid_file_name, pid) - location = "http://#{my_public}:#{@appengine_port}#{warmup_url}" + location = "http://#{my_private}:#{@appengine_port}#{warmup_url}" wget_cmd = "wget #{WGET_OPTIONS} #{location}" Djinn.log_run(wget_cmd) @@ -2875,7 +2904,7 @@ def start_appengine() } HAProxy.update_app_config(app, app_number, - @app_info_map[app][:appengine], my_public) + @app_info_map[app][:appengine], my_private) Nginx.reload HAProxy.reload Collectd.restart @@ -2903,7 +2932,7 @@ def start_appengine() login_ip = get_login.public_ip Thread.new { - haproxy_location = "http://#{my_public}:#{haproxy}#{warmup_url}" + haproxy_location = "http://#{my_private}:#{haproxy}#{warmup_url}" nginx_location = "http://#{my_public}:#{nginx}#{warmup_url}" wget_haproxy = "wget #{WGET_OPTIONS} #{haproxy_location}" @@ -2944,7 +2973,7 @@ def scale_appservers return end - if @creds["autoscale"] + if @creds["autoscale"] == "true" Djinn.log_debug("Examining AppServers to autoscale them") perform_scaling_for_appservers() else @@ -3209,7 +3238,7 @@ def add_appserver_process(app) my_private = my_node.private_ip Djinn.log_debug("port apps error contains - #{@app_info_map[app][:appengine]}") HAProxy.update_app_config(app, app_number, @app_info_map[app][:appengine], - my_public) + my_private) Djinn.log_debug("Adding #{app_language} app #{app} on #{HelperFunctions.local_ip}:#{@appengine_port} ") xmpp_ip = get_login.public_ip @@ -3221,7 +3250,7 @@ def add_appserver_process(app) pid_file_name = "#{APPSCALE_HOME}/.appscale/#{app}-#{@appengine_port}.pid" HelperFunctions.write_file(pid_file_name, pid) - location = "http://#{my_public}:#{@appengine_port}#{warmup_url}" + location = "http://#{my_private}:#{@appengine_port}#{warmup_url}" wget_cmd = "wget #{WGET_OPTIONS} #{location}" Djinn.log_run(wget_cmd) @@ -3234,10 +3263,8 @@ def add_appserver_process(app) # add_instance_info = uac.add_instance(app, my_public, @nginx_port) - login_ip = get_login.public_ip - Thread.new { - haproxy_location = "http://#{my_public}:#{haproxy_port}#{warmup_url}" + haproxy_location = "http://#{my_private}:#{haproxy_port}#{warmup_url}" nginx_location = "http://#{my_public}:#{nginx_port}#{warmup_url}" wget_haproxy = "wget #{WGET_OPTIONS} #{haproxy_location}" @@ -3294,7 +3321,7 @@ def remove_appserver_process(app) @app_info_map[app][:appengine].delete(port) HAProxy.update_app_config(app, app_number, @app_info_map[app][:appengine], - my_public) + my_private) HAProxy.reload end @@ -3441,11 +3468,18 @@ def start_sisyphus my_public = my_node.public_ip my_private = my_node.private_ip - login_ip = get_login.public_ip - - static_handlers = HelperFunctions.parse_static_data(app) + public_login_ip = get_login.public_ip + private_login_ip = get_login.private_ip + + begin + static_handlers = HelperFunctions.parse_static_data(app) + rescue Exception => e + error_msg = "ERROR: Unable to parse app.yaml file for #{app}." + \ + " Exception of type #{e.class}. Exception message #{e.message}" + place_error_app(app, error_msg) + end proxy_port = HAProxy.app_listen_port(app_number) - Nginx.write_app_config(app, app_number, my_public, proxy_port, static_handlers, login_ip) + Nginx.write_app_config(app, app_number, my_public, my_private, proxy_port, static_handlers, private_login_ip) HAProxy.write_app_config(app, app_number, num_servers, my_private) Collectd.write_app_config(app) @@ -3453,7 +3487,7 @@ def start_sisyphus Djinn.log_debug("Starting #{app_language} app #{app} on " + "#{HelperFunctions.local_ip}:#{port}") pid = HelperFunctions.run_app(app, port, @userappserver_private_ip, - my_public, my_private, app_version, app_language, nginx_port, login_ip) + my_public, my_private, app_version, app_language, nginx_port, public_login_ip) pid_file_name = "#{APPSCALE_HOME}/.appscale/#{app}-#{port}.pid" HelperFunctions.write_file(pid_file_name, pid) } diff --git a/AppController/djinnServer.rb b/AppController/djinnServer.rb index 9ec4daa8b4..228751bc58 100644 --- a/AppController/djinnServer.rb +++ b/AppController/djinnServer.rb @@ -70,6 +70,8 @@ def on_init `rm -f #{APPSCALE_HOME}/.appscale/status-*` `rm -f #{APPSCALE_HOME}/.appscale/database_info` `rm -f /tmp/mysql.sock` + + Nginx.clear_sites_enabled Collectd.clear_sites_enabled HAProxy.clear_sites_enabled diff --git a/AppController/lib/custom_exceptions.rb b/AppController/lib/custom_exceptions.rb new file mode 100644 index 0000000000..bf3c23b90d --- /dev/null +++ b/AppController/lib/custom_exceptions.rb @@ -0,0 +1,14 @@ +# Programmer: Navraj Chohan + +# A class of exceptions that can be thrown if the AppController is put into an +# unrecoverable state, or a state that we would not normally expect a perfectly +# working AppScale system to get into. +class AppScaleException < Exception +end + +# A class of exceptions that can be thrown if the AppController +# (or its associated libraries) attempts to execute shell commands which +# do not return properly (specifically, not having a return value of zero). +class FailedShellExec < Exception +end + diff --git a/AppController/lib/error_app.rb b/AppController/lib/error_app.rb new file mode 100644 index 0000000000..7c6dd81092 --- /dev/null +++ b/AppController/lib/error_app.rb @@ -0,0 +1,87 @@ +#!/usr/bin/ruby -w + +require 'fileutils' + +$:.unshift File.join(File.dirname(__FILE__)) +require 'custom_exceptions' +require 'helperfunctions' + +$:.unshift File.join(File.dirname(__FILE__), "..") +require 'djinn' + + +# This class generates a Python Google App Engine application that +# relays an error message to the user as to why their app failed to come up. +class ErrorApp + + # + # Constructor + # + # Args: + # app_name: Name of the application to construct an error application for. + # error_msg: A String message that will be displayed as the reason + # why we couldn't start their application. + def initialize(app_name, error_msg) + @app_name = app_name + @error_msg = error_msg + @dir_path = "/var/apps/#{app_name}/app/" + end + + # + # This function places an updated app.yaml and error.py into the application + # and retars the application file. + # + # Args: None + def generate() + app_yaml = <') + self.response.out.write("""

Your application failed to start

""") + self.response.out.write("""

#{@error_msg}

""") + self.response.out.write("""

If this is an AppScale issue please report it on http://github.com/AppScale/appscale/issues

""") + self.response.out.write('') + +application = webapp.WSGIApplication([ + ('/', MainPage), +], debug=True) + + +def main(): + wsgiref.handlers.CGIHandler().run(application) + + +if __name__ == '__main__': + main() + +SCRIPT + + HelperFunctions.write_file(@dir_path + 'app.yaml', app_yaml) + HelperFunctions.write_file(@dir_path + "#{@app_name}.py", script) + + Djinn.log_run("rm #{@dir_path}/#{@app_name}.tar.gz") + Dir.chdir(@dir_path) do + Djinn.log_debug("Running: tar zcvf #{@dir_path}/#{@app_name}.tar.gz #{@dir_path}") + Djinn.log_run("tar zcvf #{@app_name}.tar.gz app.yaml #{@app_name}.py") + end + + return true + end + +end + diff --git a/AppController/lib/helperfunctions.rb b/AppController/lib/helperfunctions.rb index 6b48f8b192..ceacefe89c 100644 --- a/AppController/lib/helperfunctions.rb +++ b/AppController/lib/helperfunctions.rb @@ -29,7 +29,7 @@ class BadConfigurationException < Exception module HelperFunctions - VER_NUM = "1.5" + VER_NUM = "1.6.3" APPSCALE_HOME = ENV['APPSCALE_HOME'] diff --git a/AppController/lib/nginx.rb b/AppController/lib/nginx.rb index f15beb9a22..13ca394455 100644 --- a/AppController/lib/nginx.rb +++ b/AppController/lib/nginx.rb @@ -80,13 +80,13 @@ def self.check_config end # Creates a Nginx config file for the provided app name - def self.write_app_config(app_name, app_number, my_public_ip, proxy_port, static_handlers, login_ip) + def self.write_app_config(app_name, app_number, my_public_ip, my_private_ip, proxy_port, static_handlers, login_ip) static_locations = static_handlers.map { |handler| HelperFunctions.generate_location_config(handler) }.join listen_port = Nginx.app_listen_port(app_number) config = < key, :data => val) - } - else - Djinn.log_debug("[ZK] Key #{key} does not exist, so creating it") - info = self.run_zookeeper_operation { - @@zk.create(:path => key, :ephemeral => ephemeral, :data => val) - } - end + retries_left = 5 + begin + Djinn.log_debug("[ZK] trying to set #{key} to #{val} with ephemeral = #{ephemeral}") + info = {} + if self.exists?(key) + Djinn.log_debug("[ZK] Key #{key} exists, so setting it") + info = self.run_zookeeper_operation { + @@zk.set(:path => key, :data => val) + } + else + Djinn.log_debug("[ZK] Key #{key} does not exist, so creating it") + info = self.run_zookeeper_operation { + @@zk.create(:path => key, :ephemeral => ephemeral, :data => val) + } + end - if !info[:rc].zero? - raise FailedZooKeeperOperationException.new("Failed to set path " + - "#{key} with data #{val}, ephemeral = #{ephemeral}, saw " + - "info #{info.inspect}") + if !info[:rc].zero? + raise FailedZooKeeperOperationException.new("Failed to set path " + + "#{key} with data #{val}, ephemeral = #{ephemeral}, saw " + + "info #{info.inspect}") + end + rescue FailedZooKeeperOperationException => e + retries_left -= 1 + Djinn.log_debug("Saw a failure trying to write to ZK, with " + + "info [#{e}]") + if retries_left > 0 + Djinn.log_debug("Retrying write operation, with #{retries_left}" + + " retries left") + Kernel.sleep(5) + retry + else + Djinn.log_debug("[ERROR] Failed to write to ZK and no retries " + + "left. Skipping on this write for now.") + end end end diff --git a/AppController/test/tc_djinn.rb b/AppController/test/tc_djinn.rb index c1991348cb..12975c0ae4 100644 --- a/AppController/test/tc_djinn.rb +++ b/AppController/test/tc_djinn.rb @@ -94,6 +94,9 @@ def test_get_role_info def test_set_params_w_bad_params + flexmock(HelperFunctions).should_receive(:local_ip). + and_return("127.0.0.1") + flexmock(Djinn).new_instances { |instance| instance.should_receive(:valid_secret?).and_return(true) } @@ -316,7 +319,7 @@ def test_write_our_node_info flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open). and_return() - flexmock(Zookeeper).should_receive(:new).with("public_ip:2181"). + flexmock(Zookeeper).should_receive(:new).with("public_ip:2181", ZKInterface::TIMEOUT). and_return(baz) ZKInterface.init_to_ip("public_ip", "public_ip") assert_equal(nil, djinn.write_our_node_info) @@ -385,7 +388,7 @@ def test_update_local_nodes flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open). and_return() - flexmock(Zookeeper).should_receive(:new).with("public_ip:2181"). + flexmock(Zookeeper).should_receive(:new).with("public_ip:2181", ZKInterface::TIMEOUT). and_return(baz) ZKInterface.init_to_ip("public_ip", "public_ip") @@ -405,19 +408,6 @@ def test_update_local_nodes assert_equal(true, djinn.done_loading) end - def test_ensure_all_roles_are_running_w_other_open_nodes - my_role = "public_ip:private_ip:shadow:instance_id:cloud1" - other_role = "public_ip2:private_ip2:open:instance_id:cloud1" - djinn = Djinn.new - djinn.my_index = 0 - djinn.nodes = [DjinnJobData.new(my_role, "appscale"), - DjinnJobData.new(other_role, "appscale")] - - # first, make sure that we (my_role) takes no action since there - # already is an open node that should be watching - assert_equal([], djinn.ensure_all_roles_are_running()) - end - def test_ensure_all_roles_are_running my_role = "public_ip:private_ip:open:instance_id:cloud1" new_role = "public_ip:private_ip:shadow:instance_id:cloud1" @@ -584,7 +574,7 @@ def test_ensure_all_roles_are_running # mocks for zookeeper initialization flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open). and_return() - flexmock(Zookeeper).should_receive(:new).with("public_ip:2181"). + flexmock(Zookeeper).should_receive(:new).with("public_ip:2181", ZKInterface::TIMEOUT). and_return(baz) djinn = Djinn.new @@ -636,7 +626,7 @@ def test_get_lock_when_somebody_else_has_it # mock out ZooKeeper's init stuff flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open). and_return() - flexmock(Zookeeper).should_receive(:new).with("public_ip:2181"). + flexmock(Zookeeper).should_receive(:new).with("public_ip:2181", ZKInterface::TIMEOUT). and_return(mocked_zk) ZKInterface.init_to_ip("public_ip", "public_ip") diff --git a/AppController/test/tc_error_app.rb b/AppController/test/tc_error_app.rb new file mode 100644 index 0000000000..d3486d5607 --- /dev/null +++ b/AppController/test/tc_error_app.rb @@ -0,0 +1,34 @@ +# Programmer: Navraj Chohan + +$:.unshift File.join(File.dirname(__FILE__), "..") +require 'djinn' + +$:.unshift File.join(File.dirname(__FILE__), "../..", "lib") +require 'error_app' +require 'helperfunctions' + +require 'rubygems' +require 'flexmock/test_unit' + + +class TestErrorApp < Test::Unit::TestCase + def setup + djinn = flexmock(Djinn) + djinn.should_receive(:log_run).and_return() + djinn.should_receive(:log_debug).and_return() + + dir = flexmock(Dir) + dir.should_receive(:chdir).and_return() + + helper_functions = flexmock(HelperFunctions) + helper_functions.should_receive(:write_file).and_return() + end + + def test_creation + errorapp = flexmock(ErrorApp) + assert_nothing_raised(Exception) { + ea_class = ErrorApp.new("testapp", "ERROR") + ea_class.generate() + } + end +end diff --git a/AppController/test/tc_infrastructure_manager_client.rb b/AppController/test/tc_infrastructure_manager_client.rb index adb88675e0..ce3648d421 100644 --- a/AppController/test/tc_infrastructure_manager_client.rb +++ b/AppController/test/tc_infrastructure_manager_client.rb @@ -69,6 +69,8 @@ def test_spawn_one_vm }).and_return(first_result, second_result) } + flexmock(HelperFunctions).should_receive(:local_ip). + and_return("127.0.0.1") imc = InfrastructureManagerClient.new("secret") creds = { @@ -136,6 +138,8 @@ def test_spawn_three_vms }).and_return(first_result, second_result) } + flexmock(HelperFunctions).should_receive(:local_ip). + and_return("127.0.0.1") imc = InfrastructureManagerClient.new("secret") creds = { diff --git a/AppController/test/tc_zkinterface.rb b/AppController/test/tc_zkinterface.rb index ff87019105..55d8c7e477 100644 --- a/AppController/test/tc_zkinterface.rb +++ b/AppController/test/tc_zkinterface.rb @@ -4,6 +4,9 @@ $:.unshift File.join(File.dirname(__FILE__), "..", "lib") require 'zkinterface' +$:.unshift File.join(File.dirname(__FILE__), "..") +require 'djinn' + require 'rubygems' require 'flexmock/test_unit' @@ -56,7 +59,7 @@ def test_add_and_get_app_instance # mocks for zookeeper initialization flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open). and_return() - flexmock(Zookeeper).should_receive(:new).with("public_ip:2181"). + flexmock(Zookeeper).should_receive(:new).with("public_ip:2181", ZKInterface::TIMEOUT). and_return(zk) ZKInterface.init_to_ip("public_ip", "public_ip") diff --git a/AppController/test/ts_all.rb b/AppController/test/ts_all.rb index 532c03d5c8..58d7dcc345 100644 --- a/AppController/test/ts_all.rb +++ b/AppController/test/ts_all.rb @@ -3,6 +3,7 @@ $:.unshift File.join(File.dirname(__FILE__)) # AppController library tests +require 'tc_error_app' require 'tc_infrastructure_manager_client' require 'tc_repo' require 'tc_zkinterface' diff --git a/AppServer/google/appengine/api/taskqueue/taskqueue_rabbitmq.py b/AppServer/google/appengine/api/taskqueue/taskqueue_rabbitmq.py index c00ab84c39..6c59216632 100644 --- a/AppServer/google/appengine/api/taskqueue/taskqueue_rabbitmq.py +++ b/AppServer/google/appengine/api/taskqueue/taskqueue_rabbitmq.py @@ -52,21 +52,37 @@ from google.appengine.api import datastore_errors import pika +#TODO document these globals DEFAULT_RATE = '5.00/s' + DEFAULT_RATE_FLOAT = 5.0 + DEFAULT_BUCKET_SIZE = 5 + MAX_ETA = datetime.timedelta(days=30) + MAX_PULL_TASK_SIZE_BYTES = 2 ** 20 + MAX_PUSH_TASK_SIZE_BYTES = 100 * (2 ** 10) + MAX_TASK_SIZE = MAX_PUSH_TASK_SIZE_BYTES + MAX_REQUEST_SIZE = 32 << 20 + MAX_RETRIES = 10 -MAX_WAIT = 60 # max wait in seconds + +# Max wait in seconds +MAX_WAIT = 60 + +# Max for time for exponential backoff for RabbitMQ reconnect +MAX_RECONNECT_TIME = 1024 + BUILT_IN_HEADERS = set(['x-appengine-queuename', 'x-appengine-taskname', 'x-appengine-taskretrycount', 'x-appengine-development-payload', 'content-length']) + DEFAULT_QUEUE_NAME = 'default' QUEUE_MODE = taskqueue_service_pb.TaskQueueMode @@ -74,6 +90,7 @@ AUTOMATIC_QUEUES = { DEFAULT_QUEUE_NAME: (0.2, DEFAULT_BUCKET_SIZE, DEFAULT_RATE), '__cron': (1, 1, '1/s')} + _TASKQUEUE_KIND = "___TaskQueue___" def _GetAppId(request): @@ -360,9 +377,14 @@ def __init__(self, task_executor, app_name, retry_seconds, **kwargs): self._should_exit = False self.task_executor = task_executor self.default_retry_seconds = retry_seconds - self.connection = pika.BlockingConnection(pika.ConnectionParameters( + try: + self.connection = pika.BlockingConnection(pika.ConnectionParameters( host='localhost')) - self.channel = self.connection.channel() + self.channel = self.connection.channel() + except pika.exceptions.AMQPConnectionError, e: + logging.error("Unable to connect to RabbitMQ: " + str(e)) + except Exception, e: + logging.error("Unknown Exception--unable to connect to RabbitMQ: " + str(e)) self._queue_name = "app_%s"%app_name if kwargs: raise TypeError('Unknown parameters: %s' % ', '.join(kwargs)) @@ -444,8 +466,13 @@ def _TaskCallback(self, ch, method, properties, body): self.connection = pika.BlockingConnection(pika.ConnectionParameters( host='localhost')) self.channel = self.connection.channel() - # TODO should be done transactionally with the publish and reject - # The API does support transactions see + except pika.exceptions.AMQPConnectionError, e: + logging.error("Unable to connect to RabbitMQ: " + str(e)) + except Exception, e: + logging.error("Unknown exception--unable to connect to RabbitMQ: " + str(e)) + # TODO RabbitMQ's basic_publish and reject should be + # done transactionally to prevent race conditions and duplicate + # tasks being enqueued. The API does support transactions see: # http://www.rabbitmq.com/amqp-0-9-1-reference.html else: ch.basic_reject(delivery_tag = method.delivery_tag, requeue = False) @@ -453,6 +480,7 @@ def _TaskCallback(self, ch, method, properties, body): def MainLoop(self): """The main loop of the scheduler.""" + reconnect_time = 1 while 1: try: logging.info("Connecting to RabbitMQ") @@ -468,8 +496,13 @@ def MainLoop(self): logging.error("RabbitMQ Connection error %s"%str(e)) except Exception, e: logging.error("RabbitMQ Unknown exception %s"%str(e)) - logging.info("Reconnecting in 5 seconds") - time.sleep(5) + logging.info("Reconnecting in " + str(reconnect_time) + " seconds") + time.sleep(reconnect_time) + + if reconnect_time <= MAX_RECONNECT_TIME: + reconnect_time *= 2 + else: + reconnect_time = MAX_RECONNECT_TIME class TaskQueueServiceStub(apiproxy_stub.APIProxyStub): """Python only task queue service stub. @@ -519,10 +552,15 @@ def __init__(self, self._task_scheduler = _BackgroundTaskScheduler( _TaskExecutor(default_http_server, self._secret_hash), app_id, retry_seconds=task_retry_seconds) - self.connection = pika.BlockingConnection(pika.ConnectionParameters( + try: + self.connection = pika.BlockingConnection(pika.ConnectionParameters( host='localhost')) - self.channel = self.connection.channel() - self.channel.queue_declare(queue='app_%s'%app_id, durable=False) + self.channel = self.connection.channel() + self.channel.queue_declare(queue='app_%s'%app_id, durable=False) + except pika.exceptions.AMQPConnectionError, e: + logging.error("RabbitMQ Connection error %s"%str(e)) + except Exception, e: + logging.error("Unknown exception--Unable to connect to to RabbitMQ") def StartBackgroundExecution(self): """Start automatic task execution.""" @@ -745,6 +783,8 @@ def _enqueue_task(self, request, now): host='localhost')) raise apiproxy_errors.ApplicationError( taskqueue_service_pb.TaskQueueServiceError.TRANSIENT_ERROR) + except Exception, e: + logging.error("Unknown exception--Unable to connect to RabbitMQ") def _LocateTaskByName(self, task_name): """ Makes sure the task does not exist or tombstoned diff --git a/README b/README index 0ec46ebdad..a1ac453936 100644 --- a/README +++ b/README @@ -4,8 +4,8 @@ Eucalyptus as well as Xen and KVM. It has been developed and is maintained by the RACELab at UC Santa Barbara. It supports the Python, Java, and Go Google App Engine platforms. -The latest stable release is AppScale 1.6-rc1, released May 17, 2012. Feature -requests and pull requests gladly considered. +The latest stable release is AppScale 1.6.3, released October 30, 2012. +Feature requests and pull requests gladly considered. Join the mailing list for announcements, help, and to discuss cloud research: http://groups.google.com/group/appscale_community diff --git a/RELEASE b/RELEASE index b9c34a1b18..82b25a070c 100644 --- a/RELEASE +++ b/RELEASE @@ -4,15 +4,46 @@ / ___ |/ /_/ / /_/ /___/ / /__/ /_/ / // __/ /_/ |_/ .___/ .___//____/\___/\__,_/_/ \___/ /_/ /_/ -The current release is 1.6, relased XXX, 2012. -Included in this release (over 1.5) is: -.... -XXXXXXXXXXXXXXXXXXXXXXXXX +The current release is 1.6.3, relased October 30, 2012. + +Bugs fixed in this release: +- Autoscaler scales down when --appengine is used (https://github.com/AppScale/appscale/issues/43) +- AppController crashes in response to failed ZooKeeper operations (https://github.com/AppScale/appscale/issues/47) +- Python applications should start even if rabbitmq fails to start (https://github.com/AppScale/appscale/issues/62) +- Bad applications crash appscale (https://github.com/AppScale/appscale/issues/57) + +and updated versions of Cassandra and HBase used - more info to come soon! -Included in this release (over 1.4) is: +Information about previous releases: .... -The AppScale version 1.4, released September 7, 2010. +AppScale version 1.5 was released July 28, 2011. Features include: + + Support for the bulkloader, enabling persistence for your data + Upgraded Java and Python AppServers to GAE 1.4.3 + Support for Go App Engine apps (SDK version 1.5.0), including support for apps that use multiple processes + Fault tolerance for almost all services (processes monitored and revived by god) + Faster startup and termination of AppScale, especially over larger numbers of nodes + Tools and image now verify that all instances used have AppScale installed + EC2 and Eucalyptus credentials are now obscured when they are printed to logs + Channel API for Python (multiple receivers can also be used) - implemented via Strophe.js + Blobstore and Files API for Python + XMPP API for Python - implemented via ejabberd + Hybrid cloud support - run AppScale over multiple clouds in a single deployment (e.g., Eucalyptus and EC2, EC2 East Coast and EC2 West Coast) + Neptune language support + Table caching for MySQL, HBase, Hypertable to improve performance + Updated interface for Amazon SimpleDB + Upgraded Cassandra version used to 0.7.6-2 + Upgraded HBase version used to 0.89 + Upgraded Hadoop version used to 0.20.2 + Upgraded Hypertable version used to 0.9.43 + Namespacing support + Added Loki, a fault tolerance tester along the lines of Netflix's Chaos Monkey + User authorization system for MapReduce, EC2, and Neptune APIs + Ability to remove transaction overhead via namespaces + Various other bug fixes + +AppScale version 1.4 was released September 7, 2010. Included in this release (over version 1.3) is: diff --git a/VERSION b/VERSION index b974235bc6..bb654bd917 100644 --- a/VERSION +++ b/VERSION @@ -4,5 +4,5 @@ / ___ |/ /_/ / /_/ /___/ / /__/ /_/ / // __/ /_/ |_/ .___/ .___//____/\___/\__,_/_/ \___/ /_/ /_/ -AppScale version 1.6 +AppScale version 1.6.3 diff --git a/debian/appscale_build.sh b/debian/appscale_build.sh index dc09139b9c..42ddfcee9c 100755 --- a/debian/appscale_build.sh +++ b/debian/appscale_build.sh @@ -130,7 +130,7 @@ fi # remove conflict package apt-get -y purge haproxy -#apt-get -y remove consolekit +apt-get -y remove consolekit bash debian/appscale_install.sh all mkdir -p $APPSCALE_HOME_RUNTIME/.appscale/certs diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 4fa6903d57..b87aae5731 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -12,7 +12,7 @@ fi #if [ -z "$APPSCALE_HOME" ]; then # export APPSCALE_HOME= /root/appscale/ #fi -export APPSCALE_VERSION=1.5 +export APPSCALE_VERSION=1.6.3 increaseconnections() { @@ -258,6 +258,11 @@ installtornado() fi } +installflexmock() +{ + easy_install flexmock || exit 1 +} + postinstalltornado() { # just enable tornado @@ -471,6 +476,11 @@ installgems() # This is for the LogManager, which will rotate logs on a daily basis. gem install -v=1.2.1 logrotate ${GEMOPT} || exit 1 + + # This is for the unit testing framework + gem install -v=1.0.4 flexmock ${GEMOPT} || exit 1 + gem install -v=1.0.0 rcov ${GEMOPT} || exit 1 + } postinstallgems()