Merge pull request #1922 from obino/simplify-restore
Simplify restore logic.
menivaitsi committed Feb 5, 2016
2 parents 484c784 + cbcf71e commit 165e710
Showing 2 changed files with 68 additions and 68 deletions.
132 changes: 66 additions & 66 deletions AppController/djinn.rb
@@ -1698,17 +1698,28 @@ def job_start(secret)
Djinn.log_info("==== Starting AppController ====")

start_infrastructure_manager()
data_restored, need_to_start_jobs = restore_appcontroller_state()

if data_restored
# We need to wait for the 'state', that is, the deployment layout and
# the options for this deployment. It comes either from a state saved
# by a previous start, or from the tools. If the tools communicate the
# deployment's data, then we are the headnode.
if restore_appcontroller_state()
parse_options()
else
erase_old_data()
wait_for_data()
parse_options()
end

if need_to_start_jobs and my_node.is_shadow?
# From here on we have the basic local state that allows us to operate.
# In particular we know our roles and the deployment layout. Let's
# start attaching any permanent disk we may have associated with us.
mount_persistent_storage

# If we are the headnode, we may need to start/set up all other nodes.
# Better to do it early on, since it may take some time for the other
# nodes to start up.
if my_node.is_shadow?
Djinn.log_info("Spawning/setting up other nodes.")
spawn_and_setup_appengine
end
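Taken together, the rewritten startup path above boils down to the following control flow (a condensed restatement of this hunk for orientation only; every method named here is an existing AppController method that appears in the diff, not new code):

    start_infrastructure_manager()
    if restore_appcontroller_state()
      parse_options()                  # reuse the saved deployment state
    else
      erase_old_data()                 # fresh start: wait for the tools
      wait_for_data()
      parse_options()
    end
    mount_persistent_storage           # attach any permanent disk early
    spawn_and_setup_appengine if my_node.is_shadow?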
@@ -2723,56 +2734,38 @@ def backup_appcontroller_state()
end
end

# In multinode deployments, it could be the case that we restored data
# from ZooKeeper on a different machine. In that case, we still need to
# start all the services on this machine.
#
# Returns:
# true, if we do, false otherwise.
def are_we_restoring_from_local()
if HelperFunctions.is_process_running?("zookeeper")
return false
else
return true
end
end

# Restores the state of each of the instance variables that the AppController
# holds by pulling it from ZooKeeper (previously populated by the Shadow
# node, which always has the most up-to-date version of this data).
#
# Returns:
#   Two booleans that indicate whether (1) data was restored to this
#   AppController from either ZooKeeper or locally, and (2) whether we need
#   to start the roles on this machine or not.
#   A boolean to indicate if we were able to restore the state from
#   either ZooKeeper or the local disk.
def restore_appcontroller_state()
Djinn.log_info("Restoring AppController state")
restoring_from_local = true
json_state = ""

if File.exists?(ZK_LOCATIONS_FILE)
Djinn.log_info("Trying to restore data from ZooKeeper.")
json_state = restore_from_zookeeper()
if json_state.empty?
Djinn.log_info("Failed to restore data from ZooKeeper, trying locally.")
json_state = restore_from_local_data()
if json_state == nil
Djinn.log_warn("Unable to restore from ZK or local state, not restoring!")
restoring_from_local = are_we_restoring_from_local()
return false, restoring_from_local
end
else
if not json_state.empty?
Djinn.log_info("Restored data from ZooKeeper.")
restoring_from_local = are_we_restoring_from_local()
end
else
if File.exists?(HelperFunctions::APPCONTROLLER_STATE_LOCATION)
Djinn.log_info("Restoring from local data")
json_state = restore_from_local_data()
else
Djinn.log_info("No recovery data found - skipping recovery process")
return false, restoring_from_local
end

if json_state.empty? and File.exists?(HelperFunctions::APPCONTROLLER_STATE_LOCATION)
Djinn.log_info("Trying to restore data from local data.")
json_state = restore_from_local_data()
if not json_state.empty?
Djinn.log_info("Restored data from local data.")
end
end

if json_state.empty?
Djinn.log_warn("Unable to restore from ZK or local state, not restoring!")
return false
end

Djinn.log_info("Reload State : #{json_state}")

@@secret = json_state['@@secret']
@@ -2801,7 +2794,9 @@ def restore_appcontroller_state()
# of our internal state to use the new public and private IP anywhere the
# old ones were present.
if !HelperFunctions.get_all_local_ips().include?(@my_private_ip)
Djinn.log_info("IP changed old private:#{@my_private_ip} public:#{@my_public_ip}.")
update_state_with_new_local_ip()
Djinn.log_info("IP changed new private:#{@my_private_ip} public:#{@my_public_ip}.")
end

# Now that we've restored our state, update the pointer that indicates
@@ -2815,7 +2810,7 @@ def restore_appcontroller_state()
restore_appserver_state()
end

return true, restoring_from_local
return true
end
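The precedence the new restore_appcontroller_state follows (ZooKeeper first, then the local dump, giving up only when both come back empty) can be exercised in isolation with a small self-contained sketch. This is illustrative Ruby only, not AppScale code; the two lambdas are hypothetical stand-ins for the restore_from_zookeeper and restore_from_local_data helpers used in the diff:

    # Hypothetical stand-ins for the two restore helpers used in the diff.
    from_zookeeper = -> { "" }                      # pretend ZooKeeper had nothing
    from_local     = -> { '{"@@secret":"abc"}' }    # pretend a local dump exists

    state = from_zookeeper.call
    state = from_local.call if state.empty?
    if state.empty?
      puts "Unable to restore from ZK or local state, not restoring!"
    else
      puts "Restored state: #{state}"
    end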


@@ -4251,31 +4246,9 @@ def my_node()
return @nodes[@my_index]
end

# Perform any necessary initialization steps before we begin starting up
# services.
def initialize_server()
head_node_ip = get_public_ip(@options['hostname'])

if not HAProxy.is_running?
HAProxy.initialize_config()
HAProxy.create_app_load_balancer_config(my_node.public_ip,
my_node.private_ip, AppDashboard::PROXY_PORT)
HAProxy.start()
Djinn.log_info("HAProxy configured and started.")
else
Djinn.log_info("HAProxy already configured.")
end

if not Nginx.is_running?
Nginx.initialize_config()
Nginx.create_app_load_balancer_config(my_node.public_ip,
my_node.private_ip, AppDashboard::PROXY_PORT)
Nginx.start()
Djinn.log_info("Nginx configured and started.")
else
Djinn.log_info("Nginx already configured and running.")
end

# If we are in cloud mode, we should mount any volume containing our
# local state.
def mount_persistent_storage()
if my_node.disk
imc = InfrastructureManagerClient.new(@@secret)
begin
@@ -4338,6 +4311,31 @@ def initialize_server()
Djinn.log_run("mv /var/lib/rabbitmq #{PERSISTENT_MOUNT_POINT}")
Djinn.log_run("ln -s #{PERSISTENT_MOUNT_POINT}/rabbitmq /var/lib/rabbitmq")
end
end

# This function performs basic setup ahead of starting the API services.
def initialize_server()
head_node_ip = get_public_ip(@options['hostname'])

if not HAProxy.is_running?
HAProxy.initialize_config()
HAProxy.create_app_load_balancer_config(my_node.public_ip,
my_node.private_ip, AppDashboard::PROXY_PORT)
HAProxy.start()
Djinn.log_info("HAProxy configured and started.")
else
Djinn.log_info("HAProxy already configured.")
end

if not Nginx.is_running?
Nginx.initialize_config()
Nginx.create_app_load_balancer_config(my_node.public_ip,
my_node.private_ip, AppDashboard::PROXY_PORT)
Nginx.start()
Djinn.log_info("Nginx configured and started.")
else
Djinn.log_info("Nginx already configured and running.")
end

# The volume is mounted; let's finish the configuration of static files.
configure_db_nginx()
@@ -4391,14 +4389,16 @@ def set_appcontroller_monit()
'EC2_HOME' => ENV['EC2_HOME'],
'JAVA_HOME' => ENV['JAVA_HOME']
}
start = "/usr/bin/ruby -w #{APPSCALE_HOME}/AppController/djinnServer.rb"
start = "/usr/sbin/service appscale-controller start"
stop = "/usr/sbin/service appscale-controller stop"
match_cmd = "/usr/bin/ruby -w /root/appscale/AppController/djinnServer.rb"

# Let's make sure we don't have 2 jobs monitoring the controller.
FileUtils.rm_rf("/etc/monit/conf.d/controller-17443.cfg")

begin
MonitInterface.start(:controller, start, stop, SERVER_PORT, env)
MonitInterface.start(:controller, start, stop, SERVER_PORT, env,
nil, nil, match_cmd)
rescue Exception => e
Djinn.log_warn("Failed to set local AppController monit: retrying.")
retry
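A note on the match_cmd argument above: the controller is now started and stopped through the appscale-controller service, but the long-running process monit has to watch is still the ruby djinnServer.rb that the service launches, so monit presumably needs a separate pattern to match against the process table. A stanza built from these values might look roughly like the heredoc below (an illustrative sketch only, not the actual output of MonitInterface):

    # Illustrative only: roughly the monit stanza these values could map to.
    stanza = <<-MONIT
      check process controller matching "/usr/bin/ruby -w /root/appscale/AppController/djinnServer.rb"
        start program = "/usr/sbin/service appscale-controller start"
        stop program  = "/usr/sbin/service appscale-controller stop"
    MONIT
    puts stanza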
4 changes: 2 additions & 2 deletions AppController/scripts/appcontroller
@@ -26,8 +26,8 @@ do_start()
exit 0
fi

# If we start from boot, we need to clear the state.
rm -rf /etc/appscale/zookeeper_locations.json
# If we start from boot, we need to clear the monit state. The
# AppController will rebuild it.
rm -rf /etc/monit/conf.d/appscale*cfg

log_daemon_msg "Starting system $DAEMON_NAME daemon"
