Skip to content

Commit

Permalink
Merge pull request #14 from Netflix/warmv2backupv2
Browse files Browse the repository at this point in the history
warmup and backup enhancements, now rest endpoint
  • Loading branch information
ipapapa committed Jul 19, 2016
2 parents 0fd0eb0 + 7265c3c commit 1036d0f
Show file tree
Hide file tree
Showing 17 changed files with 1,102 additions and 662 deletions.
Expand Up @@ -26,14 +26,14 @@
import com.netflix.dynomitemanager.monitoring.ServoMetricsTask;
import com.netflix.dynomitemanager.sidecore.IConfiguration;
import com.netflix.dynomitemanager.sidecore.aws.UpdateSecuritySettings;
import com.netflix.dynomitemanager.sidecore.backup.SnapshotTask;
import com.netflix.dynomitemanager.sidecore.backup.RestoreTask;
import com.netflix.dynomitemanager.sidecore.scheduler.TaskScheduler;
import com.netflix.dynomitemanager.sidecore.utils.ProcessMonitorTask;
import com.netflix.dynomitemanager.sidecore.utils.Sleeper;
import com.netflix.dynomitemanager.sidecore.utils.ProxyAndStorageResetTask;
import com.netflix.dynomitemanager.sidecore.utils.TuneTask;
import com.netflix.dynomitemanager.sidecore.utils.WarmBootstrapTask;
import com.netflix.dynomitemanager.backup.RestoreFromS3Task;
import com.netflix.dynomitemanager.backup.SnapshotBackup;
import com.netflix.servo.DefaultMonitorRegistry;
import com.netflix.servo.monitor.Monitors;

Expand Down Expand Up @@ -106,8 +106,8 @@ else if (UpdateSecuritySettings.firstTimeUpdated) {
// Determine if we need to restore from backup else start Dynomite.
if (config.isRestoreEnabled()) {
logger.info("Restore is enabled.");
scheduler.runTaskNow(RestoreFromS3Task.class); //restore from the AWS
logger.info("Scheduled task " + RestoreFromS3Task.JOBNAME);
scheduler.runTaskNow(RestoreTask.class); //restore from the AWS
logger.info("Scheduled task " + RestoreTask.TaskName);
} else { //no restores needed
logger.info("Restore is disabled.");

Expand All @@ -129,7 +129,7 @@ else if (UpdateSecuritySettings.firstTimeUpdated) {
// Backup
if (config.isBackupEnabled() && config.getBackupHour() >= 0)
{
scheduler.addTask(SnapshotBackup.TaskName, SnapshotBackup.class, SnapshotBackup.getTimer(config));
scheduler.addTask(SnapshotTask.TaskName, SnapshotTask.class, SnapshotTask.getTimer(config));
}

// Metrics
Expand Down
Expand Up @@ -18,20 +18,34 @@
import java.util.concurrent.atomic.AtomicBoolean;

import com.google.inject.Singleton;
import org.joda.time.DateTime;


/**
* Contains the health state of the processes managed by Florida, and
* maintains the isHealthy flag used for reporting the discovery health check.
*
*/
@Singleton
public class InstanceState {
private final AtomicBoolean isSideCarProcessAlive = new AtomicBoolean(false);
private final AtomicBoolean isBootstrapping = new AtomicBoolean(false);
private final AtomicBoolean isBootstrapSuccesful = new AtomicBoolean(false);
private final AtomicBoolean firstBootstrap = new AtomicBoolean(true);
private final AtomicBoolean isBackup = new AtomicBoolean(false);
private final AtomicBoolean isRestore = new AtomicBoolean (false);
private final AtomicBoolean isBackupSuccessful = new AtomicBoolean(false);
private final AtomicBoolean firstBackup = new AtomicBoolean(true);
private final AtomicBoolean isRestore = new AtomicBoolean(false);
private final AtomicBoolean isRestoreSuccessful = new AtomicBoolean(false);
private final AtomicBoolean firstRestore = new AtomicBoolean(true);
private final AtomicBoolean isStorageProxyAlive = new AtomicBoolean(false);
private final AtomicBoolean isStorageProxyProcessAlive = new AtomicBoolean(false);
private final AtomicBoolean isStorageAlive = new AtomicBoolean(false);

private long bootstrapTime;
private long backupTime;
private long restoreTime;

// This is true if storage proxy and storage are alive.
private final AtomicBoolean isHealthy = new AtomicBoolean(false);
// State of whether the rest endpoints /admin/stop or /admin/start are invoked
Expand All @@ -43,6 +57,8 @@ public String toString() {
return "InstanceState{" +
"isSideCarProcessAlive=" + isSideCarProcessAlive +
", isBootstrapping=" + isBootstrapping +
", isBackingup=" + isBackup +
", isRestoring=" + isRestore +
", isStorageProxyAlive=" + isStorageProxyAlive +
", isStorageProxyProcessAlive=" + isStorageProxyProcessAlive +
", isStorageAlive=" + isStorageAlive +
Expand All @@ -63,30 +79,105 @@ public void setSideCarProcessAlive(boolean isSideCarProcessAlive) {
/**
 * Exposes the result of {@code isSideCarProcessAlive()} as a number.
 *
 * @return 1 when {@code isSideCarProcessAlive()} returns true, otherwise 0
 */
public int metricIsSideCarProcessAlive() {
    if (isSideCarProcessAlive()) {
        return 1;
    }
    return 0;
}


/* Bootstrap */
/**
 * Reads the {@code isBootstrapping} flag.
 *
 * @return the current value of the flag
 */
public boolean isBootstrapping() {
    return this.isBootstrapping.get();
}

public boolean isBackingup() {
return isBackup.get();
public boolean isBootstrapSuccessful() {
return isBootstrapSuccesful.get();
}

public boolean isRestoring() {
return isRestore.get();
public boolean firstBootstrap() {
return firstBootstrap.get();
}

/**
 * Returns the stored bootstrap timestamp.
 *
 * @return the value last written by {@code setBootstrapTime}, i.e. the
 *         {@code getMillis()} value of the supplied {@code DateTime}
 */
public long getBootstrapTime() {
    return this.bootstrapTime;
}

/**
 * Updates the {@code isBootstrapping} flag.
 *
 * @param isBootstrapping the new value of the flag
 */
public void setBootstrapping(final boolean isBootstrapping) {
    this.isBootstrapping.set(isBootstrapping);
}

/**
 * Records the bootstrap outcome in the {@code isBootstrapSuccesful} flag.
 *
 * @param isBootstrapSuccesful the new value of the flag
 */
public void setBootstrapStatus(final boolean isBootstrapSuccesful) {
    this.isBootstrapSuccesful.set(isBootstrapSuccesful);
}

/**
 * Updates the {@code firstBootstrap} flag.
 *
 * @param firstBootstrap the new value of the flag
 */
public void setFirstBootstrap(final boolean firstBootstrap) {
    this.firstBootstrap.set(firstBootstrap);
}

/**
 * Stores the bootstrap timestamp as the {@code getMillis()} value of the
 * given {@code DateTime}.
 *
 * @param bootstrapTime the time to record; must not be null, since it is
 *                      dereferenced directly
 */
public void setBootstrapTime(final DateTime bootstrapTime) {
    final long millis = bootstrapTime.getMillis();
    this.bootstrapTime = millis;
}

/* Backup */
/**
 * Reads the {@code isBackup} flag.
 *
 * @return the current value of the flag
 */
public boolean isBackingup() {
    return this.isBackup.get();
}

/**
 * Reads the {@code isBackupSuccessful} flag.
 *
 * @return the value last written by {@code setBackUpStatus}, or the
 *         field's initial value if it was never set
 */
public boolean isBackupSuccessful() {
    return this.isBackupSuccessful.get();
}

/**
 * Reads the {@code firstBackup} flag.
 *
 * @return the current value of the flag
 */
public boolean firstBackup() {
    return this.firstBackup.get();
}

/**
 * Returns the stored backup timestamp.
 *
 * @return the value last written by {@code setBackupTime}, i.e. the
 *         {@code getMillis()} value of the supplied {@code DateTime}
 */
public long getBackupTime() {
    return this.backupTime;
}

/**
 * Updates the {@code isBackup} flag.
 *
 * @param isBackup the new value of the flag
 */
public void setBackingup(final boolean isBackup) {
    this.isBackup.set(isBackup);
}

/**
 * Records the backup outcome in the {@code isBackupSuccessful} flag.
 *
 * @param isBackupSuccessful the new value of the flag
 */
public void setBackUpStatus(final boolean isBackupSuccessful) {
    this.isBackupSuccessful.set(isBackupSuccessful);
}

/**
 * Updates the {@code firstBackup} flag.
 *
 * @param firstBackup the new value of the flag
 */
public void setFirstBackup(final boolean firstBackup) {
    this.firstBackup.set(firstBackup);
}

/**
 * Stores the backup timestamp as the {@code getMillis()} value of the
 * given {@code DateTime}.
 *
 * @param backupTime the time to record; must not be null, since it is
 *                   dereferenced directly
 */
public void setBackupTime(final DateTime backupTime) {
    final long millis = backupTime.getMillis();
    this.backupTime = millis;
}

/* Restore */
/**
 * Reads the {@code isRestore} flag.
 *
 * @return the current value of the flag
 */
public boolean isRestoring() {
    return this.isRestore.get();
}

/**
 * Reads the {@code isRestoreSuccessful} flag.
 *
 * @return the value last written by {@code setRestoreStatus}, or the
 *         field's initial value if it was never set
 */
public boolean isRestoreSuccessful() {
    return this.isRestoreSuccessful.get();
}

/**
 * Reads the {@code firstRestore} flag.
 *
 * @return the current value of the flag
 */
public boolean firstRestore() {
    return this.firstRestore.get();
}

/**
 * Returns the stored restore timestamp.
 *
 * @return the value last written by {@code setRestoreTime}, i.e. the
 *         {@code getMillis()} value of the supplied {@code DateTime}
 */
public long getRestoreTime() {
    return this.restoreTime;
}

/**
 * Updates the {@code isRestore} flag.
 *
 * @param isRestoring the new value of the flag
 */
public void setRestoring(final boolean isRestoring) {
    this.isRestore.set(isRestoring);
}

/**
 * Records the restore outcome in the {@code isRestoreSuccessful} flag.
 *
 * @param isRestoreSuccessful the new value of the flag
 */
public void setRestoreStatus(final boolean isRestoreSuccessful) {
    this.isRestoreSuccessful.set(isRestoreSuccessful);
}

/**
 * Updates the {@code firstRestore} flag.
 *
 * @param firstRestore the new value of the flag
 */
public void setFirstRestore(final boolean firstRestore) {
    this.firstRestore.set(firstRestore);
}

/**
 * Stores the restore timestamp as the {@code getMillis()} value of the
 * given {@code DateTime}.
 *
 * @param restoreTime the time to record; must not be null, since it is
 *                    dereferenced directly
 */
public void setRestoreTime(final DateTime restoreTime) {
    final long millis = restoreTime.getMillis();
    this.restoreTime = millis;
}

//@Monitor(name="bootstrapping", type=DataSourceType.GAUGE)
public int metricIsBootstrapping() {
Expand Down Expand Up @@ -141,7 +232,7 @@ public boolean isHealthy() {
/**
 * Recomputes the {@code isHealthy} flag: it is set to true only when both
 * {@code isStorageProxyAlive()} and {@code isStorageAlive()} return true.
 */
private void setHealthy() {
    // Keep the short-circuit: isStorageAlive() is consulted only when the proxy is alive.
    final boolean healthy = isStorageProxyAlive() && isStorageAlive();
    this.isHealthy.set(healthy);
}

//@Monitor(name="healthy", type=DataSourceType.GAUGE)
public int metricIsHealthy() {
return isHealthy() ? 1 : 0;
Expand All @@ -159,4 +250,5 @@ public void setIsProcessMonitoringSuspended(boolean ipms) {
/**
 * Exposes the result of {@code getIsProcessMonitoringSuspended()} as a number.
 *
 * @return 1 when {@code getIsProcessMonitoringSuspended()} returns true, otherwise 0
 */
public int metricIsProcessMonitoringSuspended() {
    if (getIsProcessMonitoringSuspended()) {
        return 1;
    }
    return 0;
}

}
Expand Up @@ -31,7 +31,7 @@
import com.netflix.dynomitemanager.sidecore.IConfiguration;

/**
* Factory to use cassandra for managing instance data
* Factory to use Cassandra for managing instance data
*/

@Singleton
Expand All @@ -58,6 +58,18 @@ public List<AppsInstance> getAllIds(String appName)
sort(return_);
return return_;
}

/**
 * Collects the instances that the DAO reports for the given application
 * and region into a list, then orders the list with {@code sort}.
 *
 * @param appName the application name passed through to the DAO
 * @param region  the region passed through to the DAO
 * @return a new list holding every instance returned by
 *         {@code dao.getLocalDCInstances(appName, region)}, after sorting
 */
public List<AppsInstance> getLocalDCIds(String appName, String region)
{
    final List<AppsInstance> localInstances = new ArrayList<AppsInstance>();

    for (AppsInstance candidate : dao.getLocalDCInstances(appName, region)) {
        localInstances.add(candidate);
    }

    sort(localInstances);
    return localInstances;
}


public void sort(List<AppsInstance> return_)
{
Expand Down
Expand Up @@ -32,6 +32,15 @@ public interface IAppsInstanceFactory
*/
public List<AppsInstance> getAllIds(String appName);


/**
* Return a list of Local Dynomite server nodes registered.
* @param appName the cluster name
* @param region the region of the node
* @return a list of nodes in {@code appName} that are in the given {@code region}
*/
public List<AppsInstance> getLocalDCIds(String appName, String region);

/**
* Return the Dynomite server node with the given {@code id}.
* @param appName the cluster name
Expand Down Expand Up @@ -80,4 +89,5 @@ public AppsInstance create(String app, int id, String instanceID, String hostnam
* @param device
*/
public void attachVolumes(AppsInstance instance, String mountPath, String device);
}

}
Expand Up @@ -251,6 +251,18 @@ public AppsInstance getInstance(String app, String rack, int id)
}
return null;
}

/**
 * Filters the instances of an application down to those whose datacenter
 * equals the given region.
 *
 * @param app    the application name used to look up all instances
 * @param region the value each instance's {@code getDatacenter()} is compared to
 * @return a new set containing the matching instances; empty when none match
 */
public Set<AppsInstance> getLocalDCInstances(String app, String region)
{
    final Set<AppsInstance> matching = new HashSet<AppsInstance>();

    for (AppsInstance candidate : getAllInstances(app)) {
        // Comparison is made on the instance's datacenter, so a null
        // datacenter is not tolerated here.
        if (!candidate.getDatacenter().equals(region)) {
            continue;
        }
        matching.add(candidate);
    }

    return matching;
}

public Set<AppsInstance> getAllInstances(String app)
{
Expand Down

1 comment on commit 1036d0f

@diegopacheco
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well done @ipapapa very nice!

Please sign in to comment.