Skip to content

Commit

Permalink
Improve collectInfo command
Browse files Browse the repository at this point in the history
Various improvements to `collectInfo` command:
1. scp no longer hardcodes `localhost` as the destination host. Specifying
`localhost` works on Mac but not on a CentOS machine.
2. If the command is invalid, like `alluxio collectInfo bar /dir`, reject it
early, before distributing the command, instead of distributing it and only
then finding out that it is invalid.
3. Catch metrics collection errors when metrics are unavailable, instead
of failing the command and subsequently skipping tarball collection.
4. Misc bash command fixes.

pr-link: #10918
change-id: cid-3e4f7ed784d28bd0dafd52a92bb4ac7a417a7275
  • Loading branch information
jiacheliu3 committed Feb 21, 2020
1 parent 2fa8a6e commit 23ca2e4
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 12 deletions.
2 changes: 1 addition & 1 deletion core/common/src/main/java/alluxio/shell/ScpCommand.java
Expand Up @@ -44,7 +44,7 @@ public ScpCommand(String remoteHost, String fromFile, String toFile) {
*/
public ScpCommand(String remoteHost, String fromFile, String toFile, boolean isDir) {
super(new String[]{"bash", "-c",
String.format(isDir ? "scp -r %s %s:%s localhost:%s" : "scp %s %s:%s localhost:%s",
String.format(isDir ? "scp -r %s %s:%s %s" : "scp %s %s:%s %s",
ShellUtils.COMMON_SSH_OPTS, remoteHost, fromFile, toFile)
});
mHostName = remoteHost;
Expand Down
4 changes: 4 additions & 0 deletions shell/src/main/java/alluxio/cli/bundler/CollectInfo.java
Expand Up @@ -146,6 +146,10 @@ public static void main(String[] argv) throws IOException {
2, argv.length);
shell.printUsage();
System.exit(-1);
} else if (shell.findCommand(args[0]) == null) {
System.out.format("Command %s is not recognized.%n", args[0]);
shell.printUsage();
System.exit(-2);
}

// Choose mode based on option
Expand Down
Expand Up @@ -38,33 +38,33 @@ public CollectEnvCommand(FileSystemContext fsContext) {
@Override
protected void registerCommands() {
registerCommand("Alluxio ps",
new ShellCommand(new String[]{"bash", "-c", "'ps -ef | grep alluxio'"}), null);
new ShellCommand(new String[]{"bash", "-c", "ps", "-ef", "| grep alluxio"}), null);
registerCommand("Spark ps",
new ShellCommand(new String[]{"bash", "-c", "'ps -ef | grep spark'"}), null);
new ShellCommand(new String[]{"bash", "-c", "ps", "-ef", "| grep spark"}), null);
registerCommand("Yarn ps",
new ShellCommand(new String[]{"bash", "-c", "'ps -ef | grep yarn'"}), null);
new ShellCommand(new String[]{"bash", "-c", "ps", "-ef", "| grep yarn"}), null);
registerCommand("Hdfs ps",
new ShellCommand(new String[]{"bash", "-c", "'ps -ef | grep hdfs'"}), null);
new ShellCommand(new String[]{"bash", "-c", "ps", "-ef", "| grep hdfs"}), null);
registerCommand("Presto ps",
new ShellCommand(new String[]{"bash", "-c", "'ps -ef | grep presto'"}), null);
new ShellCommand(new String[]{"bash", "-c", "ps", "-ef", "| grep presto"}), null);
registerCommand("env",
new ShellCommand(new String[]{"env"}), null);
registerCommand("top", new ShellCommand(new String[]{"atop", "-b", "-n", "1"}),
new ShellCommand(new String[]{"top", "-b", "-n", "1"}));
registerCommand("mount", new ShellCommand(new String[]{"mount"}), null);
registerCommand("df", new ShellCommand(new String[]{"df", "-H"}), null);
registerCommand("ulimit", new ShellCommand(new String[]{"ulimit -Ha"}), null);
registerCommand("ulimit", new ShellCommand(new String[]{"ulimit", "-Ha"}), null);
registerCommand("uname", new ShellCommand(new String[]{"uname", "-a"}), null);
registerCommand("hostname", new ShellCommand(new String[]{"hostname"}), null);
registerCommand("host ip", new ShellCommand(new String[]{"hostname", "-i"}), null);
registerCommand("host fqdn", new ShellCommand(new String[]{"hostname", "-f"}), null);
registerCommand("list Alluxio home",
new ShellCommand(new String[]{String.format("ls -al -R %s",
new ShellCommand(new String[]{String.format("ls", "-al -R %s",
mFsContext.getClusterConf().get(PropertyKey.HOME))}), null);
registerCommand("dig", new ShellCommand(new String[]{"dig $(hostname -i)"}), null);
registerCommand("dig", new ShellCommand(new String[]{"dig", "$(hostname -i)"}), null);
registerCommand("nslookup", new ShellCommand(new String[]{"nslookup", "$(hostname -i)"}), null);
// TODO(jiacheng): does this stop?
registerCommand("dstat", new ShellCommand(new String[]{"dstat", "-cdgilmnprsty"}), null);
registerCommand("dstat", new ShellCommand(
new String[]{"dstat", "-cdgilmnprsty", "1", "5"}), null);
}

@Override
Expand Down
Expand Up @@ -88,7 +88,15 @@ public int run(CommandLine cl) throws AlluxioException, IOException {
LOG.info(String.format("Metric address URL: %s", url));

// Get metrics
String metricsResponse = getMetricsJson(url);
String metricsResponse;
try {
metricsResponse = getMetricsJson(url);
} catch (Exception e) {
// Do not break the loop since the HTTP failure can be due to many reasons
// Return the error message instead
LOG.error("Failed to get Alluxio metrics from URL %s. Exception is %s", url, e);
metricsResponse = String.format("Url: %s%nError: %s", url, e.getMessage());
}
outputBuffer.write(metricsResponse);

// Write to file
Expand Down Expand Up @@ -120,6 +128,8 @@ public String getDescription() {

/**
* Probes Alluxio metrics json sink.
* If the HTTP request fails, return the error content
* instead of throwing an exception.
*
* @param url URL that serves Alluxio metrics
* @return HTTP response in JSON string
Expand Down

0 comments on commit 23ca2e4

Please sign in to comment.