Skip to content

Commit

Permalink
System info export for debugging and bug reporting (#34)
Browse files Browse the repository at this point in the history
* System info export for debugging and bug reporting

Signed-off-by: Ryan Nett <rnett@skymind.io>

* class name fix

Signed-off-by: Ryan Nett <rnett@skymind.io>

* add version information, pointer memory info

Signed-off-by: Ryan Nett <rnett@skymind.io>

* add nvidia-smi and nvcc info

Signed-off-by: Ryan Nett <rnett@skymind.io>

* line cleanup

Signed-off-by: Ryan Nett <rnett@skymind.io>

* nvidia-smi run works

Signed-off-by: Ryan Nett <rnett@skymind.io>

* add oshi dependency

Signed-off-by: Ryan Nett <rnett@skymind.io>

* use OS info, add workspaces info

Signed-off-by: Ryan Nett <rnett@skymind.io>

* use ServiceLoader to load GPU information

Signed-off-by: Ryan Nett <rnett@skymind.io>

* register service

Signed-off-by: Ryan Nett <rnett@skymind.io>

* moved service out of NativeOpsHolder (private constructor)

Signed-off-by: Ryan Nett <rnett@skymind.io>

* added newline

Signed-off-by: Ryan Nett <rnett@skymind.io>

* added license

Signed-off-by: Ryan Nett <rnett@skymind.io>

* and one more

Signed-off-by: Ryan Nett <rnett@skymind.io>

* copyright update

Signed-off-by: Ryan Nett <rnett@skymind.io>

* fixes

Signed-off-by: Ryan Nett <rnett@skymind.io>

* removed unused imports

Signed-off-by: Ryan Nett <rnett@skymind.io>

* removed more unused imports

Signed-off-by: Ryan Nett <rnett@skymind.io>

* close streams

Signed-off-by: Ryan Nett <rnett@skymind.io>

* and another one

Signed-off-by: Ryan Nett <rnett@skymind.io>

* use method

Signed-off-by: Ryan Nett <rnett@skymind.io>

* one more copyright

Signed-off-by: Ryan Nett <rnett@skymind.io>

* remove double license

Signed-off-by: Ryan Nett <rnett@skymind.io>

* moved test to correct package

Signed-off-by: Ryan Nett <rnett@skymind.io>

* classpath update

Signed-off-by: Ryan Nett <rnett@skymind.io>

* classpath for java >8 fix

Signed-off-by: Ryan Nett <rnett@skymind.io>
  • Loading branch information
Ryan Nett committed Jul 2, 2019
1 parent 93d6d75 commit 3480e6d
Show file tree
Hide file tree
Showing 9 changed files with 628 additions and 38 deletions.
Expand Up @@ -16,7 +16,24 @@

package org.deeplearning4j.util;

import static org.deeplearning4j.nn.conf.inputs.InputType.inferInputType;
import static org.deeplearning4j.nn.conf.inputs.InputType.inferInputTypes;
import static org.nd4j.systeminfo.SystemInfo.inferVersion;

import com.jakewharton.byteunits.BinaryByteUnit;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import lombok.Getter;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
Expand Down Expand Up @@ -45,20 +62,9 @@
import org.nd4j.linalg.util.ArrayUtil;
import org.nd4j.nativeblas.NativeOps;
import org.nd4j.nativeblas.NativeOpsHolder;
import org.nd4j.versioncheck.VersionCheck;
import org.nd4j.versioncheck.VersionInfo;
import oshi.SystemInfo;
import oshi.software.os.OperatingSystem;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.text.SimpleDateFormat;
import java.util.*;

import static org.deeplearning4j.nn.conf.inputs.InputType.inferInputType;
import static org.deeplearning4j.nn.conf.inputs.InputType.inferInputTypes;

/**
* A utility for generating crash reports when an out-of-memory error occurs.
*
Expand Down Expand Up @@ -396,8 +402,6 @@ private static StringBuilder genericMemoryStatus(){
sb.append(String.format(fGpu, "Name", "CC", "Total Memory", "Used Memory", "Free Memory")).append("\n");
for (int i = 0; i < nDevices; i++) {
try {
Class<?> c = Class.forName("org.nd4j.jita.allocator.pointers.CudaPointer");
Constructor<?> constructor = c.getConstructor(long.class);
String name = nativeOps.getDeviceName(i);
long total = nativeOps.getDeviceTotalMemory(i);
long free = nativeOps.getDeviceFreeMemory(i);
Expand Down Expand Up @@ -638,25 +642,4 @@ private static void appendActivationShapes(ComputationGraph net, StringBuilder s
sb.append(fBytes("Total Activation Gradient Memory", totalExOutput));
}

/**
 * Infers the DL4J version and the DL4J CUDA artifact from the version
 * information reported by {@link VersionCheck#getVersionInfos()}.
 *
 * <p>The version is taken from the {@code org.deeplearning4j:deeplearning4j-core}
 * entry. For SNAPSHOT builds the abbreviated commit id is appended in
 * parentheses so that the exact build can be identified. The CUDA artifact is
 * the artifact id of any {@code org.deeplearning4j} entry whose artifact id
 * contains {@code deeplearning4j-cuda}.
 *
 * @return a pair of (DL4J version, DL4J CUDA artifact id); either element is
 *         {@code null} when no matching entry was found
 */
public static Pair<String,String> inferVersion(){
    List<VersionInfo> versionInfos = VersionCheck.getVersionInfos();

    String dl4jVersion = null;
    String dl4jCudaArtifact = null;
    for(VersionInfo v : versionInfos){
        if(!"org.deeplearning4j".equals(v.getGroupId())){
            continue;
        }

        String artifactId = v.getArtifactId();
        if("deeplearning4j-core".equals(artifactId)){
            String version = v.getBuildVersion();
            // Guard against a missing build version instead of throwing a NullPointerException.
            if(version != null && version.contains("SNAPSHOT")){
                // Snapshots are not uniquely identified by version alone: add the commit id.
                dl4jVersion = version + " (" + v.getCommitIdAbbrev() + ")";
            } else {
                // Previously this assignment was unconditional and clobbered the
                // commit-annotated snapshot version set above.
                dl4jVersion = version;
            }
        } else if(artifactId != null && artifactId.contains("deeplearning4j-cuda")){
            dl4jCudaArtifact = artifactId;
        }
    }

    return new Pair<>(dl4jVersion, dl4jCudaArtifact);
}

}
7 changes: 7 additions & 0 deletions nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml
Expand Up @@ -174,6 +174,13 @@
</dependency>


<!-- oshi: Used for collecting system information for system info reporting -->
<dependency>
<groupId>com.github.oshi</groupId>
<artifactId>oshi-core</artifactId>
<version>${oshi.version}</version>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
@@ -0,0 +1,39 @@
/*******************************************************************************
* Copyright (c) 2015-2019 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.nd4j.systeminfo;

import lombok.AllArgsConstructor;
import lombok.Data;

/**
 * Value holder describing one GPU: its name, total and free memory, and a
 * major/minor version pair.
 *
 * <p>Lombok generates the accessors and an all-arguments constructor whose
 * parameter order follows the field declaration order below
 * ({@code name, totalMemory, freeMemory, major, minor}).
 */
@Data
@AllArgsConstructor
public class GPUInfo {

    /**
     * Row format used by {@link #toString()}: five columns holding, in order,
     * the name, {@code major.minor}, total memory, used memory and free memory.
     */
    public static final String fGpu = " %-30s %-5s %24s %24s %24s";

    private String name;
    private long totalMemory;
    private long freeMemory;
    int major;
    int minor;

    /**
     * Renders this GPU as a single row formatted with {@link #fGpu}.
     * Used memory is derived as {@code totalMemory - freeMemory}.
     *
     * @return the formatted row
     */
    @Override
    public String toString(){
        final String majorMinor = major + "." + minor;
        final String total = SystemInfo.fBytes(totalMemory);
        final String used = SystemInfo.fBytes(totalMemory - freeMemory);
        final String free = SystemInfo.fBytes(freeMemory);
        return String.format(fGpu, name, majorMinor, total, used, free);
    }
}
@@ -0,0 +1,23 @@
/*******************************************************************************
* Copyright (c) 2015-2019 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.nd4j.systeminfo;

import java.util.List;

/**
 * Supplies {@link GPUInfo} descriptions of GPUs for system info reporting.
 *
 * <p>NOTE(review): presumably implemented per backend and discovered through
 * {@link java.util.ServiceLoader} - confirm against the service registration.
 */
public interface GPUInfoProvider {
/**
 * @return the list of {@link GPUInfo} entries known to this provider
 */
List<GPUInfo> getGPUs();
}

0 comments on commit 3480e6d

Please sign in to comment.