# JupyterLab工具，用於檢查Server資源使用量(CPU、RAM、GPU)

In [1]:
# 使用 nvidia-smi 指令檢視顯卡工作狀況
import subprocess

def check_gpu_status():
    """Run ``nvidia-smi`` and return its textual report.

    Returns:
        str: The command's standard output. When the command wrote nothing
        to stdout, its standard error is returned instead so a failure
        reason is not silently dropped. When the ``nvidia-smi`` executable
        cannot be found, a short explanatory message is returned instead of
        raising.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError as exc:
        # Hosts without the NVIDIA tools should still get a printable
        # report rather than an exception that aborts a refresh loop.
        return f"nvidia-smi not found: {exc}\n"

    # stderr was captured but never surfaced before; use it as a fallback
    # when stdout is empty.
    return result.stdout or result.stderr

# gpu_result = check_gpu_status()
# print(gpu_result)

In [2]:
# GPU使用情形

import tensorflow as tf

def check_gpu_usage():
    """Print the name, device type and memory info of each GPU TensorFlow lists.

    For every entry returned by ``tf.config.list_physical_devices('GPU')``
    two lines are printed: the device's ``name`` and ``device_type``, then
    the value returned by ``tf.config.experimental.get_memory_info`` for
    ``'GPU:<index>'``. Nothing is printed when the list is empty.
    """
    for index, device in enumerate(tf.config.list_physical_devices('GPU')):
        # NOTE(review): assumes the enumeration index matches the 'GPU:<n>'
        # device numbering used by get_memory_info; confirm on multi-GPU hosts.
        device_label = f'GPU:{index}'
        print("Name:", device.name, "  Type:", device.device_type)
        print("Memory details:", tf.config.experimental.get_memory_info(device_label))

# check_gpu_usage()

In [3]:
import psutil

def check_system_usage():
    """Print the CPU and RAM usage summary.

    The report text comes from ``return_system_usage`` so the printed and
    returned variants cannot drift apart. Four lines are written to stdout:
    CPU usage, active RAM with the memory percentage, available RAM, and
    total RAM.
    """
    # Delegating removes the duplicated formatting code; the output is the
    # same four lines as before.
    print(return_system_usage())

def return_system_usage():
    """Build a four-line text summary of CPU and RAM usage.

    Returns:
        str: Newline-separated lines for CPU usage, active RAM together
        with the memory percentage, available RAM, and total RAM. RAM
        amounts are byte counts divided by ``1024 ** 3`` and shown with two
        decimals.
    """
    # Called without an interval. Per psutil's documentation this returns
    # immediately and measures usage since the previous call, so the very
    # first reading in a fresh kernel may be a meaningless 0.0.
    cpu_percent = psutil.cpu_percent()

    # Take a single snapshot so every RAM figure in the report describes the
    # same instant (previously each figure triggered its own query).
    # NOTE(review): psutil documents `active` as UNIX-only; confirm before
    # running this on other platforms.
    memory = psutil.virtual_memory()
    bytes_per_gb = 1024.0 ** 3

    lines = [
        f"CPU usage: {cpu_percent}%",
        f"Active RAM: {memory.active / bytes_per_gb:.2f} GB; percent: {memory.percent}%",
        f"Available RAM: {memory.available / bytes_per_gb:.2f} GB",
        f"Total RAM: {memory.total / bytes_per_gb:.2f} GB",
    ]
    return "\n".join(lines)


def check_disk_usage(path='/'):
    """Print the percentage of disk space used at ``path``.

    Args:
        path: Filesystem path passed to ``psutil.disk_usage``. Defaults to
            ``'/'``, which was previously hard-coded.
    """
    print(f"Disk Usage: {psutil.disk_usage(path).percent}%")
    
def check_network_usage():
    """Print the sent and received byte counters from psutil, in MB.

    Reads ``psutil.net_io_counters()`` once and prints ``bytes_sent`` and
    ``bytes_recv`` divided by ``1024 ** 2`` with two decimals.
    """
    bytes_per_mb = 1024 ** 2
    counters = psutil.net_io_counters()
    sent_mb = counters.bytes_sent / bytes_per_mb
    received_mb = counters.bytes_recv / bytes_per_mb
    print(f"Bytes Sent: {sent_mb:.2f} MB")
    print(f"Bytes Received: {received_mb:.2f} MB")


# check_system_usage()
# check_disk_usage()
# check_network_usage()

In [None]:
from IPython.display import clear_output
import subprocess
import time

def monitor_resources(interval=1, iterations=None):
    """Repeatedly display the system usage summary and the GPU report.

    Each refresh fetches ``return_system_usage()`` and ``check_gpu_status()``,
    clears the previous cell output, and prints both reports. A
    ``KeyboardInterrupt`` (e.g. interrupting the kernel) ends the loop and
    prints "Monitoring stopped.".

    Args:
        interval: Seconds to sleep between consecutive refreshes.
        iterations: Number of refreshes to perform. ``None`` (the default)
            keeps refreshing until interrupted, which is the original
            behavior; a finite value lets the cell terminate on its own,
            e.g. under Restart & Run All.
    """
    refreshes = 0
    try:
        while iterations is None or refreshes < iterations:
            # Sleep only between refreshes: the first report appears
            # immediately and a bounded run does not wait after its last one.
            if refreshes:
                time.sleep(interval)

            system_usage = return_system_usage()
            gpu_status = check_gpu_status()

            # wait=True defers the clear until new output is printed.
            clear_output(wait=True)
            print(f"{system_usage}\n")
            print(gpu_status)

            refreshes += 1
    except KeyboardInterrupt:
        print("Monitoring stopped.")

# Seconds between refreshes of the monitoring display.
interval = 1
# Runs until the kernel is interrupted.
monitor_resources(interval=interval)

CPU usage: 1.1%
Active RAM: 12.68 GB; percent: 10.4%
Available RAM: 98.73 GB
Total RAM: 110.13 GB

Thu Aug 22 11:32:58 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A30                     On  | 00005672:00:00.0 Off |                   On |
| N/A   29C    P0              48W / 165W |                  N/A |     N/A      Default |
|                                         |                      |              Enabled |
+-----------------------------------------+----------------