forked from eryl/gpulog
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gpulog.py
73 lines (63 loc) · 2.88 KB
/
gpulog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import subprocess
import csv
import os
from datetime import datetime
activity_threshold = 1 # GPU utilization (in %) above which a sample counts as active; default is 1%
low_utilization_duration = 20 # Seconds of continuous low utilization before the current log file is closed; default is 20
def generate_filename(timestamp):
    """Build the CSV log file name for a capture that starts at *timestamp*.

    Every space in the timestamp text becomes an underscore, and every colon
    or slash becomes a hyphen; the result is wrapped as ``gpulog_<stamp>.csv``.
    """
    substitutions = str.maketrans({" ": "_", ":": "-", "/": "-"})
    safe_stamp = timestamp.translate(substitutions)
    return f"gpulog_{safe_stamp}.csv"
def write_to_csv(filename, data):
    """Append one row to a CSV log file, creating the file on first use.

    When *filename* does not exist yet, the file is created, the header row
    (taken from the module-level ``header`` global, if one has been set) is
    written first, and a "Started writing" message is printed. Otherwise the
    row is simply appended.

    Args:
        filename: Path of the CSV file; a falsy value makes the call a no-op.
        data: Sequence of column values forming the row to write.
    """
    if not filename:
        return

    # The header is looked up defensively: the global may not exist yet, or
    # may still be None, if a data row arrives before any header line has
    # been recorded. In that case the header row is skipped instead of
    # raising NameError / csv.Error.
    header_row = globals().get("header")

    is_new_file = not os.path.exists(filename)
    # Append mode creates the file when it is missing, so a single open()
    # serves both the "new file" and the "existing file" case.
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        if is_new_file and header_row:
            writer.writerow(header_row)
        writer.writerow(data)

    if is_new_file:
        print(f"Started writing to {filename}")
def main():
    """Stream ``nvidia-smi`` samples and log periods of GPU activity to CSV.

    ``nvidia-smi`` is started in looping CSV mode and its output is read line
    by line:

    * The header line (the one containing ``timestamp``) is stored in the
      module-level ``header`` global, which ``write_to_csv`` reads.
    * A sample whose GPU utilization exceeds ``activity_threshold`` is written
      to the current log file; a new file, named after the sample's
      timestamp, is started if none is open.
    * Samples at or below the threshold keep being written to the current
      file until utilization has stayed low for ``low_utilization_duration``
      seconds, at which point the file is considered finished.
    * A sample whose utilization field is not numeric is written out and
      ends the monitoring loop.

    The child process is always terminated and reaped when the loop exits.
    """
    # write_to_csv() reads the header row from this module-level global.
    global header
    header = None

    query_fields = (
        "timestamp",
        "pci.bus_id",
        "utilization.gpu",
        "utilization.memory",
        "memory.used",
        "temperature.gpu",
        "temperature.memory",
        "power.draw",
        "ecc.errors.corrected.volatile.total",
        "ecc.errors.corrected.aggregate.total",
    )
    cmd = ["nvidia-smi", "--query-gpu=" + ",".join(query_fields), "--format=csv", "-l", "1"]

    current_file = None
    low_utilization_start_time = None

    # stderr is discarded rather than piped: nothing ever reads it, and an
    # undrained pipe can block the child once it fills up. The context
    # manager closes stdout and waits for the child on exit.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) as process:
        try:
            for raw_line in process.stdout:
                line = raw_line.strip()
                if not line:
                    continue  # Blank lines carry no sample.

                if "timestamp" in line:  # This is the header line
                    header = line.split(", ")
                    continue

                columns = line.split(", ")
                if len(columns) < 3:
                    # Too short to hold the utilization column; skip it
                    # instead of failing with IndexError.
                    continue

                try:
                    gpu_utilization = float(columns[2].replace('%', '').strip())
                except ValueError:
                    # utilization.gpu is not numeric: record the row, then stop.
                    if not current_file:
                        current_file = generate_filename(columns[0])
                    write_to_csv(current_file, columns)
                    print(f"Finished writing to {current_file} due to non-numeric GPU utilization.")
                    break

                if gpu_utilization > activity_threshold:
                    if not current_file:
                        current_file = generate_filename(columns[0])
                    write_to_csv(current_file, columns)
                    low_utilization_start_time = None  # Reset the timer
                    continue

                # Low-utilization sample: start the idle timer if needed.
                if low_utilization_start_time is None:
                    low_utilization_start_time = datetime.now()

                # total_seconds() is the full elapsed time; timedelta.seconds
                # is only the seconds component and wraps every 24 hours.
                idle_seconds = (datetime.now() - low_utilization_start_time).total_seconds()
                if idle_seconds >= low_utilization_duration:
                    if current_file:
                        print(f"Finished writing to {current_file} due to low GPU utilization.")
                        current_file = None
                    low_utilization_start_time = None  # Reset the timer
                elif current_file:
                    # Still inside the grace period: keep logging, including
                    # the very first low-utilization sample of the period.
                    write_to_csv(current_file, columns)
        finally:
            process.terminate()
# Run the monitor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()