ci: add regression test for daemon startup without nvrc.log=trace#148
Open
zvonkok wants to merge 2 commits intoNVIDIA:mainfrom
Open
ci: add regression test for daemon startup without nvrc.log=trace#148zvonkok wants to merge 2 commits intoNVIDIA:mainfrom
zvonkok wants to merge 2 commits intoNVIDIA:mainfrom
Conversation
Enable daemon synchronization without debug logging by implementing dual-path syslog forwarding: messages route to /run/syslog.log when logging is disabled, /dev/kmsg when enabled. Prevents wait_for_marker timeouts that blocked nvidia-persistenced and nv-fabricmanager startup. Added 12 tests covering forward_message(), open_kmsg() path selection, and end-to-end integration flows. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
Test both with and without nvrc.log=trace to ensure daemon startup synchronization works in both modes. First test ensures nvrc.log=trace is present and verifies nvrc logs appear in VM dmesg. Second test removes nvrc.log=trace and confirms nvidia-smi still works without log spam, validating the /run/syslog.log fallback mechanism. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
Contributor
There was a problem hiding this comment.
Pull request overview
Adds a file-based syslog fallback and updates log-reader selection so daemon startup markers remain available even when nvrc.log=trace is disabled, along with CI coverage for both modes.
Changes:
- Add
syslog.rsforwarding to/run/syslog.logwhen debug logging isn’t enabled. - Update
kmsg::open_kmsg()to auto-select between/dev/kmsgand/run/syslog.logbased on current log level. - Extend CI workflow to test
nvidia-smiwith and withoutnvrc.log=trace.
Reviewed changes
Copilot reviewed 3 out of 3 changed files in this pull request and generated 12 comments.
| File | Description |
|---|---|
src/syslog.rs |
Introduces forward_message() and a /run/syslog.log sink plus new unit tests. |
src/kmsg.rs |
Adds auto-selection logic in open_kmsg() and expands tests around marker waiting and selection behavior. |
.github/workflows/ci.yaml |
Adds two CI runs toggling nvrc.log=trace to detect regressions. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
Comment on lines
+98
to
+101
| /// When debug logging is enabled, write to kmsg via trace!(). | ||
| /// Otherwise, append to /run/syslog.log for daemon synchronization. | ||
| fn forward_message(msg: &str) -> std::io::Result<()> { | ||
| if log::log_enabled!(log::Level::Debug) { |
| use std::fs; | ||
|
|
||
| log::set_max_level(log::LevelFilter::Off); | ||
| let _ = fs::remove_file(SYSLOG_FILE); |
| .map(Mutex::new) | ||
| })?; | ||
|
|
||
| let mut file = logfile.lock().unwrap(); |
Comment on lines
+49
to
+54
| /// - Debug or higher: /dev/kmsg (kernel message buffer) | ||
| /// - Off: /run/syslog.log (file-based syslog sink) | ||
| /// For /dev/kmsg, seeks to end to skip boot history. | ||
| /// Call *before* spawning a daemon to avoid missing its marker. | ||
| pub fn open_kmsg(path: &str) -> BufReader<File> { | ||
| // Auto-select log source based on log level |
Comment on lines
+325
to
+342
| #[test] | ||
| fn test_wait_for_marker_on_syslog_file() { | ||
| use std::io::Write; | ||
| use tempfile::NamedTempFile; | ||
|
|
||
| // Logging disabled, so open_kmsg should use /run/syslog.log | ||
| log::set_max_level(log::LevelFilter::Off); | ||
|
|
||
| // Create a temporary syslog file | ||
| let tmp = NamedTempFile::new().unwrap(); | ||
| let path = tmp.path().to_string_lossy().to_string(); | ||
|
|
||
| // Write initial content | ||
| fs::write(&path, "initial log entry\n").unwrap(); | ||
|
|
||
| // Open reader on the temp file | ||
| let mut reader = open_kmsg(&path); | ||
|
|
| output=$(sudo nerdctl run --runtime io.containerd.kata.v2 --device /dev/vfio/devices/vfio0 --rm ubuntu -- sh -c "nvidia-smi && sudo dmesg | tail -50" 2>&1) | ||
| exit_code=$? | ||
| echo "$output" | ||
|
|
Comment on lines
+316
to
+327
| #[test] | ||
| #[serial_test::serial] | ||
| fn test_forward_message_to_file_when_logging_disabled() { | ||
| use std::fs; | ||
|
|
||
| // Without trace logging, daemons still need synchronization via file | ||
| log::set_max_level(log::LevelFilter::Off); | ||
|
|
||
| let msg = "test message for file"; | ||
| let result = forward_message(msg); | ||
| assert!(result.is_ok()); | ||
|
|
Comment on lines
+359
to
+373
| use tempfile::NamedTempFile; | ||
|
|
||
| // Without debug logging, daemon markers must be readable from file | ||
| log::set_max_level(log::LevelFilter::Off); | ||
|
|
||
| let tmp = NamedTempFile::new().unwrap(); | ||
| let path = tmp.path().to_str().unwrap(); | ||
| let test_msg = "test content for path selection\n"; | ||
| fs::write(path, test_msg).unwrap(); | ||
|
|
||
| let mut reader = open_kmsg(path); | ||
| let mut buffer = String::new(); | ||
| let _ = reader.read_to_string(&mut buffer); | ||
|
|
||
| assert!(buffer.contains("test content")); |
Comment on lines
+393
to
+395
| // File must exist before daemon spawns to avoid race condition | ||
| log::set_max_level(log::LevelFilter::Off); | ||
| assert!(!log::log_enabled!(log::Level::Debug)); |
Comment on lines
+425
to
+453
| #[test] | ||
| #[serial] | ||
| fn test_end_to_end_syslog_to_wait_for_marker_integration() { | ||
| use std::os::unix::net::UnixDatagram; | ||
| use tempfile::TempDir; | ||
|
|
||
| // Verify complete flow: daemon → syslog → file → marker detection | ||
| log::set_max_level(log::LevelFilter::Off); | ||
|
|
||
| let tmp_dir = TempDir::new().unwrap(); | ||
| let sock_path = tmp_dir.path().join("test.sock"); | ||
| let log_file = tmp_dir.path().join("syslog.log"); | ||
|
|
||
| let syslog_sock = UnixDatagram::bind(&sock_path).expect("bind test socket"); | ||
| fs::write(&log_file, "").unwrap(); | ||
|
|
||
| // Reader must open before daemon starts to catch marker | ||
| let mut reader = open_kmsg(log_file.to_str().unwrap()); | ||
|
|
||
| let client = UnixDatagram::unbound().unwrap(); | ||
| let marker = "INTEGRATION_TEST_MARKER"; | ||
| client | ||
| .send_to(format!("<6>{}", marker).as_bytes(), &sock_path) | ||
| .unwrap(); | ||
|
|
||
| use std::io::Write; | ||
| let mut file = OpenOptions::new().append(true).open(&log_file).unwrap(); | ||
| writeln!(file, "{}", marker).unwrap(); | ||
| file.flush().unwrap(); |
manuelh-dev
approved these changes
Mar 19, 2026
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Test both with and without nvrc.log=trace to ensure daemon startup synchronization works in both modes. The first test ensures that nvrc.log=trace is present and verifies that nvrc logs appear in the VM dmesg. Second test removes nvrc.log=trace and confirms nvidia-smi still works without log spam, validating the /run/syslog.log fallback mechanism.