Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/defib/cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,6 +1666,28 @@ def install(
}


def _nand_bootargs(rootfs_is_ubi: bool) -> str:
"""Return kernel cmdline for OpenIPC NAND install.

Don't rely on U-Boot's compiled-in default bootargs — recent OpenIPC
builds default to squashfs+ubiblock, which kernel-panics with
"Unable to mount root fs" when the actual rootfs volume contains
UBIFS. Always set bootargs explicitly to match what we wrote.

The mtdparts substring must agree with the layout we set in U-Boot
(1M boot, 1M env, 8M kernel, rest UBI) so ``ubi.mtd=3`` resolves to
the right partition.
"""
base = (
"mem=256M console=ttyAMA0,115200 panic=20 ubi.mtd=3,2048 "
"mtdparts=hinand:1024k(boot),1024k(env),8192k(kernel),-(ubi)"
)
if rootfs_is_ubi:
return f"root=ubi0:rootfs rootfstype=ubifs {base}"
# Squashfs on a UBI block device (modern OpenIPC layout)
return f"root=/dev/ubiblock0_0 rootfstype=squashfs ubi.block=0,0 init=/init {base}"


async def _install_async(
chip: str,
firmware_path: str,
Expand Down Expand Up @@ -2136,6 +2158,9 @@ async def tftp_and_flash(
r"setenv bootcmd nand read ${baseaddr} 0x200000 0x800000\; bootm ${baseaddr}",
timeout=3.0,
)
# Match bootargs to actual rootfs format — see _nand_bootargs.
bootargs = _nand_bootargs(rootfs_is_ubi=is_ubi_image(rootfs_data))
await _cmd(f"setenv bootargs {bootargs}", timeout=3.0)
else:
nor_cmd = "setnor8m" if nor_size < 16 else "setnor16m"
if output == "human":
Expand Down
27 changes: 16 additions & 11 deletions src/defib/protocol/hisilicon_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,16 @@ async def _send_ddr_step(
transport: Transport,
profile: SoCProfile,
on_progress: Callable[[ProgressEvent], None] | None = None,
) -> bool:
) -> str | None:
"""Send DDR initialization steps to SRAM.

Matches HiTool's exact sequence:
1. sendFrameForStart: blast HEAD(64, ADDRESS0) until ACK (handshake)
2. For each step (PRESTEP0, DDRSTEP0, PRESTEP1): HEAD+DATA+TAIL

Returns ``None`` on success, or a string describing which sub-step
failed. Distinguishes handshake failure from frame-send failures
so error messages aren't misleading.
"""
_emit(on_progress, ProgressEvent(
stage=Stage.DDR_INIT, bytes_sent=0, bytes_total=64,
Expand All @@ -331,17 +335,17 @@ async def _send_ddr_step(
# sendFrameForStart: blast HEAD as handshake (HiTool approach)
if prestep is not None:
if not await self._send_frame_for_start(transport, profile):
return False
return "handshake (sendFrameForStart) timed out"

# PRESTEP0: HEAD+DATA+TAIL (HEAD sent again per HiTool's loop)
logger.debug(
"=== PRESTEP0 === address=0x%08X data=%d bytes",
addr, len(prestep),
)
if not await self._send_head(transport, 64, addr):
return False
return "PRESTEP0 HEAD frame not ACKed"
if not await self._send_data(transport, 1, prestep):
return False
return "PRESTEP0 DATA frame not ACKed"
if not await self._send_tail(transport, 2):
logger.debug("PRESTEP0 TAIL not ACKed (non-fatal)")

Expand All @@ -352,10 +356,10 @@ async def _send_ddr_step(
)
ddr_data = profile.ddr_step_data
if not await self._send_head(transport, 64, addr):
return False
return "DDRSTEP0 HEAD frame not ACKed"

if not await self._send_data(transport, 1, ddr_data):
return False
return "DDRSTEP0 DATA frame not ACKed"

if not await self._send_tail(transport, 2):
logger.debug("DDRSTEP0 TAIL not ACKed (non-fatal)")
Expand All @@ -368,17 +372,17 @@ async def _send_ddr_step(
addr, len(prestep1),
)
if not await self._send_head(transport, len(prestep1), addr):
return False
return "PRESTEP1 HEAD frame not ACKed"
if not await self._send_data(transport, 1, prestep1):
return False
return "PRESTEP1 DATA frame not ACKed"
if not await self._send_tail(transport, 2):
logger.debug("PRESTEP1 TAIL not ACKed (non-fatal)")

_emit(on_progress, ProgressEvent(
stage=Stage.DDR_INIT, bytes_sent=64, bytes_total=64,
message="DDR step complete",
))
return True
return None

@staticmethod
def _detect_spl_size(firmware: bytes, profile_max: int) -> int:
Expand Down Expand Up @@ -537,10 +541,11 @@ async def send_firmware(
profile.name, len(firmware), spl_override is not None,
)

if not await self._send_ddr_step(transport, profile, on_progress):
ddr_err = await self._send_ddr_step(transport, profile, on_progress)
if ddr_err is not None:
return RecoveryResult(
success=False, stages_completed=stages,
error="Failed to send DDR step",
error=f"DDR init failed: {ddr_err}",
)
stages.append(Stage.DDR_INIT)

Expand Down
194 changes: 129 additions & 65 deletions src/defib/recovery/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ async def run(
on_progress: Callable[[ProgressEvent], None] | None = None,
on_log: Callable[[LogEvent], None] | None = None,
send_break: bool = False,
max_handshake_attempts: int = 2,
) -> RecoveryResult:
"""Execute the full recovery: handshake → firmware transfer.

Expand All @@ -75,6 +76,9 @@ async def run(
on_progress: Callback for progress events.
on_log: Callback for log events.
send_break: If True, send Ctrl-C after upload to enter U-Boot console.
max_handshake_attempts: Retry the power-cycle + handshake + DDR-init
phase up to this many times if the transient handshake fails.
Only applies when programmatic power control is configured.

Returns:
RecoveryResult with success status.
Expand All @@ -97,93 +101,153 @@ async def run(
isinstance(protocol, HiSiliconStandard) and protocol.uses_frame_blast_handshake
)

# Power cycle
if self._power and self._poe_port:
if on_log:
# Retry the transient phase (power-cycle + handshake + DDR init) up
# to ``max_handshake_attempts`` times. Only meaningful when we have
# programmatic power control — manual power cycling would require
# human re-intervention.
can_retry = bool(self._power and self._poe_port)
attempts = max_handshake_attempts if can_retry else 1

firmware = self._load_firmware()
handshake: HandshakeResult | None = None
last_attempt_error: str | None = None

for attempt in range(1, attempts + 1):
if attempt > 1 and on_log:
on_log(LogEvent(
level="info",
message=f"Power-cycling device on {self._poe_port}...",
))
if on_progress:
on_progress(ProgressEvent(
stage=Stage.POWER_CYCLE, bytes_sent=0, bytes_total=1,
message=f"Power-cycling {self._poe_port}...",
level="warn",
message=(
f"Retrying handshake (attempt {attempt}/{attempts}) — "
f"previous error: {last_attempt_error}"
),
))

try:
await self._power.power_cycle(self._poe_port)
except Exception as e:
elapsed = (time.monotonic() - start_time) * 1000
# Power cycle
if self._power and self._poe_port:
if on_log:
on_log(LogEvent(level="error", message=f"Power cycle failed: {e}"))
return RecoveryResult(
success=False,
error=f"Power cycle failed: {e}",
elapsed_ms=elapsed,
on_log(LogEvent(
level="info",
message=f"Power-cycling device on {self._poe_port}...",
))
if on_progress:
on_progress(ProgressEvent(
stage=Stage.POWER_CYCLE, bytes_sent=0, bytes_total=1,
message=f"Power-cycling {self._poe_port}...",
))

try:
await self._power.power_cycle(self._poe_port)
except Exception as e:
elapsed = (time.monotonic() - start_time) * 1000
if on_log:
on_log(LogEvent(level="error", message=f"Power cycle failed: {e}"))
return RecoveryResult(
success=False,
error=f"Power cycle failed: {e}",
elapsed_ms=elapsed,
)

# Drain serial until line stays quiet for 500ms. Replaces a
# fixed 2-second sleep + flush_input — that approach can miss
# late-arriving stale bytes (the camera may still be powering
# down when the flush runs) and isn't robust against pyserial
# buffer caveats. Quiet-detection is deterministic: a
# powered-off chip cannot transmit.
discarded = await transport.drain_until_silent(
quiet_period=0.5, max_wait=5.0,
)
if discarded and on_log:
on_log(LogEvent(
level="info",
message=f"Drained {discarded} stale bytes from serial",
))

# Wait for power to actually be cut, then flush any warm-reboot
# garbage from the serial buffer.
import asyncio
await asyncio.sleep(2.0)
await transport.flush_input()
if on_progress:
on_progress(ProgressEvent(
stage=Stage.POWER_CYCLE, bytes_sent=1, bytes_total=1,
message="Power cycle complete",
))

if on_progress:
on_progress(ProgressEvent(
stage=Stage.POWER_CYCLE, bytes_sent=1, bytes_total=1,
message="Power cycle complete",
))
# Handshake — skip for frame-blast chips (handled inside send_firmware)
if frame_blast:
if on_log:
on_log(LogEvent(
level="info",
message=f"Using sendFrameForStart handshake for {self.chip}",
))
handshake = HandshakeResult(success=True, message="Frame-blast (deferred)")
elif self._power and self._poe_port:
# Power-cycle mode with 0x20→0xAA handshake: flood 0xAA
if on_log:
on_log(LogEvent(
level="info",
message=f"Starting {self._protocol_cls.name()} handshake for {self.chip}",
))
import asyncio as _asyncio
handshake_task = _asyncio.create_task(
protocol.handshake(transport, on_progress)
)
handshake = await handshake_task
else:
# Manual power cycling — just start handshake and wait
if on_log:
on_log(LogEvent(
level="info",
message=f"Starting {self._protocol_cls.name()} handshake for {self.chip}",
))
handshake = await protocol.handshake(transport, on_progress)

# Handshake — skip for frame-blast chips (handled inside send_firmware)
if frame_blast:
if on_log:
on_log(LogEvent(
level="info",
message=f"Using sendFrameForStart handshake for {self.chip}",
))
handshake = HandshakeResult(success=True, message="Frame-blast (deferred)")
elif self._power and self._poe_port:
# Power-cycle mode with 0x20→0xAA handshake: flood 0xAA
# If non-frame-blast handshake failed and we can retry, try again
if not handshake.success:
last_attempt_error = f"handshake: {handshake.message}"
if attempt < attempts:
continue
break

# Handshake OK (or deferred for frame-blast). Send firmware.
if on_log:
on_log(LogEvent(
level="info",
message=f"Starting {self._protocol_cls.name()} handshake for {self.chip}",
message=f"Sending {len(firmware)} bytes of firmware...",
))
import asyncio
handshake_task = asyncio.create_task(
protocol.handshake(transport, on_progress)
send_result = await protocol.send_firmware(
transport, firmware, on_progress,
)
if send_result.success:
# Mutate handshake variable so post-loop code sees success.
handshake_succeeded_result = send_result
break

# Firmware send failed — only retry if it failed in the early
# handshake/DDR phase (frame-blast handshake or DDR init).
# Once we're past DDR init, retrying is unlikely to help and
# costs another 30+ seconds of upload time.
err = send_result.error or ""
is_early = (
Stage.DDR_INIT not in send_result.stages_completed
)
handshake = await handshake_task
if is_early and attempt < attempts:
last_attempt_error = err
continue

# Either past-DDR failure or final attempt — bail out.
handshake_succeeded_result = send_result
break
else:
# Manual power cycling — just start handshake and wait
# Loop completed without break — all retries exhausted on handshake
elapsed = (time.monotonic() - start_time) * 1000
if on_log:
on_log(LogEvent(
level="info",
message=f"Starting {self._protocol_cls.name()} handshake for {self.chip}",
level="error",
message=f"Handshake failed after {attempts} attempts: {last_attempt_error}",
))
handshake = await protocol.handshake(transport, on_progress)
if not handshake.success:
elapsed = (time.monotonic() - start_time) * 1000
if on_log:
on_log(LogEvent(level="error", message=f"Handshake failed: {handshake.message}"))
return RecoveryResult(
success=False,
error=f"Handshake failed: {handshake.message}",
error=f"Handshake failed: {last_attempt_error}",
elapsed_ms=elapsed,
)

# Note: handshake success is already reported via on_progress,
# so we don't duplicate it here via on_log.

# Firmware transfer
firmware = self._load_firmware()
if on_log:
on_log(LogEvent(
level="info",
message=f"Sending {len(firmware)} bytes of firmware...",
))

result = await protocol.send_firmware(transport, firmware, on_progress)
result = handshake_succeeded_result
result.elapsed_ms = (time.monotonic() - start_time) * 1000

# Send break (Ctrl-C) to interrupt U-Boot autoboot
Expand Down
Loading
Loading