Skip to content

Commit

Permalink
Add retry reading/setting mux status to simulated y-cable driver (#221)
Browse files Browse the repository at this point in the history
Description
Add retry reading/setting mux status to simulated y-cable driver

Motivation and Context
When the DUT is rebooted, xcvrd may call the simulated y-cable driver to get the
mux direction before the mgmt interface is up. The simulated y-cable driver has
to send an HTTP request to the mux simulator server to read the mux status, so
it depends on the mgmt interface. This can result in the error below:

Oct 16 03:15:44.029933 sonic-dut ERR pmon#xcvrd[34]: y_cable_port 1: GET http://192.168.1.33:8082/mux/vms21-6/0 for physical_port 1 failed with URLError(OSError(113, 'No route to host'))
Oct 16 03:15:44.030306 sonic-dut ERR pmon#xcvrd[34]: Error: Could not establish the active side for  Y cable port Ethernet0 to perform read_y_cable update state db

This can cause further problems and may leave the same interface in the
"standby" state on both the upper ToR and the lower ToR.

The fix is to add retries to the simulated y-cable driver when reading or
setting the mux status. The retry interval is 1 second and the retry timeout is
30 seconds.

How Has This Been Tested?
* The issue can be reliably reproduced on a 7260 dualtor testbed after running the test_acl::TestAclWithReboot cases.
With this fix, the issue can no longer be reproduced.
* Tested the config mux mode active command with or without icmp responder.
* Tested updating the mux status by calling the mux simulator API to check that the new status is reflected on the DUTs.

Signed-off-by: Xin Wang <xiwang5@microsoft.com>
  • Loading branch information
wangxin committed Oct 19, 2021
1 parent 2ebd786 commit 8bb9c5a
Showing 1 changed file with 91 additions and 34 deletions.
125 changes: 91 additions & 34 deletions sonic_y_cable/microsoft/y_cable_simulated.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import urllib.request
import urllib.error
import time

from sonic_py_common import device_info
from portconfig import get_port_config
Expand All @@ -32,6 +33,10 @@ class YCable(YCableBase):
NIC_VOLTAGE = 5.0
LOCAL_VOLTAGE = 5.0

POLL_TIMEOUT = 30
POLL_INTERVAL = 1
URLOPEN_TIMEOUT = 5

def __init__(self, port, logger):
YCableBase.__init__(self, port, logger)
if not os.path.exists(self.MUX_SIMULATOR_CONFIG_FILE) or not os.path.isfile(self.MUX_SIMULATOR_CONFIG_FILE):
Expand Down Expand Up @@ -85,22 +90,45 @@ def _get(self, url=None):
else:
get_url = self._url

try:
start_time = time.time()
attempt = 1
while True:
try:
req = urllib.request.Request(get_url)
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read().decode('utf-8'))
except urllib.error.HTTPError as e:
self.log_error('GET {} for physical_port {} failed with {}, detail: {}'.format(
try:
req = urllib.request.Request(get_url)
with urllib.request.urlopen(req, timeout=self.URLOPEN_TIMEOUT) as resp:
return json.loads(resp.read().decode('utf-8'))
except urllib.error.HTTPError as e:
self.log_error('attempt={}, GET {} for physical_port {} failed with {}, detail: {}'.format(
attempt,
get_url,
self.port,
repr(e),
e.read()))
except (urllib.error.URLError, json.decoder.JSONDecodeError, Exception) as e:
self.log_error('attempt={}, GET {} for physical_port {} failed with {}'.format(
attempt,
get_url,
self.port,
repr(e)))

# Retry in case of exception, to workaround 'no route to host' issue after pmon restart
if (time.time() - start_time) > self.POLL_TIMEOUT:
self.log_error('Retry GET {} for physical port {} timeout after {} seconds, attempted={}'.format(
get_url,
self.port,
repr(e),
e.read()))
except (urllib.error.URLError, json.decoder.JSONDecodeError, Exception) as e:
self.log_error('GET {} for physical_port {} failed with {}'.format(
get_url,
self.port,
repr(e)))
self.POLL_TIMEOUT,
attempt
))
break
else:
self.log_notice('Sleep {} seconds to retry GET {} for physical port {}'.format(
self.POLL_INTERVAL,
get_url,
self.port
))
attempt += 1
time.sleep(self.POLL_INTERVAL)

return None

Expand All @@ -118,27 +146,52 @@ def _post(self, url=None, data=None):
else:
post_data = None

try:
start_time = time.time()
attempt = 1
while True:
try:
headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
req = urllib.request.Request(post_url, post_data, headers, method='POST')
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read().decode('utf-8'))
except urllib.error.HTTPError as e:
self.log_error('POST {} with data {} for physical_port {} failed with {}, detail: {}'.format(
post_url,
try:
headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
req = urllib.request.Request(post_url, post_data, headers, method='POST')
with urllib.request.urlopen(req, timeout=self.URLOPEN_TIMEOUT) as resp:
return json.loads(resp.read().decode('utf-8'))
except urllib.error.HTTPError as e:
self.log_error('attempt={}, POST {} with data {} for physical_port {} failed with {}, detail: {}'.format(
attempt,
post_url,
post_data,
self.port,
repr(e),
e.read()
))
except (urllib.error.URLError, json.decoder.JSONDecodeError, Exception) as e:
self.log_error('attempt={}, POST {} with data {} for physical_port {} failed with {}'.format(
attempt,
post_url,
post_data,
self.port,
repr(e)
))

# Retry in case of exception, to workaround 'no route to host' issue after pmon restart
if time.time() - start_time > self.POLL_TIMEOUT:
self.log_error('Retry POST {} with data{} for physical port {} timeout after {} seconds, attempted={}'.format(
get_url,
post_data,
self.port,
repr(e),
e.read()
self.POLL_TIMEOUT,
attempt
))
except (urllib.error.URLError, json.decoder.JSONDecodeError, Exception) as e:
self.log_error('POST {} with data {} for physical_port {} failed with {}'.format(
post_url,
break
else:
self.log_notice('Sleep {} seconds to retry POST {} with data {} for physical port {}'.format(
self.POLL_INTERVAL,
get_url,
post_data,
self.port,
repr(e)
self.port
))
attempt += 1
time.sleep(self.POLL_INTERVAL)

return None

Expand Down Expand Up @@ -244,13 +297,17 @@ def get_mux_direction(self):
TARGET_UNKNOWN, if mux direction API fails.
"""
status = self._get_status()
if not status:

if not isinstance(status, dict):
return self.TARGET_UNKNOWN

if status['active_side'] == self.UPPER_TOR:
return self.TARGET_TOR_A
elif status['active_side'] == self.LOWER_TOR:
return self.TARGET_TOR_B
if 'active_side' in status:
if status['active_side'] == self.UPPER_TOR:
return self.TARGET_TOR_A
elif status['active_side'] == self.LOWER_TOR:
return self.TARGET_TOR_B
else:
return self.TARGET_UNKNOWN
else:
return self.TARGET_UNKNOWN

Expand Down Expand Up @@ -1243,4 +1300,4 @@ def debug_dump_registers(self, option=None):
which would help diagnose the cable for proper functioning
"""

return {}
return {}

0 comments on commit 8bb9c5a

Please sign in to comment.