In [1]:
# An investigation of Reed Solomon reliability for the bounded-fault scenario
# Tanj Bennett, copyright (c) 2023, Avant-Gray LLC
# Usage rights granted under terms of the MIT open-source license.

# using the very nice work of Tomer Filiba, whose work remains separate
# at https://github.com/tomerfiliba-org/reedsolomon/blob/master/src/reedsolo/reedsolo.py

# uncomment the following line once to install that library
# pip install --upgrade reedsolo

In [2]:
# Import the codec and verify the import succeeded by constructing one.

from reedsolo import RSCodec, ReedSolomonError
rsc = RSCodec(4)  # 4 ecc symbols; used below with 32 data bytes as RS(36,32).
rsc  # bare last expression: the cell output is the codec's repr

<reedsolo.RSCodec at 0x1ec3f29ce30>

In [3]:
# import and flex random number generation.  Nothing fancy is needed,
# but the generator is seeded so that a fresh "Restart & Run All" replays
# exactly the same series of injected errors in every experiment below.

import random

SEED = 2023  # arbitrary fixed value; change it to draw a different series of trials
random.seed(SEED)

r = random.randint(0,255)
r

99

In [4]:
# Encoding
# The message is simply the byte values 1 through 32, i.e. 32 data symbols.
x = bytearray(range(1, 33))

# Adding 4 check symbols to 32 data bytes gives RS(36,32), which supports 256 bit data.
y = rsc.encode(x)

# show the lengths and contents of the message and of the codeword
len(x), x, len(y), y

(32,
 bytearray(b'\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '),
 36,
 bytearray(b'\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \xdd\xfa\xc1\xc6'))

In [5]:
# if we try to correct a bounded error (just 2 symbols)  RS(36,32) should fix them all.
#   We count the attempts which raised ReedSolomonError, i.e. the decodes that failed.
#   The expected count is zero.

detections = 0
lastBound = 17   # index of the last aligned 2-symbol pair: pairs 0..17 cover all 36 symbols

for trials in range (0,10000):
    # keep permuting the data to ensure we see impact from both the data and the errors:
    # the (corrupted) data bytes of the previous trial are re-encoded into a fresh codeword.
    y = rsc.encode(y[0:32])

    # corrupt both symbols of one randomly chosen pair.
    # XOR with a value in 1..255 guarantees that the symbol really changes.
    i = 2 * random.randint(0,lastBound)
    for offset in range(2):
        y[i + offset] ^= random.randint(1,255)

    try:
        z = rsc.decode(y)
    except ReedSolomonError:
        # Only a genuine decoder failure is counted.  A bare `except:` would also
        # swallow KeyboardInterrupt and count unrelated bugs as detections.
        detections += 1

# `trials` is the last loop index, so the number of trials run is trials + 1
trials, detections, y

(9999,
 0,
 bytearray(b'8?\x958_\xc7d\xcc\xecT\xe1\xf2\xbd\x83\x96O\xeb\x99\xb5\x02m+i\xd6\xaf\xff\x15\xc8K\x14\xb6O\xf9>\xf5\xce'))

In [6]:
# The ratio of failures should be zero.
# `trials` holds the last loop index of the preceding run, so trials + 1 is the number of trials.

shouldCorrectButFailed = detections / (trials + 1)
shouldCorrectButFailed


0.0

In [7]:
# if we try to correct a bounded error (just 2 symbols) but overwhelm it with a whole-chip error, how good is RS(36,32) at
#   reporting the uncorrectables?  We count the attempts which raised ReedSolomonError, i.e. the
#   errors the decoder reported instead of silently "correcting" to wrong data.

detections = 0
lastChip = 8   # index of the last chip: chips 0..8, 4 symbols each, cover all 36 symbols

for trials in range (0,10000):
    # keep permuting the data to ensure we see impact from both the data and the errors:
    # the (corrupted) data bytes of the previous trial are re-encoded into a fresh codeword.
    y = rsc.encode(y[0:32])

    # corrupt all 4 symbols of one randomly chosen chip.
    # XOR with a value in 1..255 guarantees that every symbol really changes.
    i = 4 * random.randint(0,lastChip)
    for offset in range(4):
        y[i + offset] ^= random.randint(1,255)

    try:
        z = rsc.decode(y)
    except ReedSolomonError:
        # Only a genuine decoder failure is counted.  A bare `except:` would also
        # swallow KeyboardInterrupt and count unrelated bugs as detections.
        detections += 1

# `trials` is the last loop index, so the number of trials run is trials + 1
trials, detections, y

(9999,
 9919,
 bytearray(b'l\x03+aC\xf2\xa3\x9d\xa9n\x9f\x9a{\x99\xe1\xe6\xdc\xf1\xf4\x1f\xab\x1ez\x12E\x8d\x87\xd9W\x04\xf6\x95fT\xad\x1f'))

In [8]:
# The ratio of detection vs. deception attempts shows the quality of defense.
# `trials` holds the last loop index of the preceding run, so trials + 1 is the number of trials.

probity = detections / (trials + 1)

# what is observed is nearly 99%: the RS(36,32) code will allow around 1% of whole-chip errors to pass undetected.
# multichip errors are stressed too, in the test that follows.

probity


0.9919

In [9]:
# how good is RS(36,32) at detecting multichip uncorrectables?  These might be generated by RowHammer
# We count the attempts which raised ReedSolomonError, i.e. the errors the decoder reported.

detections = 0
last = 35        # index of the last symbol of the 36-symbol codeword
errorCount = 5   # set to 3, 4, or 5 to compare.  The ratio barely changes.

for trials in range (0,10000):
    # keep permuting the data to ensure we see impact from both the data and the errors
    y = rsc.encode(y[0:32])

    # random.sample draws errorCount distinct positions, uniformly over the codeword.
    # XOR with a value in 1..255 guarantees that every chosen symbol really changes.
    for errorPosition in random.sample(range(last + 1), errorCount):
        y[errorPosition] ^= random.randint(1,255)

    try:
        z = rsc.decode(y)
    except ReedSolomonError:
        # Only a genuine decoder failure is counted.  A bare `except:` would also
        # swallow KeyboardInterrupt and count unrelated bugs as detections.
        detections += 1

# `trials` is the last loop index, so the number of trials run is trials + 1
trials, detections, y

(9999,
 9908,
 bytearray(b"m\xfa*V\xd2\xf1l\xb5n]\xa0\xee\xdc\xb4\x05\xc4S\xd5\x87\xf8\xc0g\xc3\xec\x91\xe3\xf2\x13:\xe8\\t\'\xa0T\x9b"))

In [10]:
# The ratio of detection vs. deception attempts shows the quality of defense.
# `trials` holds the last loop index of the preceding run, so trials + 1 is the number of trials.

probity = detections / (trials + 1)

# what is observed is about 99%: the RS(36,32) code will allow about 1% of multichip errors to pass silently.

probity

0.9908

In [11]:
# this last run models using R-S with erasures specifying each chip in turn, to see if it will reject wrong chips.
# this would give hope of using the 9-chip position information to correct some full-chip errors with just 4 check symbols.
# we were not expecting it to work: it does not.
# (declaring 4 erasures with only 4 check symbols uses up all of the redundancy, so nothing
#  is left over with which the decoder could cross-check a wrong guess.)

detections = 0   # decode attempts that raised ReedSolomonError
positions = 0    # total decode attempts made with a deliberately wrong chip
lastChip = 8     # index of the last chip: chips 0..8, 4 symbols each, cover all 36 symbols
for trials in range (0,1000):
    # keep permuting the data to ensure we see impact from both the data and the errors
    y = rsc.encode(y[0:32])

    # corrupt all 4 symbols of one randomly chosen chip; i is that chip's first symbol index
    i = 4 * random.randint(0,lastChip)
    for offset in range(4):
        y[i + offset] ^= random.randint(1,255)

    # declare every OTHER chip, in turn, as the erased one and see whether the decoder objects
    for j in range (0,lastChip + 1):
        jj = j * 4
        if (jj != i):
            positions += 1
            try:
                z = rsc.decode(y, erase_pos = [jj, jj+1, jj+2, jj+3])
            except ReedSolomonError:
                # Only a genuine decoder failure is counted.  A bare `except:` would also
                # swallow KeyboardInterrupt and count unrelated bugs as detections.
                detections += 1

# `trials` is the last loop index; `i` is the corrupted chip's offset in the final trial
trials, detections, i, positions


(999, 0, 12, 8000)

In [12]:
# we would need a ratio of 0.99 or better for this method to make sense

# Every trial makes several decode attempts (one per wrongly guessed chip) and each of
# them can add a detection, so the ratio must be taken over the number of attempts
# (`positions`).  Dividing by the number of trials could give a "ratio" of up to 8.
safetyOfGuessing = detections / positions

# what is observed is zero: the R-S algorithm will hallucinate values if you specify the wrong erasure positions.

safetyOfGuessing

0.0

In [13]:
from reedsolo import RSCodec, ReedSolomonError
rsc8 = RSCodec(8)  # 8 ecc symbols.

# how good is RS(40,32) at detecting multichip uncorrectables?  These might be generated by RowHammer
# We count the attempts which raised ReedSolomonError, i.e. the errors the decoder reported.

detections = 0
last = 39        # index of the last symbol of the 40-symbol codeword

# set errorCount to 5, 6, or 7 to compare.  They are all perfectly detected.
# all 4-location errors are corrected.
errorCount = 7

for trials in range (0,10000):
    # keep permuting the data to ensure we see impact from both the data and the errors
    y = rsc8.encode(y[0:32])

    # random.sample draws errorCount distinct positions, uniformly over the codeword.
    # XOR with a value in 1..255 guarantees that every chosen symbol really changes.
    for errorPosition in random.sample(range(last + 1), errorCount):
        y[errorPosition] ^= random.randint(1,255)

    try:
        z = rsc8.decode(y)
    except ReedSolomonError:
        # Only a genuine decoder failure is counted.  A bare `except:` would also
        # swallow KeyboardInterrupt and count unrelated bugs as detections.
        detections += 1

# `trials` is the last loop index, so the number of trials run is trials + 1
trials, detections, y

(9999,
 0,
 bytearray(b'\xd8\xf7\x99/.C-O\xf2\xfe\xc6\xa8\x87Z\xf5W\x89\x08V5\x12\x9c\xa6\n\xdc\xf8\xd3O9\x84\xbd\xfa,\xdb\x14v\xf1\xf5Wr'))

In [14]:
# The ratio of detection vs. deception attempts shows the quality of defense.
# `trials` holds the last loop index of the preceding run, so trials + 1 is the number of trials.

probity = detections / (trials + 1)

# what is observed is essentially perfect: the RS(40,32) code will detect all uncorrectable errors
# NOTE(review): the output recorded with this notebook shows 0 detections (ratio 0.0), which does
# not match the statement above.  It looks like it was captured from a run with only 4 errors
# enabled (all corrected, hence no exceptions) - re-run with 5 to 7 errors to confirm.

probity

0.0