<a href="https://colab.research.google.com/github/ShrikantKGIT/general/blob/main/RAID_4_Style_Error_Correcting_Storage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**A Python class that creates a higher-level, error-correcting storage device by managing three lower-level disk drives, similar to a RAID 4 setup.**

This system will use data striping across two disks and store parity information on a third, dedicated disk. This allows the system to automatically reconstruct data if one of the disks fails.

The implementation includes:

**A Mock Low-Level Disk:** A simple class that simulates a basic disk drive.

**A RAID Controller Class:** This class manages the three-disk array and provides a high-level API for write and read operations; a read automatically reconstructs the data when a single disk has failed.

**An Example:** A demonstration of writing data, simulating a disk failure, and then successfully reading the data back through automatic recovery.

In [1]:
import operator
from functools import reduce

# --- Part 1: The Low-Level Disk Drive API ---
class DiskDrive:
    """
    A simulated low-level disk drive. This is our basic hardware component.

    Storage is a list of `num_blocks` blocks, each `block_size` bytes long and
    initially zero-filled. Every read or write raises ConnectionError while
    the 'is_online' flag is False.
    """
    def __init__(self, id, num_blocks=256, block_size=256):
        """Initializes the disk with an ID and empty (zero-filled) storage."""
        self.id = id
        self.num_blocks = num_blocks
        self.block_size = block_size
        self.storage = [bytearray(block_size) for _ in range(num_blocks)]
        self.is_online = True
        print(f"Disk {self.id} initialized.")

    def _check_block_id(self, block_id):
        """Raises IndexError unless block_id addresses an existing block."""
        if not 0 <= block_id < self.num_blocks:
            raise IndexError(f"Disk Error: Block ID {block_id} is out of bounds.")

    def read_block(self, block_id):
        """
        Reads a single block of data.

        Returns an immutable copy of the block so that callers cannot alter
        the disk contents without going through write_block.

        Raises:
            ConnectionError: if the disk is offline.
            IndexError: if block_id is outside 0..num_blocks-1.
        """
        if not self.is_online:
            raise ConnectionError(f"Disk {self.id} is offline and cannot be read.")
        self._check_block_id(block_id)
        return bytes(self.storage[block_id])

    def write_block(self, block_id, data):
        """
        Writes data to a single block, zero-padding it to the block size.

        Raises:
            ConnectionError: if the disk is offline.
            ValueError: if data is longer than one block.
            IndexError: if block_id is outside 0..num_blocks-1.
        """
        if not self.is_online:
            raise ConnectionError(f"Disk {self.id} is offline and cannot be written to.")
        if len(data) > self.block_size:
            raise ValueError("Data size exceeds block size.")
        # Without this check a negative ID would silently wrap around and
        # overwrite a block at the end of the disk.
        self._check_block_id(block_id)

        # Pad data to fill the entire block; store an immutable copy.
        self.storage[block_id] = bytes(data).ljust(self.block_size, b'\0')

    def unplug(self):
        """Simulates a disk failure."""
        self.is_online = False
        print(f"CRITICAL: Disk {self.id} has been unplugged and is now offline!")

    def plug_in(self):
        """Brings a disk back online (e.g., after being replaced)."""
        self.is_online = True
        print(f"INFO: Disk {self.id} is back online.")

# --- Part 2: The Higher-Level RAID Storage Controller ---
class RAIDStorage:
    """
    Manages three disk drives in a RAID 4-like configuration.
    - Disk 0: Data Stripe 0
    - Disk 1: Data Stripe 1
    - Disk 2: Dedicated Parity

    One logical address maps to the same block ID on every disk, so a single
    logical address holds at most `stripe_size` bytes.
    """
    DATA_DISKS = 2

    def __init__(self, disk0, disk1, disk2):
        """
        Initializes with three disk instances.

        Raises:
            ValueError: if the disks do not all share the same block size.
        """
        self.disks = [disk0, disk1, disk2]
        self.parity_disk_index = 2
        # Ensure all disks have the same geometry: XOR parity is only
        # meaningful when every block in a stripe has the same length.
        self.block_size = self.disks[0].block_size
        mismatched = [
            index for index, disk in enumerate(self.disks)
            if disk.block_size != self.block_size
        ]
        if mismatched:
            raise ValueError(
                f"All disks must share block size {self.block_size}; "
                f"mismatched disk indexes: {mismatched}."
            )
        self.stripe_size = self.block_size * self.DATA_DISKS

    def _calculate_parity(self, data_blocks):
        """
        Calculates parity for a list of blocks using XOR.

        The blocks are XOR-ed together byte by byte. Because XOR is its own
        inverse, the same routine rebuilds a missing block from the surviving
        block and the parity block.
        """
        return reduce(
            lambda left, right: bytes(x ^ y for x, y in zip(left, right)),
            data_blocks,
        )

    def write(self, logical_address, data):
        """
        Writes data to the RAID array, striping it across data disks
        and writing parity to the parity disk.

        If any disk is offline the write is refused before anything is
        modified, so the stripe and its parity never get out of sync. The
        failure is reported on stdout rather than raised.

        Raises:
            ValueError: if data is longer than one stripe.
        """
        print(f"\nWriting {len(data)} bytes to logical address {logical_address}...")
        if len(data) > self.stripe_size:
            raise ValueError(f"Data size ({len(data)}) exceeds stripe size ({self.stripe_size}).")

        # Split data into one chunk per data disk and pad each chunk to a full
        # block so the parity calculation covers every byte of the block.
        block0_data = data[:self.block_size].ljust(self.block_size, b'\0')
        block1_data = data[self.block_size:].ljust(self.block_size, b'\0')
        parity_data = self._calculate_parity([block0_data, block1_data])

        # Refuse the write up front when a disk is known to be offline.
        # Writing disk by disk and aborting midway would leave new data next
        # to stale parity, and a later reconstruction would return garbage.
        offline = [
            index for index, disk in enumerate(self.disks)
            if not getattr(disk, "is_online", True)
        ]
        if offline:
            print(f"Error during write: Disk {offline[0]} is offline. "
                  "Cannot complete write with a failed disk.")
            return

        # Write blocks to their respective disks
        block_id = logical_address
        payloads = (block0_data, block1_data, parity_data)
        try:
            for disk, payload in zip(self.disks, payloads):
                disk.write_block(block_id, payload)
        except ConnectionError as e:
            # A disk dropped out between the check above and the write.
            print(f"Error during write: {e}. Cannot complete write with a failed disk.")
        else:
            print(f"Write successful to physical block {block_id} on all disks.")

    def read(self, logical_address, size):
        """
        Reads data from the RAID array. If a disk has failed, it
        automatically reconstructs the missing data using parity.

        Returns:
            The first `size` bytes stored at the logical address.

        Raises:
            IOError: if more than one disk is offline; a single parity block
                cannot rebuild two missing blocks.
        """
        print(f"\nReading {size} bytes from logical address {logical_address}...")
        block_id = logical_address

        # Try to read from all disks, remembering every disk that failed.
        blocks = [None] * len(self.disks)
        failed_indexes = []
        for i, disk in enumerate(self.disks):
            try:
                blocks[i] = disk.read_block(block_id)
            except ConnectionError:
                print(f"Warning: Could not read from Disk {i}. It appears to be offline.")
                failed_indexes.append(i)

        if len(failed_indexes) > 1:
            raise IOError("More than one disk has failed. Data is unrecoverable.")

        data_blocks = blocks[:self.DATA_DISKS]
        parity_block = blocks[self.parity_disk_index]

        # --- Error Correction Logic ---
        if failed_indexes:
            print("Attempting data recovery...")
            failed_disk_index = failed_indexes[0]
            if failed_disk_index == self.parity_disk_index:
                print("Parity disk has failed. Data is intact but redundancy is lost.")
                # Data can still be served, but a new parity block should be rebuilt.
            else:
                # With two data disks the survivor is simply "the other one".
                survivor_index = 1 - failed_disk_index
                print(f"Reconstructing Disk {failed_disk_index} data "
                      f"from Disk {survivor_index} and Parity.")
                data_blocks[failed_disk_index] = self._calculate_parity(
                    [data_blocks[survivor_index], parity_block]
                )
            print("Recovery successful.")

        # Combine the data blocks and trim to the requested size
        return b''.join(data_blocks)[:size]

# --- Part 3: Example Usage ---
if __name__ == "__main__":
    # Demo walkthrough: write a message, read it back, knock out one data
    # disk, then read again and confirm the controller rebuilds the message.

    # Step 1 - "install" the hardware: three identical drives with IDs 0..2.
    disk0, disk1, disk2 = (DiskDrive(id=drive_id) for drive_id in range(3))

    # Step 2 - put the high-level storage controller on top of the drives.
    raid_controller = RAIDStorage(disk0, disk1, disk2)

    # Step 3 - store a message in the array.
    my_data = b"This is a secret message that is very important and must not be lost."
    raid_controller.write(logical_address=42, data=my_data)

    # Step 4 - read it straight back while every disk is healthy.
    retrieved_data = raid_controller.read(logical_address=42, size=len(my_data))
    print(f"\nInitial read successful. Data: '{retrieved_data.decode()}'")

    print(f"\n{'=' * 50}\n")

    # Step 5 - take data disk 1 offline to simulate a hardware failure.
    print("!!! SIMULATING DISK 1 FAILURE !!!")
    disk1.unplug()

    # Step 6 - read again; the controller is expected to rebuild the
    # missing half of the stripe from disk 0 and the parity disk.
    recovered_data = raid_controller.read(logical_address=42, size=len(my_data))

    print("\n--- Final Result ---")
    print(f"Original data:  '{my_data.decode()}'")
    print(f"Recovered data: '{recovered_data.decode()}'")

    print(
        "\nSUCCESS: Data was successfully recovered from the failed disk array!"
        if my_data == recovered_data
        else "\nFAILURE: Data could not be recovered correctly."
    )


Disk 0 initialized.
Disk 1 initialized.
Disk 2 initialized.

Writing 69 bytes to logical address 42...
Write successful to physical block 42 on all disks.

Reading 69 bytes from logical address 42...

Initial read successful. Data: 'This is a secret message that is very important and must not be lost.'


!!! SIMULATING DISK 1 FAILURE !!!
CRITICAL: Disk 1 has been unplugged and is now offline!

Reading 69 bytes from logical address 42...
Attempting data recovery...
Reconstructing Disk 1 data from Disk 0 and Parity.
Recovery successful.

--- Final Result ---
Original data:  'This is a secret message that is very important and must not be lost.'
Recovered data: 'This is a secret message that is very important and must not be lost.'

SUCCESS: Data was successfully recovered from the failed disk array!
