In [4]:
!pip install bcrypt
!pip install cryptography
!pip install argon2-cffi



In [9]:
### HashGenerator Project: Progressive Hashing Implementations
# This project will implement advanced hashing techniques, including:
# 1. Cryptographic Hashing
# 2. Incremental Hashing (for streaming data)
# 3. Progressive Hashing with modular design
# 4. Salting and Keyed Hash Functions for security
# 5. Advanced Techniques: PBKDF2, bcrypt, scrypt, Argon2, SHA3-512, and Blake2b
# The code is modular, with in-depth explanations for each part.

import hashlib
import hmac
import bcrypt
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from cryptography.hazmat.primitives.kdf.scrypt import Scrypt
import argon2 as Argon2
from cryptography.hazmat.primitives.hashes import SHA256, SHA3_512, BLAKE2b
from base64 import urlsafe_b64encode
from typing import Optional, Tuple

class HashGenerator:
    """
    A modular implementation for hashing using progressive techniques.
    """

    def __init__(self, algorithm: str = 'sha256', salt: Optional[bytes] = None):
        """
        Initialize the HashGenerator with a specified hashing algorithm and optional salt.

        Args:
            algorithm (str): The hashing algorithm to use (e.g., 'sha256', 'sha512').
            salt (Optional[bytes]): A salt value for enhancing security.
        """
        if algorithm not in hashlib.algorithms_available:
            raise ValueError(f"Unsupported hashing algorithm: {algorithm}")

        self.algorithm = algorithm
        self.salt = salt if salt else b''
        self.hasher = hashlib.new(algorithm)
        print(f"Initialized HashGenerator with {algorithm} and salt: {self.salt}")

    def hash_data(self, data: bytes) -> str:
        """
        Compute the hash of the input data with optional salting.

        Args:
            data (bytes): The input data to hash.

        Returns:
            str: The hexadecimal representation of the hash.
        """
        self.hasher.update(self.salt + data)
        hashed_value = self.hasher.hexdigest()
        print(f"Data hashed: {data} -> {hashed_value}")
        return hashed_value

    def incremental_hash(self, data_chunks: list[bytes]) -> str:
        """
        Compute a hash incrementally by processing chunks of data.

        Args:
            data_chunks (list[bytes]): A list of data chunks.

        Returns:
            str: The hexadecimal representation of the hash.
        """
        for chunk in data_chunks:
            print(f"Processing chunk: {chunk}")
            self.hasher.update(self.salt + chunk)
        incremental_hash_value = self.hasher.hexdigest()
        print(f"Incremental hash: {incremental_hash_value}")
        return incremental_hash_value

    @staticmethod
    def keyed_hash(data: bytes, key: bytes, algorithm: str = 'sha256') -> str:
        """
        Compute an HMAC (Keyed-Hash Message Authentication Code) using a given key.

        Args:
            data (bytes): The input data to hash.
            key (bytes): The secret key for HMAC.
            algorithm (str): The hashing algorithm to use.

        Returns:
            str: The hexadecimal representation of the HMAC.
        """
        hmac_instance = hmac.new(key, data, digestmod=algorithm)
        hmac_value = hmac_instance.hexdigest()
        print(f"Keyed hash (HMAC): {data} with key {key} -> {hmac_value}")
        return hmac_value

    @staticmethod
    def pbkdf2_hash(password: bytes, salt: bytes, iterations: int = 100000) -> str:
        """
        Derive a key using PBKDF2 (Password-Based Key Derivation Function 2).

        Args:
            password (bytes): The password to hash.
            salt (bytes): A unique salt value.
            iterations (int): Number of iterations.

        Returns:
            str: The derived key as a base64-encoded string.
        """
        kdf = PBKDF2HMAC(algorithm=SHA256(), length=32, salt=salt, iterations=iterations)
        key = urlsafe_b64encode(kdf.derive(password)).decode('utf-8')
        print(f"PBKDF2 hash derived: {key}")
        return key

    @staticmethod
    def bcrypt_hash(password: str) -> str:
        """
        Hash a password using bcrypt.

        Args:
            password (str): The password to hash.

        Returns:
            str: The hashed password.
        """
        hashed = bcrypt.hashpw(password.encode(), bcrypt.gensalt())
        print(f"bcrypt hash: {hashed}")
        return hashed.decode()

    @staticmethod
    def scrypt_hash(password: bytes, salt: bytes, n: int = 2**14, r: int = 8, p: int = 1) -> str:
        """
        Derive a key using scrypt.

        Args:
            password (bytes): The password to hash.
            salt (bytes): A unique salt value.
            n (int): CPU/memory cost parameter.
            r (int): Block size parameter.
            p (int): Parallelization parameter.

        Returns:
            str: The derived key as a base64-encoded string.
        """
        kdf = Scrypt(salt=salt, length=32, n=n, r=r, p=p)
        key = urlsafe_b64encode(kdf.derive(password)).decode('utf-8')
        print(f"scrypt hash derived: {key}")
        return key

    @staticmethod
    def argon2_hash(password: bytes, salt: bytes, time_cost: int = 2, memory_cost: int = 102400, parallelism: int = 8) -> str:
        """
        Derive a key using Argon2 (a modern memory-hard hashing algorithm).

        Args:
            password (bytes): The password to hash.
            salt (bytes): A unique salt value.
            time_cost (int): Number of iterations.
            memory_cost (int): Memory cost in KB.
            parallelism (int): Number of parallel threads.

        Returns:
            str: The derived key as a base64-encoded string.
        """
        kdf = Argon2(time_cost=time_cost, memory_cost=memory_cost, parallelism=parallelism, length=32)
        key = urlsafe_b64encode(kdf.derive(password, salt)).decode('utf-8')
        print(f"Argon2 hash derived: {key}")
        return key

    @staticmethod
    def sha3_512_hash(data: bytes) -> str:
        """
        Compute a SHA3-512 hash of the input data.

        Args:
            data (bytes): The input data to hash.

        Returns:
            str: The hexadecimal representation of the hash.
        """
        hash_obj = hashlib.sha3_512()
        hash_obj.update(data)
        hashed_value = hash_obj.hexdigest()
        print(f"SHA3-512 hash: {data} -> {hashed_value}")
        return hashed_value

    @staticmethod
    def blake2b_hash(data: bytes, digest_size: int = 64) -> str:
        """
        Compute a Blake2b hash of the input data.

        Args:
            data (bytes): The input data to hash.
            digest_size (int): Size of the output digest (default: 64 bytes).

        Returns:
            str: The hexadecimal representation of the hash.
        """
        hash_obj = hashlib.blake2b(digest_size=digest_size)
        hash_obj.update(data)
        hashed_value = hash_obj.hexdigest()
        print(f"Blake2b hash: {data} -> {hashed_value}")
        return hashed_value

    @staticmethod
    def verify_hash(data: bytes, expected_hash: str, algorithm: str = 'sha256') -> bool:
        """
        Verify if the hash of the data matches the expected hash.

        Args:
            data (bytes): The input data to verify.
            expected_hash (str): The expected hash value.
            algorithm (str): The hashing algorithm to use.

        Returns:
            bool: True if the hash matches, False otherwise.
        """
        hash_obj = hashlib.new(algorithm)
        hash_obj.update(data)
        actual_hash = hash_obj.hexdigest()
        print(f"Verifying hash: {data} -> {actual_hash} (expected: {expected_hash})")
        return actual_hash == expected_hash

    @staticmethod
    def dehash_placeholder():
        """
        Placeholder for "dehashing" functionality. Note that cryptographic hashes are not reversible,
        so this function serves as an explanation that dehashing is not feasible for secure hashes.

        Returns:
            str: Explanation about the impossibility of dehashing.
        """
        explanation = (
            "Cryptographic hashes are designed to be one-way functions. Dehashing is not feasible "
            "because it would break the fundamental security properties of the hash function. "
            "Use secure password recovery techniques (e.g., password reset mechanisms) instead."
        )
        print(explanation)
        return explanation


if __name__ == "__main__":
    # Initialize a HashGenerator instance with a specific algorithm and salt
    salt = b'secure_salt'
    generator = HashGenerator(algorithm='sha256', salt=salt)

    # Example data to hash
    data = b"Hello, this is a sample input for hashing!"

    # 1. Cryptographic Hashing
    print("=== Cryptographic Hashing ===")
    hash_result = generator.hash_data(data)
    print(f"SHA256 Hash: {hash_result}")

    # 2. Incremental Hashing
    print("\n=== Incremental Hashing ===")
    data_chunks = [b"Chunk 1: ", b"Chunk 2: ", b"Chunk 3!"]
    incremental_hash_result = generator.incremental_hash(data_chunks)
    print(f"Incremental Hash: {incremental_hash_result}")

    # 3. Keyed Hash (HMAC)
    print("\n=== Keyed Hashing (HMAC) ===")
    key = b'secret_key'
    hmac_result = generator.keyed_hash(data, key)
    print(f"HMAC (SHA256): {hmac_result}")

    # 4. Password Hashing with PBKDF2
    print("\n=== Password Hashing (PBKDF2) ===")
    password = b'my_secure_password'
    pbkdf2_result = generator.pbkdf2_hash(password, salt, iterations=100000)
    print(f"PBKDF2 Derived Key: {pbkdf2_result}")

    # 5. Password Hashing with bcrypt
    print("\n=== Password Hashing (bcrypt) ===")
    bcrypt_result = generator.bcrypt_hash("my_secure_password")
    print(f"bcrypt Hashed Password: {bcrypt_result}")

    # 6. Password Hashing with scrypt
    print("\n=== Password Hashing (scrypt) ===")
    scrypt_result = generator.scrypt_hash(password, salt, n=2**14, r=8, p=1)
    print(f"scrypt Derived Key: {scrypt_result}")

    # 7. Password Hashing with Argon2
    print("\n=== Password Hashing (Argon2) ===")
    try:
        argon2_result = generator.argon2_hash(password, salt)
        print(f"Argon2 Derived Key: {argon2_result}")
    except Exception as e:
        print(f"Argon2 hashing failed: {e}")

    # 8. Advanced Cryptographic Hashing (SHA3-512 and Blake2b)
    print("\n=== Advanced Cryptographic Hashing ===")
    sha3_512_result = generator.sha3_512_hash(data)
    print(f"SHA3-512 Hash: {sha3_512_result}")
    blake2b_result = generator.blake2b_hash(data, digest_size=64)
    print(f"Blake2b Hash: {blake2b_result}")

    # 9. Hash Verification
    print("\n=== Hash Verification ===")
    is_valid = generator.verify_hash(data, hash_result, algorithm='sha256')
    print(f"Is the original hash valid? {'Yes' if is_valid else 'No'}")

    # 10. Dehashing Explanation
    print("\n=== Dehashing Explanation ===")
    dehash_info = generator.dehash_placeholder()
    print(dehash_info)

Initialized HashGenerator with sha256 and salt: b'secure_salt'
=== Cryptographic Hashing ===
Data hashed: b'Hello, this is a sample input for hashing!' -> 53d363106012cd1a74ba9bb58b656a8cf03b1166f6997ee89fcdf713933e6407
SHA256 Hash: 53d363106012cd1a74ba9bb58b656a8cf03b1166f6997ee89fcdf713933e6407

=== Incremental Hashing ===
Processing chunk: b'Chunk 1: '
Processing chunk: b'Chunk 2: '
Processing chunk: b'Chunk 3!'
Incremental hash: 2d84c0983daa1bb3786987f7164ee0257b40ab785b0097b49d0ff42f4502470b
Incremental Hash: 2d84c0983daa1bb3786987f7164ee0257b40ab785b0097b49d0ff42f4502470b

=== Keyed Hashing (HMAC) ===
Keyed hash (HMAC): b'Hello, this is a sample input for hashing!' with key b'secret_key' -> 29916c928881935606d1d893a9239c8a0802fb4dae31b6485deb2e913395c8a4
HMAC (SHA256): 29916c928881935606d1d893a9239c8a0802fb4dae31b6485deb2e913395c8a4

=== Password Hashing (PBKDF2) ===
PBKDF2 hash derived: dGKjfq4Nxxq7Xt5niEkFGF35TlvA0URnBbPcpz_uW64=
PBKDF2 Derived Key: dGKjfq4Nxxq7Xt5niEkFGF35TlvA

##**Explanation of All Used Techniques**

If you wish to learn more about the techniques as well used in the code , this below explanation is for you...

### **1. Hashing Overview**

Hashing is the process of converting data into a fixed-size string of characters, typically a hash value, through a hash function. Hashes are commonly used for data indexing, integrity verification, and cryptographic purposes.

#### **Why?**
To provide a unique identifier for data that:
1. Is of a fixed size.
2. Can be used for quick lookups or integrity checks.

#### **Mathematical Formulation**
A hash function $ H $ maps input $ x $ to an output $ h $:
$
h = H(x)
$
Where:
- $ x $: Input data.
- $ h $: Fixed-size hash value.

#### **Procedure**
1. Select a hash function (e.g., MD5, SHA-256).
2. Pass the data through the function.
3. Store or compare the resulting hash.

#### **Example Usage**
- **Databases:** Storing hashed passwords.
- **Files:** Verifying downloads.

#### **Benefits**
- Fast and deterministic.
- Produces unique outputs for unique inputs.

#### **Drawbacks**
- Hash collisions (rare for cryptographic hashes).

---

### **2. Cryptographic Hashing**

Cryptographic hashing enhances basic hashing by ensuring specific properties like pre-image resistance, second pre-image resistance, and collision resistance.

#### **Why?**
To ensure secure data hashing, especially for sensitive information like passwords and digital signatures.

#### **Key Properties**
1. **Pre-image Resistance:** It should be computationally infeasible to reverse the hash.
2. **Collision Resistance:** Two different inputs shouldn’t produce the same hash.
3. **Avalanche Effect:** Small changes in input result in significant changes in the hash.

#### **Common Algorithms**
- MD5 (deprecated)
- SHA-256 (widely used)
- SHA-3

#### **Procedure**
1. Choose a cryptographic hash algorithm.
2. Hash the input data securely.
3. Use the hash for validation, not reconstruction.

#### **Example Usage**
- **Digital Signatures:** Verify message authenticity.
- **Blockchain:** Secure transaction records.

#### **Benefits**
- Strong security guarantees.
- Widely adopted and standardized.

#### **Drawbacks**
- Computationally intensive.
- Vulnerable if the hash algorithm becomes obsolete.

---

### **3. Incremental Hashing**

Incremental hashing computes the hash of data in chunks, allowing large data to be hashed incrementally instead of loading the entire dataset into memory.

#### **Why?**
To handle hashing for large files or streams efficiently without requiring excessive memory.

#### **Mathematical Formulation**
The hash of a concatenation of chunks $ x_1, x_2, \ldots, x_n $ is:
$
H(x_1 \oplus x_2 \oplus \ldots \oplus x_n)
$

#### **Procedure**
1. Initialize a hash object.
2. Update the hash object with data chunks.
3. Finalize the hash computation.

#### **Example Usage**
- **File Verification:** Hashing large files incrementally.
- **Streaming Data:** Hashing real-time data streams.

#### **Benefits**
- Handles large data efficiently.
- Reduces memory usage.

#### **Drawbacks**
- Slightly more complex than simple hashing.

---

### **4. Keyed Hashing (HMAC)**

#### **Concept**
HMAC (Hash-based Message Authentication Code) combines a cryptographic hash function with a secret key to ensure data authenticity and integrity.

#### **Why?**
To protect against tampering and verify that the data originates from a trusted source.

#### **Mathematical Formulation**
$
HMAC(K, M) = H((K \oplus opad) \| H((K \oplus ipad) \| M))
$
Where:
- $ K $: Secret key.
- $ M $: Message.
- $ H $: Hash function.
- $ opad, ipad $ : Outer and inner padding.

#### **Procedure**
1. Pad the key.
2. Hash the inner message and key.
3. Hash the outer message with the result of step 2.
4. Output the final HMAC.


#### **Example Usage**
- **API Authentication:** Secure API communication.
- **File Integrity:** Verify file authenticity.

#### **Benefits**
- Strong integrity and authenticity guarantees.
- Resistant to length-extension attacks.

#### **Drawbacks**
- Requires secure key management.
- Slightly slower due to additional hashing.

---

### **5. Salting**

Salting adds random data (a "salt") to input before hashing to ensure uniqueness of the hash, even for identical inputs.

#### **Why?**
To mitigate the risks of hash collisions and precomputed dictionary attacks.

#### **Mathematical Formulation**
$
H'(M, S) = H(M + S)
$
Where:
- $ M $: Original message.
- $ S $: Salt.
- $ H $: Hash function.

#### **Procedure**
1. Generate a unique salt.
2. Append the salt to the input.
3. Hash the salted input.

#### **Example Usage**
- **Password Storage:** Protect against rainbow table attacks.

#### **Benefits**
- Unique hashes for identical inputs.
- Enhances security of stored data.

#### **Drawbacks**
- Requires storing the salt with the hash.
- Increases computational overhead slightly.

---

### **6. PBKDF2 (Password-Based Key Derivation Function 2)**

PBKDF2 is a key derivation function designed to generate secure cryptographic keys from passwords. It applies a hash function multiple times (iterations) to make it more resistant to brute-force attacks.

#### **Why?**
To securely store passwords by making the process of deriving a key computationally expensive, thereby reducing the risk of brute-force attacks.

#### **Mathematical Formulation**
PBKDF2 is defined as:
$
\text{PBKDF2}(P, S, c, dkLen) = \text{F}(P, S, c, dkLen)
$
Where:
- $ P $: Password (input).
- $ S $: Salt.
- $ c $: Iteration count (number of iterations).
- $ dkLen $: Desired length of the derived key.
- $ \text{F} $: The pseudorandom function (often HMAC with a hash function like SHA-256).

#### **Procedure**
1. Select a password and salt.
2. Choose a large iteration count.
3. Derive the key by applying HMAC with multiple iterations.


#### **Example Usage**
- **Password Hashing:** Secure password storage.
- **Key Derivation:** For cryptographic systems requiring strong key generation.

#### **Benefits**
- Resilient against brute-force and dictionary attacks due to multiple iterations.
- Widely used and standardized.

#### **Drawbacks**
- Computationally expensive (slower key derivation).
- Salt and iteration count need to be stored securely.

---

### **7. bcrypt**

bcrypt is a cryptographic algorithm for hashing passwords. It is based on the Blowfish cipher and incorporates a work factor (the number of iterations) that increases computational cost to prevent brute-force attacks.

#### **Why?**
To make password hashing resistant to brute-force attacks and rainbow table attacks by incorporating both salt and a variable number of iterations (work factor).

#### **Mathematical Formulation**
bcrypt is computed as:
$
\text{bcrypt}(P, S, W) = \text{BLOWFISH}(P, S, W)
$
Where:
- $ P $: Password (input).
- $ S $: Salt.
- $ W $: Work factor (number of iterations).

#### **Procedure**
1. Generate a random salt.
2. Combine the password and salt.
3. Apply the Blowfish encryption algorithm multiple times as determined by the work factor.

#### **Example Usage**
- **Password Hashing:** Secure password storage.
- **Authentication:** Verifying user credentials.

#### **Benefits**
- Adaptive work factor for increased security.
- Highly resistant to rainbow table and brute-force attacks.

#### **Drawbacks**
- Slower compared to basic hash functions.
- Requires additional memory resources.

---

### **8. scrypt**

scrypt is a password-based key derivation function designed to be memory-intensive as well as computationally expensive. It is more resistant to parallel brute-force attacks (like those using GPUs or ASICs) than PBKDF2 or bcrypt.

#### **Why?**
To provide a secure password hashing function that is both computationally and memory-intensive, making it difficult for attackers to use specialized hardware to crack passwords.

#### **Mathematical Formulation**
scrypt is defined as:
$
\text{scrypt}(P, S, N, r, p, dkLen) = \text{F}(P, S, N, r, p, dkLen)
$
Where:
- $ P $: Password.
- $ S $: Salt.
- $ N $: CPU cost parameter (defines iterations).
- $ r $: Block size.
- $ p $: Parallelization factor.
- $ dkLen $: Desired length of derived key.
- $ \text{F} $: The pseudorandom function.

#### **Procedure**
1. Choose a password and salt.
2. Select CPU and memory cost parameters.
3. Derive the key by applying the function, which involves large memory usage.

#### **Example Usage**
- **Password Hashing:** Suitable for high-security applications where brute-force resistance is needed.
- **Cryptographic Systems:** Deriving keys for encryption.

#### **Benefits**
- High resistance to parallelized attacks.
- Memory-intensive design adds security against ASIC-based attacks.

#### **Drawbacks**
- Requires significant memory, which can lead to performance concerns on low-memory devices.

---

### **9. Argon2**

Argon2 is a cryptographic hash function designed for password hashing. It is the winner of the Password Hashing Competition (PHC) and provides strong resistance against both brute-force and side-channel attacks.

#### **Why?**
To provide an efficient, secure, and configurable password hashing function that is resistant to both hardware-based attacks (e.g., GPUs) and side-channel attacks.

#### **Mathematical Formulation**
Argon2 has two variants: Argon2d and Argon2i, each optimized for different use cases. The general form is:
$
\text{Argon2}(P, S, T, M, dkLen) = \text{F}(P, S, T, M, dkLen)
$
Where:
- $ P $: Password.
- $ S $: Salt.
- $ T $: Time cost (iterations).
- $ M $: Memory cost.
- $ dkLen $: Desired key length.
- $ \text{F} $: The pseudorandom function.

#### **Procedure**
1. Select parameters (time cost, memory cost).
2. Derive the key using the Argon2 algorithm, which includes both computational and memory-based challenges.

#### **Example Usage**
- **Password Storage:** High-security password hashing.
- **Key Derivation:** For cryptographic systems.

#### **Benefits**
- Highly configurable time and memory cost parameters.
- Provides both parallelism and memory-hardness.

#### **Drawbacks**
- Can be slow for low-memory environments.
- Newer, so adoption might not be as widespread yet.

---

### **10. SHA3-512**

SHA3-512 is part of the SHA-3 family of cryptographic hash functions. It produces a 512-bit hash value and is designed to provide strong security properties, such as resistance to collision and preimage attacks.

#### **Why?**
SHA3 was developed as a secure alternative to the SHA-2 family of hash functions, which could potentially be vulnerable to future advances in cryptanalysis.

#### **Mathematical Formulation**
SHA3-512 is defined as a part of the Keccak family of functions:

$
H = \text{Keccak-512}(M)
$

where:
- $ M $: Message input.
- $ H $: 512-bit hash output.

#### **Procedure**
1. Feed the message through the SHA3-512 function.
2. The function outputs a 512-bit hash.

#### **Example Usage**
- **Digital Signatures:** Verifying data authenticity.
- **Blockchain:** Verifying integrity of blocks.

#### **Benefits**
- Strong resistance to collision and preimage attacks.
- Part of the SHA-3 family, which is considered secure against modern attacks.

#### **Drawbacks**
- Slower than SHA-2.
- Not as widely adopted as SHA-2 yet.

---

### **11. Blake2b**

Blake2b is a cryptographic hash function that is optimized for speed and security. It is designed to be faster than MD5 and SHA-2, while still providing strong security properties.

#### **Why?**
To offer a faster, more secure alternative to existing hash functions like MD5 and SHA-2, with optimized performance for modern processors.

#### **Mathematical Formulation**
Blake2b is based on the sponge construction and outputs a variable-length hash:

$
H = \text{Blake2b}(M)
$

where:
- $ M $: Message input.
- $ H $: Hash output.

#### **Procedure**
1. Input the data into the Blake2b function.
2. The function outputs a fixed-length hash.

#### **Example Usage**
- **File Integrity:** Verifying large files.
- **Cryptographic Systems:** Hashing for digital signatures.

#### **Benefits**
- Very fast and secure.
- Higher performance than MD5 or SHA-2 in many use cases.

#### **Drawbacks**
- Not as widely supported in older systems.
- Not as proven in real-world applications as SHA-2.

---
