### Week 2: Some intermediate topics in Cryptography

In [29]:
# Libraries we shall use for this Notebook
import hashlib
from Crypto.Util.number import getPrime, inverse, bytes_to_long, long_to_bytes
from Crypto.PublicKey import RSA
from Crypto.Random.random import getrandbits
import random
import math
from sympy import mod_inverse, integer_nthroot, randprime

# 1. RSA: Rivest-Shamir-Adleman Encryption

RSA is one of the most famous encryption schemes. It is based on modular exponentiation (similar to the Diffie-Hellman key exchange, which is covered later). Interestingly, Rivest came up with the one-way function while drunk.

**Trapdoor functions:** A trapdoor function is a function that is easy to compute in one direction, yet difficult to compute in the opposite direction (finding its inverse) without special information, called the "trapdoor". 

RSA is built on a trapdoor function: it generates a public key, the pair $(e, n)$, for encryption and a private key, the pair $(d, n)$, for decryption.
<p align="center">
  <img src="./images/trapdoor.png" alt="Trapdoor function graphics">
</p>


In [30]:
def generate_rsa_keys(bits=1024):
    """Generate a textbook RSA key pair.

    Parameters
    ----------
    bits : int
        Target size of the modulus n in bits; each prime gets ``bits // 2`` bits.

    Returns
    -------
    tuple
        ``((e, n), (d, n))`` -- the public key first, then the private key.
    """
    e = 65537
    while True:
        # Two large primes of bits // 2 bits each (512 bits by default,
        # i.e. of the order of 2^511)
        p = getPrime(bits // 2)
        q = getPrime(bits // 2)
        phi = (p - 1) * (q - 1)  # Euler's totient function of n = p * q
        # Retry in the rare cases where the primes coincide or e is not
        # invertible modulo phi (d would not exist).
        if p != q and math.gcd(e, phi) == 1:
            break
    n = p * q  # roughly `bits` bits long (of the order of 2^1023 by default)
    d = inverse(e, phi)  # private exponent: e * d == 1 (mod phi), same order as n
    return (e, n), (d, n)

# generate_rsa_keys() returns the pair as ((e, n), (d, n)): public key first, private key second
public_key, private_key = generate_rsa_keys()

In [31]:
def rsa_encrypt(message, public_key):
    """Encrypt a text message with textbook (unpadded) RSA.

    Parameters
    ----------
    message : str
        Text to encrypt; it is UTF-8 encoded and read as one big-endian integer.
    public_key : tuple
        ``(exponent, modulus)`` pair to exponentiate with.

    Returns
    -------
    int
        ``message_as_int ** exponent mod modulus``.

    Raises
    ------
    ValueError
        If the encoded message is not smaller than the modulus. Such a value
        would be reduced mod n and could never be recovered by decryption.
    """
    e, n = public_key
    plaintext_number = bytes_to_long(message.encode('utf-8'))
    if plaintext_number >= n:
        raise ValueError("Message is too long for this RSA modulus")
    return pow(plaintext_number, e, n)

def rsa_decrypt(ciphertext, private_key):
    """Decrypt a textbook-RSA ciphertext and return the plaintext as text.

    ``private_key`` is an ``(exponent, modulus)`` pair. The recovered integer
    is converted back to bytes and decoded as UTF-8.
    """
    exponent, modulus = private_key
    plaintext_number = pow(ciphertext, exponent, modulus)
    return long_to_bytes(plaintext_number).decode('utf-8')

<p align="center">
  <img src="./images/rsa.png" alt="RSA mathematics">
</p>


In [32]:
# Encrypt a short demo message with the public key (e, n)
message = 'Welcome to Cryptogue week 2'
ciphertext = rsa_encrypt(message, public_key)
print("Ciphertext: ", ciphertext)

Ciphertext:  49600318242056596817827640254314963284713568588744128794615889006887368240146956758249264706562644220030993750651261920968024871764112982418939117678464948261495247094347151806029037370578381682992193568391833673705290521247242760536216003466243775445940034029020243778412107955518216273261538753662312620950


In [33]:
# Decrypting with the matching private key (d, n) should give back the original text
decrypted_message = rsa_decrypt(ciphertext, private_key)
print(decrypted_message)

Welcome to Cryptogue week 2


# 2. Diffie-Hellman Key Exchange protocol
The Diffie-Hellman key exchange protocol is a method for securely exchanging a secret key between two parties over an insecure channel. It's a cryptographic protocol that uses modular arithmetic, similar to RSA, and large random integers to create a shared secret key

### The Protocol:
1. Two parties, Alice and Bob, agree on a public prime modulus $p$ and a generator $g$.
2. Each party chooses a secret number, which is their private key.
3. Each party calculates their public key as $g^{\text{private key}} \bmod p$.
4. The parties exchange their public keys. 
5. Each party uses their private key and the other party's public key to calculate the shared secret key

<p align="center">
  <img src="./images/Diffie-Hellman-key-exchange-protocol.png" alt="Diffie-Hellman key exchange graphics">
</p>

In [34]:
# Initial shared (public) information: a random 512-bit prime modulus and the base g
# NOTE(review): nothing here checks that g = 2 generates a large subgroup modulo
# `prime`; real deployments use a safe prime or a standardised group -- confirm
# before reusing these parameters outside a demo.
prime = getPrime(512)
g = 2
print("Initial Shared info:\nprime = ",prime,"\ng = ",g)

Initial Shared info:
prime =  13397151156844759225697512829654653794863564741439938017843327968891759893552256126733039332148584043957392975665763898325634202758837966024461696205198241 
g =  2


In [35]:
# Their secret keys, drawn from the operating system's CSPRNG.
# The default `random` generator (Mersenne Twister) is predictable and must not
# be used for key material. Exponents are taken from [2, prime - 2] so the
# trivial values 1 and prime - 1 are never chosen.
secure_rng = random.SystemRandom()
alice_private = secure_rng.randint(2, prime - 2)
bob_private = secure_rng.randint(2, prime - 2)
print("Alice private key: ",alice_private)
print("Bob private key:   ",bob_private)

Alice private key:  1878706013835763815263629675234304289237950246994940642600512310807526473877013861488842636480090088493324615095226275954631241092641082124661464043883227
Bob private key:    1007535039554261204701256427007328601762395211472302340956107504854507992207733283885979891012296361412795662595682629309903174211854433410076380242977936


In [36]:
# Public keys: each party raises the shared base g to its own private exponent, modulo prime
alice_public = pow(g, alice_private, prime)
bob_public = pow(g, bob_private, prime)
print("Alice public key: ",alice_public)
print("Bob public key:   ",bob_public)

Alice public key:  12503873158186602385279615616260829174155232895799421080751312748921964832403268751419561677238607946284881386324270157682056787943422966923884748672273974
Bob public key:    1978992444012451038610424433643475721717325648585078688696520180773426987743427490252780168715130771548675120165397060125500005210809167942678120123878956


In [37]:
# Shared secret: each party raises the OTHER party's public key to its own
# private exponent. Both results equal g^(alice_private * bob_private) mod prime.
shared_secret_alice = pow(bob_public, alice_private, prime)
shared_secret_bob = pow(alice_public, bob_private, prime)

print("Alice shared key: ",shared_secret_alice)
print("Bob shared key:   ",shared_secret_bob)

if shared_secret_alice == shared_secret_bob:
    print("Shared secret successfully established")
else:
    print("Shared secrets do not match")

Alice shared key:  13316241912182708859254942222376939053943191073013944132741406642787802992223706678193295996618928148069038650662389922565325477361983370197504720397294110
Bob public key:    13316241912182708859254942222376939053943191073013944132741406642787802992223706678193295996618928148069038650662389922565325477361983370197504720397294110
Shared secret successfully established


# 3. Hashing Algorithms: MD5 & SHA256

## 3.1 MD5 (Message Digest Algorithm 5):
In Cryptography, the MD5 message-digest algorithm is a cryptographic hash function designed to convert a message into a 128-bit hash value.

Due to its vulnerabilities — practical collision attacks in particular, along with theoretical weaknesses against preimage attacks — MD5 is no longer considered secure and has been replaced by stronger algorithms such as SHA-256 (SHA-1, its first successor, has since been broken for collisions as well). Despite its insecurity, MD5 is still widely used for non-cryptographic purposes, such as a checksum to detect accidental data corruption or a short fingerprint for identifying large files.

<p align="center">
  <img src="./images/md5.png" alt="MD5 graphics">
</p>

**Here, Process P is a non-linear function**

In [38]:
# Read the sample text explicitly as UTF-8 so the result does not depend on the
# platform's default encoding.
with open('./texts/lorem_ipsum.txt', 'r', encoding='utf-8') as file:
    message = file.read()

# message = "Lorem ipsum dolor sit amet, ..."

def md5_hash_generator(text):
    """Return the MD5 digest of `text` (UTF-8 encoded) as a hexadecimal string."""
    encoded_text = text.encode('utf-8')
    return hashlib.md5(encoded_text).hexdigest()

# Use a dedicated name: `hash` would shadow Python's built-in hash() for the rest of the session
md5_digest = md5_hash_generator(message)

print(f"Original message: {message}")
print(f"MD5 hash: {md5_digest}")

Original message: Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus tempus posuere eros, sed ultricies tellus venenatis in. Vestibulum ac velit risus. Praesent gravida tellus id nisi pretium condimentum. Sed viverra, dolor id sagittis accumsan, diam elit consectetur justo, sit amet vehicula ipsum libero non orci. Pellentesque dolor neque, venenatis non tortor in, aliquam scelerisque est. Praesent in ex nisl. Nulla vitae ullamcorper tellus. Mauris iaculis condimentum elit, quis dictum turpis dictum in. Aenean a risus leo. Proin quis nisi nec purus porttitor vulputate. Etiam rutrum finibus magna, a vestibulum libero condimentum in. Nullam laoreet, sem ut sodales pulvinar, nisi tellus varius lorem, in aliquam odio sapien et augue. Etiam cursus mauris dui.
MD5 hash: e1eadb7b2e585a253f4d4dbacae712f2


## 3.2 SHA-256: Secure Hashing Algorithm 256-bits
It's a cryptographic hash function that produces a 256-bit (32-byte) hash value from input data of any length. The National Security Agency (NSA) and the National Institute of Standards and Technology (NIST) introduced SHA-256 in 2001.

**Advantages:**
- Being 256-bit long, it is resistant to brute force attacks by classical computers
- It is also collision resistant due to its complexity and entropy
  
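Quantum computers pose a threat to this hashing algorithm through Grover's algorithm, which gives a quadratic speed-up for brute-force search (Shor's factorization algorithm threatens RSA and Diffie-Hellman, not hash functions), but SHA-256 is still really difficult to break.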

<p align="center">
  <img src="./images/sha-256.png" alt="SHA-256 graphics">
</p>

In [39]:
# Read the sample text explicitly as UTF-8 so the result does not depend on the
# platform's default encoding.
with open('./texts/lorem_ipsum.txt', 'r', encoding='utf-8') as file:
    message = file.read()

# message = "Lorem ipsum dolor sit amet ..."

def sha256_hash_generator(text):
    """Return the SHA-256 digest of `text` (UTF-8 encoded) as a hexadecimal string."""
    digest = hashlib.sha256()
    digest.update(text.encode('utf-8'))
    return digest.hexdigest()

# Use a dedicated name: `hash` would shadow Python's built-in hash() for the rest of the session
sha256_digest = sha256_hash_generator(message)

print(f"Original message: {message}")
print(f"SHA256 Hash: {sha256_digest}")

Original message: Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus tempus posuere eros, sed ultricies tellus venenatis in. Vestibulum ac velit risus. Praesent gravida tellus id nisi pretium condimentum. Sed viverra, dolor id sagittis accumsan, diam elit consectetur justo, sit amet vehicula ipsum libero non orci. Pellentesque dolor neque, venenatis non tortor in, aliquam scelerisque est. Praesent in ex nisl. Nulla vitae ullamcorper tellus. Mauris iaculis condimentum elit, quis dictum turpis dictum in. Aenean a risus leo. Proin quis nisi nec purus porttitor vulputate. Etiam rutrum finibus magna, a vestibulum libero condimentum in. Nullam laoreet, sem ut sodales pulvinar, nisi tellus varius lorem, in aliquam odio sapien et augue. Etiam cursus mauris dui.
SHA256 Hash: cd177b4d7287d1a9d6baa2de54cf0b7d41518d29ba3f80ff4c91154ef82119d4


For modern password hashing, often a salt is added at the end of each password that is unique to the user. A salt is just a small string, appended to the password to increase the complexity and the search space

eg.: password = cryptogue_Week_2, salt = mnpcsec, final hash = sha256(cryptogue_Week_2mnpcsec)

The salt's value is stored for each user in a database, alongside the hash. Python libraries provide ready-made salted password hashing, e.g. `hashlib.pbkdf2_hmac` in the standard library or the key-derivation functions of the `cryptography` package. Do check them out if interested.

# 4 RSA digital signature
A mechanism to ensure the authenticity and integrity of a message. The sender signs the message using their private key, and the recipient verifies the signature using the sender's public key.
<p align="center">
  <img src="./images/rsa-signature.png" alt="RSA signature graphics">
</p>

In [40]:
# generate_rsa_keys() returns (public_key, private_key) in that order, so unpack
# accordingly: Alice signs with her private exponent d, and everyone else
# verifies with the public exponent e. (Unpacking the other way round would
# "sign" with the publicly known exponent, which anyone could forge.)
public_key_for_alice, private_key_for_alice = generate_rsa_keys()
# message = bytes_to_long(b'I, Alice, own this property')

with open("./texts/property_transfer_agreement.txt", "r", encoding="utf-8") as file:
    agreement_text = file.read()

# Sign the SHA-256 digest of the agreement rather than the (long) agreement itself
agreement_text_hash = sha256_hash_generator(agreement_text)
signed_agreement = rsa_encrypt(agreement_text_hash,private_key_for_alice)
print("Agreement hash: ", agreement_text_hash)
print("Signed agreement: ", signed_agreement)

Agreement hash:  1d0042cb200f99b5b227d1ef780ca08dcd23e60538c910367ce4115e1be55030
Signed agreement:  74345758863302907022875705660238333393199378906500105182743069208857021038499880002606603812648900051564738755760303695711855566867428925039363786229147956437545544242180769429654636883060123983742040466784638864618991138847617888585504507666163251478487049230278245912102315597341118086564223781427252963515


We first took the whole agreement into a string and then digested it using the sha256 algorithm to generate a hash for us, we then use RSA to encrypt this hash with the private keys of Alice.

This is now available and public, and can be decrypted with the public keys of Alice and verified by comparing the decrypted message with the hash of the agreement.

In [41]:
def validate_signature(agreement_text, signed_agreement, public_key):
    """Check an RSA signature over the SHA-256 digest of an agreement.

    Parameters
    ----------
    agreement_text : str
        The agreement whose signature is being checked.
    signed_agreement : int
        The signature, i.e. the agreement digest exponentiated with the
        signer's private key.
    public_key : tuple
        The signer's ``(exponent, modulus)`` verification key.

    Returns
    -------
    bool
        True when the signature opens to the digest of `agreement_text`.
        The outcome is also printed.
    """
    agreement_hash = sha256_hash_generator(agreement_text)
    try:
        decrypted_agreement_hash = rsa_decrypt(signed_agreement, public_key)
    except UnicodeDecodeError:
        # A forged or corrupted signature opens to arbitrary bytes, which are
        # usually not valid UTF-8; treat that as a failed verification rather
        # than crashing.
        decrypted_agreement_hash = None
    print("Decrypted signature hash: ", decrypted_agreement_hash)
    print("Agreement hash:           ", agreement_hash)
    is_valid = decrypted_agreement_hash == agreement_hash
    if is_valid:
        print("The agreement is signed by Alice")
    else:
        print("The agreement is not signed by Alice")
    return is_valid

# Verify the signature using the key bound to `public_key_for_alice` in the signing cell
validate_signature(agreement_text, signed_agreement, public_key_for_alice)

Decrypted signature hash:  1d0042cb200f99b5b227d1ef780ca08dcd23e60538c910367ce4115e1be55030
Agreement hash:            1d0042cb200f99b5b227d1ef780ca08dcd23e60538c910367ce4115e1be55030
The agreement is signed by Alice


# 5 Hastad's Broadcast attack: Vulnerability in RSA
Hastad's Broadcast Attack is a cryptographic attack that exploits weaknesses in RSA encryption when the same message is encrypted with small public exponents and sent to multiple recipients.

**Attack Conditions:**
- Small Public Exponents: The attack is most effective when the public exponents $(e_1, e_2, ..., e_k)$ are small, typically 3.
- Multiple Recipients: The attacker needs to intercept the encrypted messages $(c_1, c_2, ..., c_k)$ sent to multiple recipients.

**Example Scenario:**
- Imagine a sender wants to encrypt a single message, m, and send it to multiple recipients.
- Each recipient has their own RSA public key: $(N_1, e_1), (N_2, e_2), ..., (N_k, e_k)$, where N is the modulus  and e is the public exponent.
- The sender encrypts the message m using each recipient's public key:
  1. $c_1 = m^{e_1} \bmod N_1$
  2. $c_2 = m^{e_2} \bmod N_2$
  3. ...
  4. $c_k = m^{e_k} \bmod N_k$

<p align="center">
  <img src="./images/hastad.png" alt="hastad graphics">
</p>

In [42]:
from sympy import mod_inverse, integer_nthroot

def hastads_broadcast_attack(ciphertexts, moduli, e=3):
    """Recover a plaintext that was sent to several recipients with the same small exponent.

    The ciphertexts ``c_i = m^e mod N_i`` are combined with the Chinese
    Remainder Theorem into ``m^e mod (N_1 * ... * N_k)``. When ``m^e`` is
    smaller than that product, no modular reduction took place and ``m`` is
    simply the integer e-th root.

    Parameters
    ----------
    ciphertexts : list of int
        One ciphertext per recipient, all encrypting the same message.
    moduli : list of int
        The recipients' moduli, in the same order; they must be pairwise
        coprime (otherwise the modular inverse does not exist and
        ``mod_inverse`` raises).
    e : int, optional
        The public exponent shared by all recipients (default 3).

    Returns
    -------
    int
        The recovered plaintext as an integer.

    Raises
    ------
    ValueError
        If the combined value is not a perfect e-th power, e.g. because too
        few ciphertexts were supplied for the size of the message.
    """
    # Check input consistency
    assert len(ciphertexts) == len(moduli), "Number of ciphertexts and moduli must match"

    # Compute the product of all moduli (N = N_1 * N_2 * ... * N_k)
    N = 1
    for modulus in moduli:
        N *= modulus

    # CRT recombination: sum of C_i * M_i * (M_i^-1 mod N_i), with M_i = N / N_i
    combined = 0
    for C_i, modulus in zip(ciphertexts, moduli):
        M_i = N // modulus
        combined += C_i * M_i * mod_inverse(M_i, modulus)
    combined %= N

    plaintext, exact = integer_nthroot(combined, e)
    if not exact:
        raise ValueError(f"Failed to extract an exact integer root of degree {e}")

    return int(plaintext)

In [43]:
e = 3  # Public exponent
message = "small exponent RSA"
message_to_num = bytes_to_long(message.encode('utf-8'))

# One modulus per recipient.
# NOTE(review): these are single primes rather than RSA moduli p * q; the CRT
# step only needs the moduli to be pairwise coprime, but confirm this
# simplification is intended.
moduli = [randprime(10**50, 10**51) for _ in range(3)]

# The same message encrypted under each recipient's modulus with the same exponent
ciphertexts = [pow(message_to_num, e, modulus) for modulus in moduli]

recovered_number = hastads_broadcast_attack(ciphertexts, moduli)
recovered_message = long_to_bytes(recovered_number).decode('utf-8')

print(f"Original message: {message}")
print(f"Recovered message: {recovered_message}")

Original message: small exponent RSA
Recovered message: small exponent RSA


# 6. Parity Oracle Attack
The RSA parity oracle attack is a cryptographic attack that exploits a weakness in RSA by using an oracle to reveal the least-significant bit of a ciphertext's decryption

An oracle is an abstracted information source that can be used to give attackers a go/no go indication.
In this attack, the oracle reveals the least-significant bit of a ciphertext's decryption.
This information can be used to help attackers determine how close they are to their goals.



<p align="center">
  <img src="./images/oracle.png" alt="oracle graphics">
</p>
<p align="center">
  <img src="./images/oracle2.png" alt="oracle graphics">
</p>

### Algorithm of Oracle parity attack

1. RSA ciphertexts are just numbers. You can do trivial math on them. You can for instance multiply a ciphertext by the RSA-encryption of another number; the corresponding plaintext will be the product of those two numbers.
2. If you double a ciphertext (multiply it by (2**e)%n), the resulting plaintext will (obviously) be either even or odd.
3. If the plaintext after doubling is even, doubling the plaintext didn't wrap the modulus --- the modulus is odd (a product of two odd primes), so a wrap-around would have subtracted an odd number and made the result odd. That means the plaintext is less than half the modulus.
4. You can repeatedly apply this heuristic, once per bit of the message, checking your oracle function each time.
5. Your decryption function starts with bounds for the plaintext of [0,n].
6. Each iteration of the decryption cuts the bounds in half; either the upper bound is reduced by half, or the lower bound is.

After roughly $\log_2(n)$ iterations, you have the decryption of the message.

Print the upper bound of the message as a string at each iteration; you'll see the message

In [44]:
def parity_oracle_attack(ciphertext, public_key, modulus, oracle, verbose=True):
    """Recover an RSA plaintext from a parity (least-significant-bit) oracle.

    Each round multiplies the ciphertext by Enc(2), which doubles the hidden
    plaintext modulo `modulus`. Because the modulus is odd, the doubled value
    is even exactly when no wrap-around happened, so every oracle answer
    yields one more binary digit of ``plaintext / modulus``.

    The interval is tracked exactly: after i rounds the plaintext lies in
    ``[quotient * modulus / 2**i, (quotient + 1) * modulus / 2**i)``.
    Halving floored integer bounds instead would accumulate rounding error
    and corrupt the low-order bits of the result.

    Parameters
    ----------
    ciphertext : int
        The ciphertext to attack.
    public_key : int
        The public exponent e.
    modulus : int
        The RSA modulus n; the parity argument requires it to be odd.
    oracle : callable
        ``oracle(c)`` must be truthy when the decryption of ``c`` is even.
    verbose : bool, optional
        Print the current bounds and the oracle's answer for every round
        (default True).

    Returns
    -------
    int
        The recovered plaintext.
    """
    doubling_factor = pow(2, public_key, modulus)  # Enc(2), identical for every round
    rounds = modulus.bit_length()  # 2**rounds > modulus, so the final interval is narrower than 1
    quotient = 0  # floor(2**i * plaintext / modulus) after i rounds

    for iteration in range(rounds):
        # Multiply the ciphertext by 2^e mod N, i.e. double the hidden plaintext
        ciphertext = (ciphertext * doubling_factor) % modulus

        # Query the oracle
        is_even = oracle(ciphertext)
        if verbose:
            lower_bound = (quotient * modulus) >> iteration
            upper_bound = ((quotient + 1) * modulus) >> iteration
            print(f"Iteration {iteration}: Lower={lower_bound}, Upper={upper_bound}, Oracle Response={is_even}")

        # Even: no wrap-around, keep the lower half. Odd: wrapped, keep the upper half.
        quotient = 2 * quotient + (0 if is_even else 1)

    # The plaintext is the only integer in the final half-open interval, which
    # is the ceiling of its lower end: ceil(quotient * modulus / 2**rounds).
    return -((-quotient * modulus) >> rounds)


In [45]:
# Generate RSA parameters
# NOTE: `public_key` and `private_key` are rebound below to bare integer
# exponents, replacing the (exponent, modulus) tuples created in the first RSA
# cell; re-running the earlier RSA cells after this one needs fresh keys.
p = 225292488731544063850604968585030050664197308954243 
q = 551191138960513821906192011035895724642016720587707
# p and q are 2 random primes
modulus = p * q
public_key = 3
private_key = mod_inverse(public_key, (p - 1) * (q - 1))

# Define the plaintext and encrypt it
message = "OPTIMISM LEADS TO OPTIMALITY "
message_to_num = bytes_to_long(message.encode('utf-8'))
ciphertext = pow(message_to_num, public_key, modulus)

# Parity oracle: stands in for a server that holds the private key
def oracle(ciphertext):
    """Return True when the decryption of `ciphertext` is an even number."""
    plaintext_number = pow(ciphertext, private_key, modulus)
    return not (plaintext_number & 1)

# Attempt to recover the plaintext using the parity oracle attack.
# The attack is given only the ciphertext, the public exponent, the modulus
# and the oracle's answers.
recovered_num = parity_oracle_attack(ciphertext, public_key, modulus, oracle)
recovered_message = long_to_bytes(recovered_num).decode('utf-8')

print(f"Original message: {message}")
print(f"Recovered message: {recovered_message}")

Iteration 0: Lower=0, Upper=124179223463188498449252700056769306422303028589267286596054751504996342937501269070265128796531290801, Oracle Response=True
Iteration 1: Lower=0, Upper=62089611731594249224626350028384653211151514294633643298027375752498171468750634535132564398265645400, Oracle Response=True
Iteration 2: Lower=0, Upper=31044805865797124612313175014192326605575757147316821649013687876249085734375317267566282199132822700, Oracle Response=True
Iteration 3: Lower=0, Upper=15522402932898562306156587507096163302787878573658410824506843938124542867187658633783141099566411350, Oracle Response=True
Iteration 4: Lower=0, Upper=7761201466449281153078293753548081651393939286829205412253421969062271433593829316891570549783205675, Oracle Response=True
Iteration 5: Lower=0, Upper=3880600733224640576539146876774040825696969643414602706126710984531135716796914658445785274891602837, Oracle Response=True
Iteration 6: Lower=0, Upper=1940300366612320288269573438387020412848484821707301353063355