In [1]:
import os

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

# PBKDF2HMAC
- #### The PBKDF2HMAC is a key derivation function (KDF) used to derive a cryptographic key from a password (or passphrase) using HMAC (Hash-based Message Authentication Code) and the PBKDF2 algorithm. 

- #### It is commonly used for securely generating keys from user passwords.<br>

- #### SHA256 (Secure Hash Algorithm 256)<br>

- #### 32, which means the derived key will be 32 bytes long.<br>

- #### salt is a random value combined with the password before hashing, so that

    - #### even if two users have the same password, they will get different keys.
    
    - #### This prevents attackers from using precomputed "rainbow tables" (which store hash values for common passwords).<br>
    
- #### iterations=100000 is the number of times the PBKDF2 function applies the hashing algorithm. 

    - #### The more iterations, the more computationally expensive it becomes to derive the key, which helps defend against brute-force attacks.<br>

- #### backend used for cryptographic operations.

    - #### default_backend() uses the default cryptographic backend provided by the cryptography package, which is typically OpenSSL or another secure library depending on your platform.

In [2]:
salt = os.urandom(16)

kdf = PBKDF2HMAC(
    algorithm=hashes.SHA256(),
    length=32,  # AES-256 requires a 32-byte key
    salt=salt,
    iterations=100000,
    backend=default_backend(),
)
kdf

<cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC at 0x7111f415fee0>

- #### kdf.derive(password.encode()) returns the key as bytes. 
    - #### Calling .hex() on the returned key would show it as a hexadecimal string, which is easier to read than the raw bytes displayed below.


In [3]:
password = "mubeen"
kdf.derive(password.encode())

b"Jv\x17h\x06\xf3\x81\x9aV\x1a\xfcq\xbd\nSj8\xea'\xa4d\xa4l\xd7\x98\xd7\xc9B=\xa6v\xd7"

- #### os.urandom(16) generates 16 random bytes, which is used as the salt. 
    - #### This 16-byte salt is passed to the PBKDF2HMAC function to derive the cryptographic key.
    - #### Let’s say the password is "mubeen". If you use the same password but different salts each time, the derived keys will be different:
        - #### First salt: abc123 → Derived key: key1
        - #### Second salt: xyz456 → Derived key: key2

In [4]:
os.urandom(16)

b'/3+\x13\xfa\xe5\xa8\xf8:\xf2\xf82G\xf7&\xdf'

In [5]:
def generate_key(password: str, salt: bytes):
    """Derive a 32-byte key (AES-256 size) from ``password`` and ``salt``.

    The derivation is PBKDF2-HMAC with SHA-256 and 100000 iterations; the
    password is turned into bytes with ``str.encode()`` first.
    """
    deriver = PBKDF2HMAC(
        backend=default_backend(),
        iterations=100000,
        salt=salt,
        length=32,  # key size in bytes expected by AES-256
        algorithm=hashes.SHA256(),
    )
    password_bytes = password.encode()
    return deriver.derive(password_bytes)

In [6]:
generate_key(password="mubeen", salt=os.urandom(16))

b'<C\xa0HUgq\x99\xccd\x9d\xcc\x07\x1c\x9c\xc5\xf9\xd7y\x86\x19&\xfe\x13\xa7\x10T\xe3\xd9\xc2aE'

In [7]:
generate_key(password="mubeen", salt=os.urandom(16))

b'\xcc\xd5}\x9eY\xf4U\xbc\xc8\x14\x9ft0\x1d0i\x9a\x90\x1cA(\xe9\x0c\xaa8D-i\x92\xc8?Z'

# Encryption

- ## Salt and Nonce Generation
    - ### Salt: This is a 16-byte random value that will be used with the password to ensure that the derived key is unique. 
        - ### Even if the password is the same, the result will be different each time due to the salt.<br>
        
    - ### Nonce: This is a 12-byte random value used in AES-GCM encryption. 
        - ### It ensures that even if the same data is encrypted multiple times, the ciphertext will be different due to the nonce.

- ### Deriving the AES Key from Password and Salt
    - #### `key = generate_key(password, salt)`<br>

- ### Deriving AES Key: This line uses the generate_key function (defined above) to generate an AES encryption key 
    - ### from the provided password and salt. The key is used for AES encryption.

## Creating the AES-GCM Cipher
- ### Cipher: Here, an AES cipher is created using the derived key and AES-GCM mode. 
    - ### GCM (Galois/Counter Mode) is a mode of operation for AES that provides both confidentiality and data integrity (authentication). 
    - ### The nonce is also passed to ensure uniqueness during encryption.

In [8]:
salt = os.urandom(16)
nonce = os.urandom(12)
password = "mubeen"
key = generate_key(password, salt)

cipher = Cipher(algorithms.AES(key), modes.GCM(nonce), backend=default_backend())
cipher

<cryptography.hazmat.primitives.ciphers.base.Cipher at 0x7111eff09750>

- ## Encrypting the Data
    - ### encryptor.update(file_data) encrypts the file data in chunks.
    - ### encryptor.finalize() completes the encryption process and provides any remaining data that needs to be encrypted.

In [9]:
data = "hello"

encryptor = cipher.encryptor()
encrypted_data = encryptor.update(data.encode()) + encryptor.finalize()
encrypted_data

b'\x87`\x03_\xd3'

- ## Writing the Encrypted Data to File
    - ### Salt: Random data added to make sure even the same password gives different encryption results.
    - ### Nonce (IV): Random data that ensures each encryption operation is unique.
    - ### Encrypted Data: The file data that’s now in an unreadable form.
    - ### Authentication Tag: A security tag to check if the data is tampered with.

In [10]:
with open("test", "wb") as f:

    f.write(salt)  # Write the salt first
    f.write(nonce)  # Write the nonce (IV)
    f.write(encrypted_data)  # Write the encrypted data
    f.write(encryptor.tag)  # Write the authentication tag for integrity

In [14]:
!cat ./test

 6��O���0�v��P�&]֤����A1آyǐ̯�:��~�s��

- # Example

In [11]:
!cat ./code.txt

123
321
123

In [12]:
with open("code.txt", "rb") as f:
    file_data = f.read()
    print(file_data)

salt = os.urandom(16)
nonce = os.urandom(12)
password = "mubeen"
key = generate_key(password, salt)

cipher = Cipher(algorithms.AES(key), modes.GCM(nonce), backend=default_backend())
encryptor = cipher.encryptor()
encrypted_data = encryptor.update(file_data) + encryptor.finalize()

with open("./code.txt", "wb") as f:

    f.write(salt)  # Write the salt first

    f.write(nonce)  # Write the nonce (IV)

    f.write(encrypted_data)  # Write the encrypted data

    f.write(encryptor.tag)  # Write the authentication tag for integrity

b'123\n321\n123'


In [13]:
# again read text
!cat ./code.txt

�B��hjX{Ӻs�dy�k9�1�b�ks��( �6q�*�ɉ6�P"֢���y&���

In [14]:
encrypted_data

b'( \xea6q\xd5*\x8c\xc9\x896'

# Decryption

In [15]:
with open("./code.txt", "rb") as f:
    file_data = f.read()
    print(file_data)

b'\xf8B\xaf\xb1h\x14jX\x15{\xd3\xbas\xb4dy\x91k9\xd61\xc3b\xbaks\x8e\xed( \xea6q\xd5*\x8c\xc9\x896\xacP"\xd6\xa2\x90\x9d\xc1y&\x03\xbb\x11\x8e\xc9\x12'


In [16]:
file_size = os.path.getsize("./code.txt")
file_size

55

- ## Let's decrypt

In [17]:
key

b'#\x84?\x88\x0e\x86\x1f\x93I\x12/)\x9f\x06\x96\x15M\xaa\x07)\xa8\r\x96\xd7\xec\x12\xdc<\x8dJz\x8e'

In [18]:
nonce

b'\x91k9\xd61\xc3b\xbaks\x8e\xed'

In [19]:
encryptor.tag

b'\xacP"\xd6\xa2\x90\x9d\xc1y&\x03\xbb\x11\x8e\xc9\x12'

In [20]:
# NOTE(review): the salt and nonce generated here are NEW random values, not the
# ones stored in ./code.txt, so `key` and this `cipher` do not match that file.
# The values actually needed for decryption are read back from the file in a
# later cell, and the cipher is rebuilt from them before decrypting.
salt = os.urandom(16)
nonce = os.urandom(12)
password = "mubeen"
key = generate_key(password, salt)

# For decryption, GCM mode is given the authentication tag alongside the nonce.
cipher = Cipher(algorithms.AES(key), modes.GCM(nonce, encryptor.tag), backend=default_backend())

- ## Now this time we use cipher.decryptor()

In [21]:
decryptor = cipher.decryptor()
decryptor

<cryptography.hazmat.primitives.ciphers.base._AEADDecryptionContext at 0x7111f415ff40>

In [22]:
# Split the encrypted file back into its four parts, in the order they were written:
# salt (16 bytes), nonce (12 bytes), ciphertext, authentication tag (16 bytes).
with open("./code.txt", "rb") as f:

    file_size = os.path.getsize("./code.txt")
    salt = f.read(16)
    nonce = f.read(12)
    # Everything between the 28-byte header and the trailing 16-byte tag is ciphertext.
    encrypted_data = f.read(file_size - 16 - 12 - 16)
    tag = f.read(16)

    print("file_size", file_size)
    print("salt", salt)
    print("nonce", nonce)
    print("encrypted_data", encrypted_data)
    print("tag", tag)

file_size 55
salt b'\xf8B\xaf\xb1h\x14jX\x15{\xd3\xbas\xb4dy'
nonce b'\x91k9\xd61\xc3b\xbaks\x8e\xed'
encrypted_data b'( \xea6q\xd5*\x8c\xc9\x896'
tag b'\xacP"\xd6\xa2\x90\x9d\xc1y&\x03\xbb\x11\x8e\xc9\x12'


In [23]:
key = generate_key(password, salt)
key

b'#\x84?\x88\x0e\x86\x1f\x93I\x12/)\x9f\x06\x96\x15M\xaa\x07)\xa8\r\x96\xd7\xec\x12\xdc<\x8dJz\x8e'

In [24]:
cipher = Cipher(algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend())
decryptor = cipher.decryptor()
decrypted_data = decryptor.update(encrypted_data) + decryptor.finalize()
decrypted_data

b'123\n321\n123'

In [25]:
with open("./code.txt", "wb") as f:
    f.write(decrypted_data)

- ## now check the file

In [26]:
!cat ./code.txt

123
321
123

# Complete Code

In [27]:
def generate_key(password: str, salt: bytes, iterations: int = 100000) -> bytes:
    """Derive a 32-byte AES-256 key from a password with PBKDF2-HMAC-SHA256.

    Args:
        password: Passphrase; encoded with ``str.encode()`` before derivation.
        salt: Salt bytes mixed into the derivation. The same salt must be
            supplied again to re-derive the same key.
        iterations: Number of PBKDF2 iterations. Defaults to 100000, the value
            this notebook has always used. The encrypted-file layout written by
            ``encrypt`` stores only salt, nonce, ciphertext and tag, so the same
            iteration count must be used for encryption and decryption.

    Returns:
        The derived key as 32 bytes.
    """
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,  # AES-256 requires a 32-byte key
        salt=salt,
        iterations=iterations,
        backend=default_backend(),
    )
    return kdf.derive(password.encode())

In [28]:
def encrypt(file, password):
    """Encrypt ``file`` in place with AES-256-GCM using a password-derived key.

    Resulting file layout::

        16-byte salt | 12-byte nonce | ciphertext | 16-byte authentication tag

    The encrypted bytes are first written to a temporary file in the same
    directory and then moved over the original with ``os.replace``. The
    original plaintext is therefore only replaced once the complete encrypted
    file exists on disk; a failure part-way through the write no longer
    leaves a truncated, unrecoverable file behind.

    Args:
        file: Path of the file to encrypt.
        password: Passphrase passed to ``generate_key`` together with a fresh
            random salt.

    Errors are reported with ``print`` rather than raised, so a caller that
    loops over many files keeps going after a failure.
    """
    import tempfile  # function-scope import: only the atomic-replace step needs it

    try:
        salt = os.urandom(16)
        nonce = os.urandom(12)

        with open(file, "rb") as f:
            file_data = f.read()

        key = generate_key(password, salt)

        cipher = Cipher(
            algorithms.AES(key), modes.GCM(nonce), backend=default_backend()
        )

        encryptor = cipher.encryptor()
        encrypted_data = encryptor.update(file_data) + encryptor.finalize()

        # Resolve symlinks so the link target is replaced, not the link itself.
        target = os.path.realpath(file)
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(target), prefix=".enc-")
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(salt)  # salt
                f.write(nonce)  # nonce
                f.write(encrypted_data)  # encrypted data
                f.write(encryptor.tag)  # authentication tag for integrity

            # mkstemp creates the file private to the user; carry over the
            # permission bits of the file being replaced.
            os.chmod(tmp_path, os.stat(target).st_mode & 0o7777)
            os.replace(tmp_path, target)
        except BaseException:
            # Do not leave a stray temporary file next to the original.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise

        print(f"File {file} encrypted successfully")

    except Exception as e:
        print(f"Error during encryption of {file}: {e}")

In [29]:
def decrypt(file, password):
    """Decrypt, in place, a file previously written by ``encrypt``.

    Expected file layout::

        16-byte salt | 12-byte nonce | ciphertext | 16-byte authentication tag

    The ciphertext may be empty: encrypting an empty file produces exactly
    44 bytes (salt + nonce + tag), and such a file is accepted here and
    decrypts back to an empty file.

    The decrypted bytes are written to a temporary file in the same directory
    and moved over the original with ``os.replace``, so the encrypted file is
    only replaced after decryption succeeded and the plaintext is fully
    written.

    Args:
        file: Path of the encrypted file.
        password: Passphrase; combined with the salt stored in the file to
            re-derive the key via ``generate_key``.

    Errors (file too small, failed decryption, I/O problems) are reported
    with ``print`` rather than raised, so a caller that loops over many files
    keeps going after a failure.
    """
    import tempfile  # function-scope import: only the atomic-replace step needs it

    try:
        # Read once and slice, instead of combining os.path.getsize() with
        # several partial reads that could disagree if the file changes.
        with open(file, "rb") as f:
            blob = f.read()

        if len(blob) < 44:  # 16 bytes salt + 12 bytes nonce + 16 bytes tag
            raise ValueError(
                f"File {file} is too small to be a valid encrypted file."
            )

        salt = blob[:16]
        nonce = blob[16:28]
        encrypted_data = blob[28:-16]  # may legitimately be empty
        tag = blob[-16:]  # last 16 bytes of the file

        key = generate_key(password, salt)

        cipher = Cipher(
            algorithms.AES(key), modes.GCM(nonce, tag), backend=default_backend()
        )
        decryptor = cipher.decryptor()

        # Nothing is written to disk unless both calls complete without raising.
        decrypted_data = decryptor.update(encrypted_data) + decryptor.finalize()

        # Resolve symlinks so the link target is replaced, not the link itself.
        target = os.path.realpath(file)
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(target), prefix=".dec-")
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(decrypted_data)

            # mkstemp creates the file private to the user; carry over the
            # permission bits of the file being replaced.
            os.chmod(tmp_path, os.stat(target).st_mode & 0o7777)
            os.replace(tmp_path, target)
        except BaseException:
            # Do not leave a stray temporary file next to the original.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise

        print(f"File {file} decrypted successfully")

    except Exception as e:
        # Some exceptions carry no message text; fall back to the exception
        # class name so the printed reason is never blank.
        detail = str(e) or type(e).__name__
        print(f"Error during decryption of {file}: {detail}")

In [30]:
def encrypt_folder(folder_path, password):
    """Call ``encrypt`` on every file found under ``folder_path``, recursively."""
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for name in filenames:
            target = os.path.join(current_dir, name)
            encrypt(target, password)


def decrypt_folder(folder_path, password):
    """Call ``decrypt`` on every file found under ``folder_path``, recursively."""
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for name in filenames:
            target = os.path.join(current_dir, name)
            decrypt(target, password)

# Generate random folders and files

In [31]:
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError once the folder exists from a previous run.
os.makedirs("data", exist_ok=True)

In [32]:
import random

In [33]:
def gen(n):
    """Return a random word of ``n`` letters a-z with the first letter capitalised."""
    alphabet = [chr(code) for code in range(ord("a"), ord("z") + 1)]
    picks = [random.choice(alphabet) for _ in range(n)]
    return "".join(picks).capitalize()

In [34]:
# Create three randomly named folders under data/, each holding two randomly
# named sub-folders.
for _ in range(3):
    parent_dir = f"data/{gen(8)}"
    os.makedirs(parent_dir)
    for _ in range(2):
        child_dir = os.path.join(parent_dir, gen(3))
        os.makedirs(child_dir)

In [35]:
# Drop two small text files with random names and random content into every
# folder found under ./data (including ./data itself).
for folder, _subdirs, _names in os.walk("./data"):
    for _ in range(2):
        txt_path = folder + "/" + gen(3) + ".txt"
        with open(txt_path, "w+") as handle:
            handle.write(gen(4))

In [36]:
!tree ./data

[01;34m./data[0m
├── Cmh.txt
├── Exm.txt
├── [01;34mIaslzzmq[0m
│   ├── [01;34mCzh[0m
│   │   ├── Raj.txt
│   │   └── Siq.txt
│   ├── Kri.txt
│   ├── Nyt.txt
│   └── [01;34mWfk[0m
│       ├── Iiu.txt
│       └── Owl.txt
├── [01;34mKlutuaus[0m
│   ├── [01;34mBbc[0m
│   │   ├── Eag.txt
│   │   └── Ipa.txt
│   ├── Ezm.txt
│   ├── [01;34mJof[0m
│   │   ├── Hlf.txt
│   │   └── Iuz.txt
│   └── Ljp.txt
└── [01;34mMvbgktli[0m
    ├── [01;34mHtp[0m
    │   ├── Ggc.txt
    │   └── Qmh.txt
    ├── Mdk.txt
    ├── [01;34mQbt[0m
    │   ├── Beo.txt
    │   └── Ceu.txt
    └── Sxs.txt

9 directories, 20 files


In [38]:
# check 1 file
!cat ./data/Mvbgktli/Htp/Ggc.txt

Rwnt

In [39]:
# now encrypt it
encrypt_folder(folder_path="./data",password="mubeen")

File ./data/Cmh.txt encrypted successfully
File ./data/Exm.txt encrypted successfully
File ./data/Iaslzzmq/Nyt.txt encrypted successfully
File ./data/Iaslzzmq/Kri.txt encrypted successfully
File ./data/Iaslzzmq/Czh/Siq.txt encrypted successfully
File ./data/Iaslzzmq/Czh/Raj.txt encrypted successfully
File ./data/Iaslzzmq/Wfk/Owl.txt encrypted successfully
File ./data/Iaslzzmq/Wfk/Iiu.txt encrypted successfully
File ./data/Mvbgktli/Sxs.txt encrypted successfully
File ./data/Mvbgktli/Mdk.txt encrypted successfully
File ./data/Mvbgktli/Qbt/Ceu.txt encrypted successfully
File ./data/Mvbgktli/Qbt/Beo.txt encrypted successfully
File ./data/Mvbgktli/Htp/Qmh.txt encrypted successfully
File ./data/Mvbgktli/Htp/Ggc.txt encrypted successfully
File ./data/Klutuaus/Ezm.txt encrypted successfully
File ./data/Klutuaus/Ljp.txt encrypted successfully
File ./data/Klutuaus/Bbc/Eag.txt encrypted successfully
File ./data/Klutuaus/Bbc/Ipa.txt encrypted successfully
File ./data/Klutuaus/Jof/Iuz.txt encrypted

In [41]:
# check file again
!cat ./data/Klutuaus/Jof/Hlf.txt

���v
s6�'�T�iy_�Vw���؀H?�zg���Mw�`.�3��W&�

In [42]:
# lets decrypt with password
decrypt_folder(folder_path='./data',password="mubeen")

File ./data/Cmh.txt decrypted successfully
File ./data/Exm.txt decrypted successfully
File ./data/Iaslzzmq/Nyt.txt decrypted successfully
File ./data/Iaslzzmq/Kri.txt decrypted successfully
File ./data/Iaslzzmq/Czh/Siq.txt decrypted successfully
File ./data/Iaslzzmq/Czh/Raj.txt decrypted successfully
File ./data/Iaslzzmq/Wfk/Owl.txt decrypted successfully
File ./data/Iaslzzmq/Wfk/Iiu.txt decrypted successfully
File ./data/Mvbgktli/Sxs.txt decrypted successfully
File ./data/Mvbgktli/Mdk.txt decrypted successfully
File ./data/Mvbgktli/Qbt/Ceu.txt decrypted successfully
File ./data/Mvbgktli/Qbt/Beo.txt decrypted successfully
File ./data/Mvbgktli/Htp/Qmh.txt decrypted successfully
File ./data/Mvbgktli/Htp/Ggc.txt decrypted successfully
File ./data/Klutuaus/Ezm.txt decrypted successfully
File ./data/Klutuaus/Ljp.txt decrypted successfully
File ./data/Klutuaus/Bbc/Eag.txt decrypted successfully
File ./data/Klutuaus/Bbc/Ipa.txt decrypted successfully
File ./data/Klutuaus/Jof/Iuz.txt decrypted

In [43]:
# check file again
!cat ./data/Klutuaus/Jof/Hlf.txt

Jvfv

# Try on Large Data

In [44]:
encrypt_folder("/home/mubeen/Videos/test","mubeen")

File /home/mubeen/Videos/test/passwords.txt encrypted successfully
File /home/mubeen/Videos/test/fastapi.mp4 encrypted successfully


In [45]:
decrypt_folder("/home/mubeen/Videos/test","mubeen")

File /home/mubeen/Videos/test/passwords.txt decrypted successfully
File /home/mubeen/Videos/test/fastapi.mp4 decrypted successfully
