In [1]:
import pandas as pd
import numpy as np
import random
import os
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers.algorithms import TripleDES, Blowfish
from cryptography.hazmat.primitives.ciphers.algorithms import ChaCha20
from scipy.io import savemat

# Load a single column (index 5) from the header-less CSV; this is the text
# column shown in the cell output below.
df = pd.read_csv(
    'training1600000processednoemoticon.csv',
    usecols=[5],
    header=None,
    encoding='ISO-8859-1',
)

# Backend handle passed to every Cipher object constructed in this notebook.
backend = default_backend()

# One random key per algorithm. Keys are drawn fresh on every run and then
# shared by all records produced in that run.
aes_key = os.urandom(32)       # 32 bytes
des_key = os.urandom(24)       # 24 bytes
blowfish_key = os.urandom(16)  # 16 bytes
chacha_key = os.urandom(32)    # 32 bytes

# The Fernet key is always newly generated here; nothing is loaded or reused.
fernet_key = Fernet.generate_key()
fernet_cipher = Fernet(fernet_key)

# AES encryption with different modes
def _aes_cipher(key, iv, mode_name):
    """Build the AES ``Cipher`` object for the requested mode.

    Shared by ``aes_encrypt`` and ``aes_decrypt`` so the list of supported
    modes lives in exactly one place.

    Args:
        key: AES key bytes.
        iv: IV / nonce bytes handed to the mode constructor. Ignored for ECB.
        mode_name: One of 'CBC', 'CFB', 'OFB', 'CTR', 'ECB'.

    Returns:
        A ``Cipher`` configured with AES and the selected mode.

    Raises:
        ValueError: If ``mode_name`` is not one of the supported names.
    """
    if mode_name == 'ECB':
        mode = modes.ECB()  # ECB takes no IV
    elif mode_name in ('CBC', 'CFB', 'OFB', 'CTR'):
        # For these four the mode class in ``modes`` has the same name as the
        # mode string and is constructed from a single IV/nonce argument.
        mode = getattr(modes, mode_name)(iv)
    else:
        raise ValueError("Unsupported AES mode")
    return Cipher(algorithms.AES(key), mode, backend=backend)


def aes_encrypt(data, key, mode_name):
    """Encrypt ``data`` with AES in the given mode.

    The plaintext is right-padded with ASCII spaces up to a multiple of
    16 bytes before encryption, for every mode. Callers that need the exact
    plaintext back must strip that padding after decrypting; plaintext that
    itself ends in spaces cannot be distinguished from the padding.

    Args:
        data: Plaintext bytes.
        key: AES key bytes.
        mode_name: One of 'CBC', 'CFB', 'OFB', 'CTR', 'ECB'.

    Returns:
        Tuple ``(ciphertext, iv)``. ``iv`` is 16 random bytes, or ``b''``
        for ECB, which has no IV.

    Raises:
        ValueError: If ``mode_name`` is not supported.
    """
    iv = b'' if mode_name == 'ECB' else os.urandom(16)
    encryptor = _aes_cipher(key, iv, mode_name).encryptor()
    # -len % 16 is the number of bytes missing to the next 16-byte boundary
    # (0 when already aligned).
    padded_data = data + b' ' * (-len(data) % 16)
    return encryptor.update(padded_data) + encryptor.finalize(), iv


def aes_decrypt(ciphertext, key, iv, mode_name):
    """Decrypt AES ``ciphertext`` produced with the given mode.

    Args:
        ciphertext: Bytes returned by ``aes_encrypt``.
        key: AES key bytes used for encryption.
        iv: IV returned by ``aes_encrypt`` (ignored for ECB).
        mode_name: One of 'CBC', 'CFB', 'OFB', 'CTR', 'ECB'.

    Returns:
        The decrypted bytes, still including any space padding that
        ``aes_encrypt`` appended.

    Raises:
        ValueError: If ``mode_name`` is not supported.
    """
    decryptor = _aes_cipher(key, iv, mode_name).decryptor()
    return decryptor.update(ciphertext) + decryptor.finalize()

def triple_des_encrypt(data, key):
    """Encrypt ``data`` with Triple DES in CFB mode under a fresh 8-byte IV.

    Returns:
        Tuple ``(ciphertext, iv)``.
    """
    iv = os.urandom(8)
    enc = Cipher(TripleDES(key), modes.CFB(iv), backend=backend).encryptor()
    body = enc.update(data)
    ciphertext = body + enc.finalize()
    return ciphertext, iv

def triple_des_decrypt(ciphertext, key, iv):
    """Decrypt Triple DES / CFB ``ciphertext`` using ``key`` and ``iv``.

    Returns:
        The decrypted bytes.
    """
    dec = Cipher(TripleDES(key), modes.CFB(iv), backend=backend).decryptor()
    body = dec.update(ciphertext)
    return body + dec.finalize()

def blowfish_encrypt(data, key):
    """Encrypt ``data`` with Blowfish in CFB mode under a fresh 8-byte IV.

    Returns:
        Tuple ``(ciphertext, iv)``.
    """
    iv = os.urandom(8)
    enc = Cipher(Blowfish(key), modes.CFB(iv), backend=backend).encryptor()
    body = enc.update(data)
    ciphertext = body + enc.finalize()
    return ciphertext, iv

def blowfish_decrypt(ciphertext, key, iv):
    """Decrypt Blowfish / CFB ``ciphertext`` using ``key`` and ``iv``.

    Returns:
        The decrypted bytes.
    """
    dec = Cipher(Blowfish(key), modes.CFB(iv), backend=backend).decryptor()
    body = dec.update(ciphertext)
    return body + dec.finalize()

def chacha_encrypt(data, key):
    """Encrypt ``data`` with ChaCha20 under a fresh random 16-byte nonce value.

    Args:
        data: Plaintext bytes.
        key: ChaCha20 key bytes.

    Returns:
        Tuple ``(ciphertext, nonce)`` where ``nonce`` is the 16 random bytes
        passed to ``ChaCha20``.
    """
    nonce = os.urandom(16)
    cipher = Cipher(ChaCha20(key, nonce), mode=None, backend=backend)
    encryptor = cipher.encryptor()
    # Finalize the context like the other cipher helpers in this notebook do,
    # instead of leaving it open after update().
    return encryptor.update(data) + encryptor.finalize(), nonce

def chacha_decrypt(ciphertext, key, nonce):
    """Decrypt ChaCha20 ``ciphertext`` using ``key`` and ``nonce``.

    Args:
        ciphertext: Bytes returned by ``chacha_encrypt``.
        key: ChaCha20 key bytes used for encryption.
        nonce: The 16-byte value returned by ``chacha_encrypt``.

    Returns:
        The decrypted bytes.
    """
    cipher = Cipher(ChaCha20(key, nonce), mode=None, backend=backend)
    decryptor = cipher.decryptor()
    # Finalize the context like the other cipher helpers in this notebook do,
    # instead of leaving it open after update().
    return decryptor.update(ciphertext) + decryptor.finalize()

def split_text_by_bit_length(text, min_bits=300, max_bits=700):
    """Split ``text`` on whitespace into chunks of randomly varying bit length.

    Words are accumulated into a chunk until adding the next word (plus the
    single space that will join it) would push the chunk's UTF-8 size past
    the current target. A new target is drawn uniformly from
    ``[min_bits, max_bits]`` for every chunk.

    The size accounted for is the size of the chunk as it is actually
    returned, i.e. including the joining spaces, so a chunk only exceeds its
    target when a single word on its own is larger than the target.

    Args:
        text: Value to split; it is converted with ``str()`` first.
        min_bits: Lower bound (inclusive) for each chunk's random target.
        max_bits: Upper bound (inclusive) for each chunk's random target.

    Returns:
        List of chunk strings, each made of whole words joined by one space.
        Empty or whitespace-only input yields an empty list.
    """
    words = str(text).split()
    chunks = []
    current_chunk = []
    current_bit_len = 0
    target_bit_len = random.randint(min_bits, max_bits)

    for word in words:
        try:
            word_bit_len = len(word.encode('utf-8')) * 8
        except UnicodeEncodeError:
            # Words that cannot be encoded as UTF-8 are dropped.
            continue
        # The space inserted by " ".join costs one byte once the chunk
        # already holds a word.
        separator_bits = 8 if current_chunk else 0
        if current_bit_len + separator_bits + word_bit_len > target_bit_len:
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            # Start the next chunk with this word and draw a new target.
            current_chunk = [word]
            current_bit_len = word_bit_len
            target_bit_len = random.randint(min_bits, max_bits)
        else:
            current_chunk.append(word)
            current_bit_len += separator_bits + word_bit_len
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# Show the loaded DataFrame as this cell's output (pandas abbreviates the middle rows).
df

  from cryptography.hazmat.primitives.ciphers.algorithms import TripleDES, Blowfish


Unnamed: 0,5
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,is upset that he can't update his Facebook by ...
2,@Kenichan I dived many times for the ball. Man...
3,my whole body feels itchy and like its on fire
4,"@nationwideclass no, it's not behaving at all...."
...,...
1048571,My GrandMa is making Dinenr with my Mum
1048572,Mid-morning snack time... A bowl of cheese noo...
1048573,@ShaDeLa same here say it like from the Termi...
1048574,@DestinyHope92 im great thaanks wbuu?


In [None]:
# Process data and collect encrypted records
#
# For every text chunk: encrypt it with each cipher, decrypt it again, and
# keep the record only when every decryption reproduces the plaintext exactly.
records = []
aes_modes = ['CBC', 'CFB', 'OFB', 'CTR', 'ECB']
skipped_chunks = 0  # chunks whose round-trip check failed

# itertuples yields plain tuples of the row values and avoids constructing a
# Series per row as iterrows does; df holds a single column here, so the
# joined text is the same.
for row_values in df.itertuples(index=False, name=None):
    text = " ".join(map(str, row_values))
    for chunk in split_text_by_bit_length(text):
        data = chunk.encode('utf-8')

        # The AES mode is picked at random per chunk and stored with the record.
        aes_mode = random.choice(aes_modes)
        aes_enc, aes_iv = aes_encrypt(data, aes_key, aes_mode)
        # aes_encrypt right-pads with spaces to a 16-byte multiple; strip that
        # padding again. Chunks are whitespace-split words joined by single
        # spaces, so they never end in whitespace themselves.
        aes_dec = aes_decrypt(aes_enc, aes_key, aes_iv, aes_mode).rstrip()

        des_enc, des_iv = triple_des_encrypt(data, des_key)
        des_dec = triple_des_decrypt(des_enc, des_key, des_iv)

        bf_enc, bf_iv = blowfish_encrypt(data, blowfish_key)
        bf_dec = blowfish_decrypt(bf_enc, blowfish_key, bf_iv)

        chacha_enc, chacha_nonce = chacha_encrypt(data, chacha_key)
        chacha_dec = chacha_decrypt(chacha_enc, chacha_key, chacha_nonce)

        fernet_enc = fernet_cipher.encrypt(data)
        fernet_dec = fernet_cipher.decrypt(fernet_enc)

        # Validate all decryptions match original. Exact equality is required:
        # a prefix test would also accept output with trailing garbage.
        decrypted = (aes_dec, des_dec, bf_dec, chacha_dec, fernet_dec)
        if any(dec != data for dec in decrypted):
            skipped_chunks += 1
            continue

        records.append({
            'original': data,
            'aes_input_iv': aes_iv,
            'aes_input_key': aes_key,
            'aes': aes_enc,
            'aes_mode': aes_mode,
            '3des_input_iv': des_iv,
            '3des_input_key': des_key,
            '3des': des_enc,
            'blowfish_input_iv': bf_iv,
            'blowfish_input_key': blowfish_key,
            'blowfish': bf_enc,
            'chacha_input_nonce': chacha_nonce,
            'chacha_input_key': chacha_key,
            'chacha20': chacha_enc,
            'fernet_key': fernet_key,
            'fernet': fernet_enc
        })

# Make dropped chunks visible instead of discarding them silently.
if skipped_chunks:
    print(f"Skipped {skipped_chunks} chunk(s) that failed the decryption round-trip check")


def to_uint8(x):
    """Return the bytes-like object ``x`` viewed as a 1-D array of uint8 values."""
    byte_values = np.frombuffer(x, dtype=np.dtype('uint8'))
    return byte_values


# (output field name, key in each record, convert bytes to uint8 array?)
# Listed in the order the fields are written. The '3des*' record keys are
# stored under 'triple_des*' names in the output.
_mat_fields = [
    ('original', 'original', True),
    ('aes_input_iv', 'aes_input_iv', True),
    ('aes_input_key', 'aes_input_key', True),
    ('aes', 'aes', True),
    ('aes_mode', 'aes_mode', False),  # mode names are kept as strings
    ('triple_des_input_iv', '3des_input_iv', True),
    ('triple_des_input_key', '3des_input_key', True),
    ('triple_des', '3des', True),
    ('blowfish_input_iv', 'blowfish_input_iv', True),
    ('blowfish_input_key', 'blowfish_input_key', True),
    ('blowfish', 'blowfish', True),
    ('chacha_input_nonce', 'chacha_input_nonce', True),
    ('chacha_input_key', 'chacha_input_key', True),
    ('chacha20', 'chacha20', True),
    ('fernet_key', 'fernet_key', True),
    ('fernet', 'fernet', True),
]

# One list per field, with one entry per record.
matlab_data = {
    field: [to_uint8(rec[key]) if as_bytes else rec[key] for rec in records]
    for field, key, as_bytes in _mat_fields
}

# Save as .mat file for MATLAB
savemat('encryptedDataset.mat', matlab_data)