# In [10]:
import pandas as pd
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
from base64 import b64encode, b64decode
import os

# AES key shared by encrypt_value/decrypt_value (must be 16, 24, or 32 bytes long).
# SECURITY: a key committed to source control should be treated as compromised.
# Supply the real key through the AES_SECRET_KEY environment variable; the
# literal below is kept only as a fallback so that data already encrypted with
# it can still be decrypted. Rotate the key and remove the fallback when possible.
SECRET_KEY = os.environ.get('AES_SECRET_KEY', 'policedata111124').encode('utf-8')
if len(SECRET_KEY) not in (16, 24, 32):
    # Fail at import time with a clear message instead of on the first row.
    raise ValueError('SECRET_KEY must be 16, 24, or 32 bytes long')

# Function to encrypt a value using AES
def encrypt_value(value):
    """Encrypt a single string with AES in CBC mode.

    A new 16-byte IV is drawn from ``os.urandom`` for every call, so the same
    input produces a different result each time.

    Args:
        value: The text to encrypt (a ``str``; it is encoded before padding).

    Returns:
        A ``(iv, ct)`` tuple of base64-encoded strings: the IV used and the
        ciphertext. Both are needed by ``decrypt_value``.
    """
    encryptor = AES.new(SECRET_KEY, AES.MODE_CBC, os.urandom(16))
    # The plaintext is padded up to a whole number of AES blocks.
    padded_plaintext = pad(value.encode(), AES.block_size)
    raw_ciphertext = encryptor.encrypt(padded_plaintext)
    encoded_iv = b64encode(encryptor.iv).decode('utf-8')
    encoded_ct = b64encode(raw_ciphertext).decode('utf-8')
    return encoded_iv, encoded_ct

# Function to decrypt a value using AES
def decrypt_value(iv, ct):
    """Reverse ``encrypt_value``: recover the original text.

    Args:
        iv: Base64-encoded initialisation vector.
        ct: Base64-encoded ciphertext.

    Returns:
        The decrypted text, decoded as UTF-8.
    """
    raw_iv, raw_ct = b64decode(iv), b64decode(ct)
    decryptor = AES.new(SECRET_KEY, AES.MODE_CBC, raw_iv)
    padded_plaintext = decryptor.decrypt(raw_ct)
    # Strip the block padding before turning the bytes back into text.
    return unpad(padded_plaintext, AES.block_size).decode('utf-8')

# Earlier version of encrypt_dataframe, left commented out (the active definition follows below)
# def encrypt_dataframe(df):
#     personal_info_columns = []
#     encrypted_columns = {}
    
#     # Define keywords to identify personal information columns
#     personal_info_keywords = ['name', 'address', 'phone', 'email', 'cast', 'district_name', 'unitname']
    
#     # Identify columns containing personal information
#     for column in df.columns:
#         if any(keyword in column.lower() for keyword in personal_info_keywords):
#             personal_info_columns.append(column)
    
#     # Encrypt personal information in identified columns for the first 1000 rows
#     for column in personal_info_columns:
#         iv_list, ct_list = [], []
#         for value in df[column].iloc[:100000]:  
#             if pd.notnull(value):  # Check if value is not missing
#                 iv, ct = encrypt_value(str(value))
#                 iv_list.append(iv)
#                 ct_list.append(ct)
#             else:
#                 iv_list.append(None)  # Add None for missing values
#                 ct_list.append(None)
#         encrypted_columns[f"{column}_IV"] = iv_list
#         encrypted_columns[f"{column}_CT"] = ct_list
#         df.drop(columns=[column], inplace=True)
    
    # # Add encrypted columns to DataFrame
    # for key, value in encrypted_columns.items():
    #     df[key] = value

# Encrypt personal information in a DataFrame
def encrypt_dataframe(df):
    """Encrypt the personal-information columns of ``df`` in place.

    A column counts as personal information when its lower-cased name contains
    one of the keywords below, unless it is one of the explicitly excluded
    unit/district columns. Each such column ``X`` is removed and replaced by
    two columns appended at the end of the frame: ``X_IV`` and ``X_CT``,
    holding the base64 IV and ciphertext returned by ``encrypt_value`` for the
    string form of every value. Missing values stay missing (``None``) in both
    new columns.

    Args:
        df: The pandas DataFrame to modify in place.

    Returns:
        None. ``df`` itself is changed.
    """
    personal_info_keywords = ('name', 'address', 'phone', 'email', 'cast')
    # These contain the keyword 'name' but are deliberately left unencrypted.
    excluded_columns = {'UnitName', 'DistrictName', 'District_Name', 'Unit_Name'}

    # str() keeps non-string column labels (e.g. integers) from crashing .lower().
    personal_info_columns = [
        column
        for column in df.columns
        if any(keyword in str(column).lower() for keyword in personal_info_keywords)
        and column not in excluded_columns
    ]

    encrypted_columns = {}
    for column in personal_info_columns:
        iv_list, ct_list = [], []
        # Every row is processed: the replacement columns must have exactly
        # len(df) entries, otherwise assigning them back raises ValueError.
        for value in df[column]:
            if pd.notnull(value):
                iv, ct = encrypt_value(str(value))
            else:
                iv, ct = None, None  # keep missing values missing
            iv_list.append(iv)
            ct_list.append(ct)
        encrypted_columns[f"{column}_IV"] = iv_list
        encrypted_columns[f"{column}_CT"] = ct_list

    # Remove the plaintext columns, then append their encrypted replacements.
    df.drop(columns=personal_info_columns, inplace=True)
    for key, values in encrypted_columns.items():
        df[key] = values


# Decrypt personal information in a DataFrame
def decrypt_dataframe(df):
    """Decrypt every ``X_CT`` / ``X_IV`` column pair of ``df`` in place.

    For each column whose name ends in ``_CT``, the matching ``_IV`` column is
    looked up, each row is passed to ``decrypt_value``, and the result is
    stored in a new column ``X``. The ``X_CT`` and ``X_IV`` columns are then
    dropped. Rows where either the IV or the ciphertext is missing decrypt to
    ``None``.

    Args:
        df: The pandas DataFrame to modify in place.

    Returns:
        None. ``df`` itself is changed.

    Raises:
        KeyError: If a ``_CT`` column has no matching ``_IV`` column.
    """
    suffix = '_CT'
    # Iterate over a snapshot: the loop adds and drops columns as it goes.
    for column in list(df.columns):
        if not (isinstance(column, str) and column.endswith(suffix)):
            continue
        # Strip only the trailing suffix; str.replace would also rewrite any
        # '_CT' occurring earlier in the name and miss the real IV column.
        base_name = column[:-len(suffix)]
        iv_column = base_name + '_IV'
        # Pair the values positionally so the frame's index labels do not
        # matter (label-based lookup with 0..n-1 fails on a non-default index).
        decrypted_values = [
            decrypt_value(iv, ct) if pd.notnull(iv) and pd.notnull(ct) else None
            for iv, ct in zip(df[iv_column], df[column])
        ]
        df[base_name] = decrypted_values
        df.drop(columns=[column, iv_column], inplace=True)

# Encrypt CSV file
def encrypt_csv(input_file, output_file):
    """Encrypt the personal-information columns of a CSV file.

    Only the first 100000 data rows of ``input_file`` are read; any further
    rows are not carried over to the output.

    Args:
        input_file: Path of the plaintext CSV to read.
        output_file: Path of the CSV to write, without the index column.
    """
    frame = pd.read_csv(input_file, nrows=100000)
    encrypt_dataframe(frame)  # modifies the frame in place
    frame.to_csv(output_file, index=False)

# Decrypt CSV file
def decrypt_csv(input_file, output_file):
    """Decrypt the encrypted columns of a CSV file.

    Args:
        input_file: Path of the encrypted CSV to read.
        output_file: Path of the CSV to write, without the index column.
    """
    frame = pd.read_csv(input_file)
    decrypt_dataframe(frame)  # modifies the frame in place
    frame.to_csv(output_file, index=False)

# Example usage: round-trip a CSV through encryption and back.
input_file = 'AccusedData.csv'            # plaintext source data
output_file_encrypted = 'encrypted.csv'   # written by encrypt_csv
output_file_decrypted = 'decrypted.csv'   # written by decrypt_csv

# Produce the encrypted copy first ...
encrypt_csv(input_file=input_file, output_file=output_file_encrypted)

# ... then decrypt that copy into a third file.
decrypt_csv(input_file=output_file_encrypted, output_file=output_file_decrypted)
