[Reference](https://medium.com/@tubelwj/python-dataframe-data-masking-techniques-c7574703a852)

# Data Preparation

In [1]:
import pandas as pd

# Create sample data
data = {
    'Name': ['John Doe', 'Jane Smith', 'Michael Johnson', 'Emily Davis', 'David Wilson',
             'Sarah Brown', 'James Miller', 'Laura Taylor', 'Daniel Anderson', 'Olivia Thomas'],
    'SSN': ['123-45-6789', '987-65-4321', '456-78-9012', '345-67-8901', '234-56-7890',
            '567-89-0123', '678-90-1234', '789-01-2345', '890-12-3456', '901-23-4567'],
    'Phone Number': ['202-555-0198', '305-555-0172', '212-555-0145', '415-555-0109', '512-555-0190',
                     '617-555-0157', '718-555-0163', '202-555-0137', '213-555-0181', '303-555-0149'],
    'Email': ['johndoe@gmail.com', 'janesmith@hotmail.com', 'michaeljohnson@yahoo.com',
              'emily.davis@abc_company.com', 'david.wilson@road_company.com',
              'sarah.brown@gmail.com', 'james.miller@yahoo.com',
              'laura.taylor@hotmail.com', 'daniel.anderson@ai_company.com',
              'olivia.thomas@gmail.com']
}

# Convert data to a DataFrame
df = pd.DataFrame(data)

# Print the customer data
print("Customer Data:")
print(df)

Customer Data:
              Name          SSN  Phone Number                           Email
0         John Doe  123-45-6789  202-555-0198               johndoe@gmail.com
1       Jane Smith  987-65-4321  305-555-0172           janesmith@hotmail.com
2  Michael Johnson  456-78-9012  212-555-0145        michaeljohnson@yahoo.com
3      Emily Davis  345-67-8901  415-555-0109     emily.davis@abc_company.com
4     David Wilson  234-56-7890  512-555-0190   david.wilson@road_company.com
5      Sarah Brown  567-89-0123  617-555-0157           sarah.brown@gmail.com
6     James Miller  678-90-1234  718-555-0163          james.miller@yahoo.com
7     Laura Taylor  789-01-2345  202-555-0137        laura.taylor@hotmail.com
8  Daniel Anderson  890-12-3456  213-555-0181  daniel.anderson@ai_company.com
9    Olivia Thomas  901-23-4567  303-555-0149         olivia.thomas@gmail.com


# Data Masking

In [2]:
import pandas as pd

# Create sample data
data = {
    'Name': ['John Doe', 'Jane Smith', 'Michael Johnson', 'Emily Davis', 'David Wilson',
             'Sarah Brown', 'James Miller', 'Laura Taylor', 'Daniel Anderson', 'Olivia Thomas'],
    'SSN': ['123-45-6789', '987-65-4321', '456-78-9012', '345-67-8901', '234-56-7890',
            '567-89-0123', '678-90-1234', '789-01-2345', '890-12-3456', '901-23-4567'],
    'Phone Number': ['202-555-0198', '305-555-0172', '212-555-0145', '415-555-0109', '512-555-0190',
                     '617-555-0157', '718-555-0163', '202-555-0137', '213-555-0181', '303-555-0149'],
    'Email': ['johndoe@gmail.com', 'janesmith@hotmail.com', 'michaeljohnson@yahoo.com',
              'emily.davis@abc_company.com', 'david.wilson@road_company.com',
              'sarah.brown@gmail.com', 'james.miller@yahoo.com',
              'laura.taylor@hotmail.com', 'daniel.anderson@ai_company.com',
              'olivia.thomas@gmail.com']
}

# Convert data to a DataFrame
df = pd.DataFrame(data)

# Phone number anonymization
df['Phone Number_Anonymized'] = df['Phone Number'].str.replace(r'(\d{3})\d{4}(\d{4})', r'\1****\2', regex=True)

# SSN anonymization
df['SSN_Anonymized'] = df['SSN'].str.replace(r'(\d{3})-\d{2}-(\d{4})', r'\1-**-\2', regex=True)

# Print the anonymized data
print("Anonymized Data:")
print(df[['Name', 'Phone Number_Anonymized', 'SSN_Anonymized']])

Anonymized Data:
              Name Phone Number_Anonymized SSN_Anonymized
0         John Doe            202-555-0198    123-**-6789
1       Jane Smith            305-555-0172    987-**-4321
2  Michael Johnson            212-555-0145    456-**-9012
3      Emily Davis            415-555-0109    345-**-8901
4     David Wilson            512-555-0190    234-**-7890
5      Sarah Brown            617-555-0157    567-**-0123
6     James Miller            718-555-0163    678-**-1234
7     Laura Taylor            202-555-0137    789-**-2345
8  Daniel Anderson            213-555-0181    890-**-3456
9    Olivia Thomas            303-555-0149    901-**-4567


# Data Hashing

In [3]:
import pandas as pd
import hashlib

# Define the hash function
def hash_value(value):
    return hashlib.sha256(value.encode()).hexdigest()

# Create sample data
data = {
    'Name': ['John Doe', 'Jane Smith', 'Michael Johnson', 'Emily Davis', 'David Wilson',
             'Sarah Brown', 'James Miller', 'Laura Taylor', 'Daniel Anderson', 'Olivia Thomas'],
    'SSN': ['123-45-6789', '987-65-4321', '456-78-9012', '345-67-8901', '234-56-7890',
            '567-89-0123', '678-90-1234', '789-01-2345', '890-12-3456', '901-23-4567'],
    'Phone Number': ['202-555-0198', '305-555-0172', '212-555-0145', '415-555-0109', '512-555-0190',
                     '617-555-0157', '718-555-0163', '202-555-0137', '213-555-0181', '303-555-0149'],
    'Email': ['johndoe@gmail.com', 'janesmith@hotmail.com', 'michaeljohnson@yahoo.com',
              'emily.davis@abc_company.com', 'david.wilson@road_company.com',
              'sarah.brown@gmail.com', 'james.miller@yahoo.com',
              'laura.taylor@hotmail.com', 'daniel.anderson@ai_company.com',
              'olivia.thomas@gmail.com']
}

# Convert data to a DataFrame
df = pd.DataFrame(data)

# Apply hash function to email column
df['Email_Hash'] = df['Email'].apply(hash_value)

# Print the customer data with hashed emails
print("Customer Data with Hashed Emails:")
print(df[['Name', 'Email', 'Email_Hash']])

Customer Data with Hashed Emails:
              Name                           Email  \
0         John Doe               johndoe@gmail.com   
1       Jane Smith           janesmith@hotmail.com   
2  Michael Johnson        michaeljohnson@yahoo.com   
3      Emily Davis     emily.davis@abc_company.com   
4     David Wilson   david.wilson@road_company.com   
5      Sarah Brown           sarah.brown@gmail.com   
6     James Miller          james.miller@yahoo.com   
7     Laura Taylor        laura.taylor@hotmail.com   
8  Daniel Anderson  daniel.anderson@ai_company.com   
9    Olivia Thomas         olivia.thomas@gmail.com   

                                          Email_Hash  
0  06a240d11cc201676da976f7b49341181fd180da37cbe4...  
1  9595c954d6f915597a49733eb703cb8a78ac5943be639f...  
2  56b74a97601e9594ac8b8ccc69a60616a3e5b0602339a4...  
3  bd8908ae5aee0e45694870f0b51cdaa4e7dd08f4d944c4...  
4  34f5b7151db89e6a970942c9663548c6fc7bb9e8f16165...  
5  906aa0f301f430f74f8807b5386ebf466ea36a

# Data Randomization

In [4]:
import random
import pandas as pd

# Create sample data
data = {
    'Name': ['John Doe', 'Jane Smith', 'Michael Johnson', 'Emily Davis', 'David Wilson',
             'Sarah Brown', 'James Miller', 'Laura Taylor', 'Daniel Anderson', 'Olivia Thomas'],
    'SSN': ['123-45-6789', '987-65-4321', '456-78-9012', '345-67-8901', '234-56-7890',
            '567-89-0123', '678-90-1234', '789-01-2345', '890-12-3456', '901-23-4567'],
    'Phone Number': ['202-555-0198', '305-555-0172', '212-555-0145', '415-555-0109', '512-555-0190',
                     '617-555-0157', '718-555-0163', '202-555-0137', '213-555-0181', '303-555-0149'],
    'Email': ['johndoe@gmail.com', 'janesmith@hotmail.com', 'michaeljohnson@yahoo.com',
              'emily.davis@abc_company.com', 'david.wilson@road_company.com',
              'sarah.brown@gmail.com', 'james.miller@yahoo.com',
              'laura.taylor@hotmail.com', 'daniel.anderson@ai_company.com',
              'olivia.thomas@gmail.com']
}

# Convert data to a DataFrame
df = pd.DataFrame(data)

# Define randomize function for names
def randomize_name(names):
    return random.choice(names)

# Randomize names with predefined list
random_names = ['John Doe', 'Jane Smith', 'Michael Johnson', 'Emily Davis', 'David Wilson',
                'Sarah Brown', 'James Miller', 'Laura Taylor', 'Daniel Anderson', 'Olivia Thomas']

# Apply randomization
df['Name_Randomized'] = [randomize_name(random_names) for _ in range(len(df))]

# Print the results
print("Randomized Data:")
print(df[['Name', 'Name_Randomized']])

Randomized Data:
              Name  Name_Randomized
0         John Doe      Sarah Brown
1       Jane Smith     James Miller
2  Michael Johnson      Emily Davis
3      Emily Davis      Emily Davis
4     David Wilson      Sarah Brown
5      Sarah Brown    Olivia Thomas
6     James Miller     Laura Taylor
7     Laura Taylor     Laura Taylor
8  Daniel Anderson  Michael Johnson
9    Olivia Thomas  Michael Johnson


# Data Encryption

In [5]:
from cryptography.fernet import Fernet
import pandas as pd

# Create sample data
data = {
    'Name': ['John Doe', 'Jane Smith', 'Michael Johnson', 'Emily Davis', 'David Wilson',
             'Sarah Brown', 'James Miller', 'Laura Taylor', 'Daniel Anderson', 'Olivia Thomas'],
    'SSN': ['123-45-6789', '987-65-4321', '456-78-9012', '345-67-8901', '234-56-7890',
            '567-89-0123', '678-90-1234', '789-01-2345', '890-12-3456', '901-23-4567'],
    'Phone Number': ['202-555-0198', '305-555-0172', '212-555-0145', '415-555-0109', '512-555-0190',
                     '617-555-0157', '718-555-0163', '202-555-0137', '213-555-0181', '303-555-0149'],
    'Email': ['johndoe@gmail.com', 'janesmith@hotmail.com', 'michaeljohnson@yahoo.com',
              'emily.davis@abc_company.com', 'david.wilson@road_company.com',
              'sarah.brown@gmail.com', 'james.miller@yahoo.com',
              'laura.taylor@hotmail.com', 'daniel.anderson@ai_company.com',
              'olivia.thomas@gmail.com']
}

# Convert data to a DataFrame
df = pd.DataFrame(data)

# Generate encryption key and cipher
key = Fernet.generate_key()
cipher = Fernet(key)

# Encrypt the phone numbers
df['Phone Number_Encrypted'] = df['Phone Number'].apply(lambda x: cipher.encrypt(x.encode()).decode())

# Decrypt the phone numbers
df['Phone Number_Decrypted'] = df['Phone Number_Encrypted'].apply(lambda x: cipher.decrypt(x.encode()).decode())

# Print the results
print("Encrypted and Decrypted Data:")
print(df[['Name', 'Phone Number', 'Phone Number_Encrypted', 'Phone Number_Decrypted']])

Encrypted and Decrypted Data:
              Name  Phone Number  \
0         John Doe  202-555-0198   
1       Jane Smith  305-555-0172   
2  Michael Johnson  212-555-0145   
3      Emily Davis  415-555-0109   
4     David Wilson  512-555-0190   
5      Sarah Brown  617-555-0157   
6     James Miller  718-555-0163   
7     Laura Taylor  202-555-0137   
8  Daniel Anderson  213-555-0181   
9    Olivia Thomas  303-555-0149   

                              Phone Number_Encrypted Phone Number_Decrypted  
0  gAAAAABn0s-Q6tr95DYCQyYELEfpPeejz3T-nFLjArTPFk...           202-555-0198  
1  gAAAAABn0s-Q91zOtjPyFtuOqH1vWrAiDqbGmscqZWqCO6...           305-555-0172  
2  gAAAAABn0s-QP8BwO9qf4bL6_5DUWFrLlSBo1rOhRVwYvd...           212-555-0145  
3  gAAAAABn0s-QsCSb9n76SiuPnJwZ3rIZIskcfILIErzduV...           415-555-0109  
4  gAAAAABn0s-Q0fycFYNrJNfDZrR3dMXlPEQBxkrVKiUP0D...           512-555-0190  
5  gAAAAABn0s-QuEvYLn1NEl3RwTlulnWGecMCRZnuLQ8QyJ...           617-555-0157  
6  gAAAAABn0s-Qt3XCXMHsken4