In [8]:
# zipminator/zipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """

        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail
        
    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df
    
    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A list of strings specifying the names of the columns to anonymize.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df    
    
    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.

        Returns:
            None

        Raises:
            None
        """
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        df_zip = f"{self.file_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            else:
                print('Using provided password.')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(self.file_name)

        os.remove(self.file_name)

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()


    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)








In [9]:
import pandas as pd

# Create a DataFrame with some sample data
data = pd.DataFrame({
    'Column1': ['Value1', 'Value2', 'Value3', 'Value4', 'Value5'],
    'Column2': ['A', 'B', 'C', 'D', 'E'],
    'Column3': ['John', 'Jane', 'Jim', 'Jill', 'Jake'],
    'Column4': [25, 30, 35, 40, 45],
    'Column5': [True, False, True, False, True]
})

# Print the DataFrame
print(data)

  Column1 Column2 Column3  Column4  Column5
0  Value1       A    John       25     True
1  Value2       B    Jane       30    False
2  Value3       C     Jim       35     True
3  Value4       D    Jill       40    False
4  Value5       E    Jake       45     True


In [10]:
#from zipminator.zipit import Zipndel

# Initialize a Zipndel object with default values
zipper = Zipndel()

# Initialize a Zipndel object with custom values
zipper = Zipndel(
    file_name='my_file',
    file_format='csv',
    self_destruct_time=(0, 1, 0), # 24 hours until self-destruct
    password='my_password',
    encryption_algorithm='AES',
    mask_columns=['column1', 'column2'],
    anonymize_columns=['column3', 'column4'],
    compliance_check=True,
    audit_trail=True
)


# 2.attempt

In [14]:
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a  
            
            Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """
        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail

    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.

        Returns:
            None

        Raises:
            None
        """
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        df_zip = f"{self.file_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(self.file_name)

        os.remove(self.file_name)

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()

    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)

    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df

    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A
        list of strings specifying the names of the columns to anonymize.

        less
        Copy code
        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df

    
    
# zipminator/unzipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
import re


class Unzipndel:
    """Class for unzipping and reading a file using Zipminator.

    Attributes:
        file_name (str): The name of the file to be unzipped and read, default is 'df'.
        file_format (str): The file format of the file to be unzipped and read, default is 'csv'.

    Methods:
        unzipit(): Unzip the file, read it using pandas, and delete the unzipped file.
    """

    def __init__(self, file_name='df', file_format='csv'):
        self.file_name = file_name
        self.file_format = file_format

    def unzipit(self):
        """Unzip the file, read it using pandas, and delete the unzipped file.

        Returns:
            pd.DataFrame: A pandas dataframe containing the unzipped and read data.

        Raises:
            RuntimeError: If the password is incorrect or the file cannot be unzipped.
        """
        password = getpass.getpass('Password: ')
        with pyzipper.AESZipFile(f"{self.file_name}.zip") as zf:
            zf.setpassword(password.encode())
            zf.extract(self.file_name)

        read_func = getattr(pd, f'read_{self.file_format}')
        df = read_func(self.file_name)

        os.remove(self.file_name)

        return df



In [33]:
#Create a sample DataFrame

df = pd.DataFrame({'Name': ['John Doe', 'Jane Doe', 'Bob Smith'], 'Age': [25, 30, 40], 'SSN': ['123-45-6789', '987-65-4321', '555-55-5555']})

#Initialize a Zipndel object with default values

zipper = Zipndel(file_name='df0', file_format='csv', self_destruct_time=(0, 5, 0))

#Zip and encrypt the DataFrame
zipper.zipit(df)



AttributeError: 'DataFrame' object has no attribute 'name'

In [None]:
#Unzip and read the DataFrame
df = Unzipndel().unzipit()
df

In [35]:
#zipminator/zipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a  
            
            Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """
        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail

    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.

        Returns:
            None

        Raises:
            None
        """
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        df_zip = f"{self.file_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(self.file_name)

        os.remove(self.file_name)

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()

    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)

    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df

    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A
        list of strings specifying the names of the columns to anonymize.

        less
        Copy code
        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df

    
    
# zipminator/unzipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
import re


class Unzipndel:
    """Class for unzipping and reading a file using Zipminator.

    Attributes:
        file_name (str): The name of the file to be unzipped and read, default is 'df'.
        file_format (str): The file format of the file to be unzipped and read, default is 'csv'.

    Methods:
        unzipit(): Unzip the file, read it using pandas, and delete the unzipped file.
    """

    def __init__(self, file_name='df', file_format='csv'):
        self.file_name = file_name
        self.file_format = file_format

    def unzipit(self):
        """Unzip the file, read it using pandas, and delete the unzipped file.

        Returns:
            pd.DataFrame: A pandas dataframe containing the unzipped and read data.

        Raises:
            RuntimeError: If the password is incorrect or the file cannot be unzipped.
        """
        password = getpass.getpass('Password: ')
        with pyzipper.AESZipFile(f"{self.file_name}.zip") as zf:
            zf.setpassword(password.encode())
            zf.extract(self.file_name)

        read_func = getattr(pd, f'read_{self.file_format}')
        df = read_func(self.file_name)

        os.remove(self.file_name)

        return df

    
import pandas as pd

# lager en DataFrame med info om ansatte
ansatte = pd.DataFrame({
    'Ident': [150706, 150707, 150708, 150708, 150709],
    'Navn': ['Mo', 'Kari', 'Ola', 'Olga', 'Ali'],
    'Email': ['mo@nav.no', 'kari@nav.no', 'ola@nav.no', 'olga@nav.no', 'ali@nav.no'],
    'Phone': ['+4798079896', '+4798765434', '+4799887766', '+4745983421', '+4745989933'],
    'Address': ['moveien 9', 'Kari gate 34', 'Ola parken 17', 'Olga smuget 55', 'Ali alé 99'],
    'By': ['Oslo', 'Bærum', 'Asker', 'Oslo', 'Oslo'],
    'Fylke': ['Oslo', 'Viken', 'Viken', 'Oslo', 'Oslo'],
    'Zip': ['0456', '1550', '1750', '0954', '0134'],
    'Ans_siden': ['2020-01-01', '2019-01-02', '2017-01-03', '2022-01-04', '2022-01-05'],
    'Lonn': [780000, 820000, 915000, 825000, 917580]
})

# Initialize a Zipndel object with custom values
zipper = Zipndel(
    file_name='ansatte',
    file_format='csv',
    self_destruct_time=(0, 5, 0), # 24 hours until self-destruct
    #password='my_password',
    encryption_algorithm='AES',
    mask_columns=['Email', 'Phone'],
    anonymize_columns=['Adress', 'Lonn'],
    compliance_check=True,
    audit_trail=True
)

zipper.zipit(df)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[47], line 17
      1 #zipndel.zipit(ansatte)
      2 
      3 #df = Unzipndel().unzipit(ansatte)
      4 # Initialize a Zipndel object with custom values
      5 zipper = Zipndel(
      6     file_name='df',
      7     file_format='csv',
   (...)
     14     audit_trail=True
     15 )
---> 17 zipper.zipit(df)

Cell In[35], line 100, in Zipndel.zipit(self, df)
     86 """
     87 Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
     88 and delete the written file.
   (...)
     97     None
     98 """
     99 if self.mask_columns is not None:
--> 100     df = self.mask_columns(df, self.mask_columns)
    102 if self.anonymize_columns is not None:
    103     df = self.anonymize_columns(df, self.anonymize_columns)

TypeError: 'list' object is not callable


        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        df_zip = f"{self.file_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(self.file_name)

        os.remove(self.file_name)

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()


In [None]:
#zipminator/zipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a  
            
            Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """
        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail

    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.

        Returns:
            None

        Raises:
            None
        """
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        df_zip = f"{self.file_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(self.file_name)

        os.remove(self.file_name)

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()

    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)

    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df

    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A
        list of strings specifying the names of the columns to anonymize.

        less
        Copy code
        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df

    
    
# zipminator/unzipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
import re


class Unzipndel:
    """Class for unzipping and reading a file using Zipminator.

    Attributes:
        file_name (str): The name of the file to be unzipped and read, default is 'df'.
        file_format (str): The file format of the file to be unzipped and read, default is 'csv'.

    Methods:
        unzipit(): Unzip the file, read it using pandas, and delete the unzipped file.
    """

    def __init__(self, file_name='df', file_format='csv'):
        self.file_name = file_name
        self.file_format = file_format

    def unzipit(self):
        """Unzip the file, read it using pandas, and delete the unzipped file.

        Returns:
            pd.DataFrame: A pandas dataframe containing the unzipped and read data.

        Raises:
            RuntimeError: If the password is incorrect or the file cannot be unzipped.
        """
        password = getpass.getpass('Password: ')
        with pyzipper.AESZipFile(f"{self.file_name}.zip") as zf:
            zf.setpassword(password.encode())
            zf.extract(self.file_name)

        read_func = getattr(pd, f'read_{self.file_format}')
        df = read_func(self.file_name)

        os.remove(self.file_name)

        return df

    
import pandas as pd

# lager en DataFrame med info om ansatte
ansatte = pd.DataFrame({
    'Ident': [150706, 150707, 150708, 150708, 150709],
    'Navn': ['Mo', 'Kari', 'Ola', 'Olga', 'Ali'],
    'Email': ['mo@nav.no', 'kari@nav.no', 'ola@nav.no', 'olga@nav.no', 'ali@nav.no'],
    'Phone': ['+4798079896', '+4798765434', '+4799887766', '+4745983421', '+4745989933'],
    'Address': ['moveien 9', 'Kari gate 34', 'Ola parken 17', 'Olga smuget 55', 'Ali alé 99'],
    'By': ['Oslo', 'Bærum', 'Asker', 'Oslo', 'Oslo'],
    'Fylke': ['Oslo', 'Viken', 'Viken', 'Oslo', 'Oslo'],
    'Zip': ['0456', '1550', '1750', '0954', '0134'],
    'Ans_siden': ['2020-01-01', '2019-01-02', '2017-01-03', '2022-01-04', '2022-01-05'],
    'Lonn': [780000, 820000, 915000, 825000, 917580]
})

# Initialize a Zipndel object with custom values
zipper = Zipndel(
    file_name='ansatte',
    file_format='csv',
    self_destruct_time=(0, 5, 0), # 24 hours until self-destruct
    #password='my_password',
    encryption_algorithm='AES',
    mask_columns=['Email', 'Phone'],
    anonymize_columns=['Adress', 'Lonn'],
    compliance_check=True,
    audit_trail=True
)

zipper.zipit(df)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[47], line 17
      1 #zipndel.zipit(ansatte)
      2 
      3 #df = Unzipndel().unzipit(ansatte)
      4 # Initialize a Zipndel object with custom values
      5 zipper = Zipndel(
      6     file_name='df',
      7     file_format='csv',
   (...)
     14     audit_trail=True
     15 )
---> 17 zipper.zipit(df)

Cell In[35], line 100, in Zipndel.zipit(self, df)
     86 """
     87 Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
     88 and delete the written file.
   (...)
     97     None
     98 """
     99 if self.mask_columns is not None:
--> 100     df = self.mask_columns(df, self.mask_columns)
    102 if self.anonymize_columns is not None:
    103     df = self.anonymize_columns(df, self.anonymize_columns)

TypeError: 'list' object is not callable





In [36]:
#Unzip and read the DataFrame

df0 = Unzipndel().unzipit()
df0

Password:  ········


Unnamed: 0,Ident,Navn,Email,Phone,Address,By,Fylke,Zip,Ans_siden,Lonn,Pnr
0,150706,Mo,mo@nav.no,4798079896,moveien 9,Oslo,Oslo,456,2020-01-01,780000,12394596789
1,150707,Kari,kari@nav.no,4798765434,Kari gate 34,Bærum,Viken,1550,2019-01-02,820000,98786594321
2,150708,Ola,ola@nav.no,4799887766,Ola parken 17,Asker,Viken,1750,2017-01-03,915000,55585585555
3,150708,Olga,olga@nav.no,4745983421,Olga smuget 55,Oslo,Oslo,954,2022-01-04,825000,11152233333
4,150709,Ali,ali@nav.no,4745989933,Ali alé 99,Oslo,Oslo,134,2022-01-05,917580,44464484444


In [38]:
df1 = pd.DataFrame({'Name': ['John Doe', 'Jane Doe', 'Bob Smith'], 'Age': [25, 30, 40], 'SSN': ['123-45-6789', '987-65-4321', '555-55-5555']})

In [41]:
zipper = Zipndel(
    file_name='foo',
    file_format='csv',
    self_destruct_time=(0, 2, 0), # 24 hours until self-destruct
    #password='my_password',
    encryption_algorithm='AES'
)
zipper.zipit(df1)

Enter password:  ········


In [42]:
unzipndel = Unzipndel(file_name='foo', file_format='csv')
df2 = unzipndel.unzipit()
df2

Password:  ········


Unnamed: 0,Name,Age,SSN
0,John Doe,25,123-45-6789
1,Jane Doe,30,987-65-4321
2,Bob Smith,40,555-55-5555


In [45]:
df = df0.copy();df

Unnamed: 0,Ident,Navn,Email,Phone,Address,By,Fylke,Zip,Ans_siden,Lonn,Pnr
0,150706,Mo,mo@nav.no,4798079896,moveien 9,Oslo,Oslo,456,2020-01-01,780000,12394596789
1,150707,Kari,kari@nav.no,4798765434,Kari gate 34,Bærum,Viken,1550,2019-01-02,820000,98786594321
2,150708,Ola,ola@nav.no,4799887766,Ola parken 17,Asker,Viken,1750,2017-01-03,915000,55585585555
3,150708,Olga,olga@nav.no,4745983421,Olga smuget 55,Oslo,Oslo,954,2022-01-04,825000,11152233333
4,150709,Ali,ali@nav.no,4745989933,Ali alé 99,Oslo,Oslo,134,2022-01-05,917580,44464484444


In [47]:
import pandas as pd

# lager en DataFrame med info om ansatte
ansatte = pd.DataFrame({
    'Ident': [150706, 150707, 150708, 150708, 150709],
    'Navn': ['Mo', 'Kari', 'Ola', 'Olga', 'Ali'],
    'Email': ['mo@nav.no', 'kari@nav.no', 'ola@nav.no', 'olga@nav.no', 'ali@nav.no'],
    'Phone': ['+4798079896', '+4798765434', '+4799887766', '+4745983421', '+4745989933'],
    'Address': ['moveien 9', 'Kari gate 34', 'Ola parken 17', 'Olga smuget 55', 'Ali alé 99'],
    'By': ['Oslo', 'Bærum', 'Asker', 'Oslo', 'Oslo'],
    'Fylke': ['Oslo', 'Viken', 'Viken', 'Oslo', 'Oslo'],
    'Zip': ['0456', '1550', '1750', '0954', '0134'],
    'Ans_siden': ['2020-01-01', '2019-01-02', '2017-01-03', '2022-01-04', '2022-01-05'],
    'Lonn': [780000, 820000, 915000, 825000, 917580]
})

# Initialize a Zipndel object with custom values
zipper = Zipndel(
    file_name='ansatte',
    file_format='csv',
    self_destruct_time=(0, 5, 0), # 24 hours until self-destruct
    #password='my_password',
    encryption_algorithm='AES',
    mask_columns=['Email', 'Phone'],
    anonymize_columns=['Adress', 'Lonn'],
    compliance_check=True,
    audit_trail=True
)

zipper.zipit(df)

TypeError: 'list' object is not callable

In [None]:
Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)

In [17]:
import pandas as pd

# lager en DataFrame med info om ansatte
ansatte = pd.DataFrame({
    'Ident': [150706, 150707, 150708, 150708, 150709],
    'Navn': ['Mo', 'Kari', 'Ola', 'Olga', 'Ali'],
    'Email': ['mo@nav.no', 'kari@nav.no', 'ola@nav.no', 'olga@nav.no', 'ali@nav.no'],
    'Phone': ['+4798079896', '+4798765434', '+4799887766', '+4745983421', '+4745989933'],
    'Address': ['moveien 9', 'Kari gate 34', 'Ola parken 17', 'Olga smuget 55', 'Ali alé 99'],
    'By': ['Oslo', 'Bærum', 'Asker', 'Oslo', 'Oslo'],
    'Fylke': ['Oslo', 'Viken', 'Viken', 'Oslo', 'Oslo'],
    'Zip': ['0456', '1550', '1750', '0954', '0134'],
    'Ans_siden': ['2020-01-01', '2019-01-02', '2017-01-03', '2022-01-04', '2022-01-05'],
    'Lonn': [780000, 820000, 915000, 825000, 917580]
})

# Print DataFrame
print(ansatte)

    Ident  Navn        Email        Phone         Address     By  Fylke   Zip  \
0  150706    Mo    mo@nav.no  +4798079896       moveien 9   Oslo   Oslo  0456   
1  150707  Kari  kari@nav.no  +4798765434    Kari gate 34  Bærum  Viken  1550   
2  150708   Ola   ola@nav.no  +4799887766   Ola parken 17  Asker  Viken  1750   
3  150708  Olga  olga@nav.no  +4745983421  Olga smuget 55   Oslo   Oslo  0954   
4  150709   Ali   ali@nav.no  +4745989933      Ali alé 99   Oslo   Oslo  0134   

    Ans_siden    Lonn  
0  2020-01-01  780000  
1  2019-01-02  820000  
2  2017-01-03  915000  
3  2022-01-04  825000  
4  2022-01-05  917580  


In [18]:
# legger inn sensitive column (SSN)
ansatte['Pnr'] = ['12394596789', '98786594321', '55585585555', '11152233333', '44464484444']

# Print DataFrame
print(ansatte)

    Ident  Navn        Email        Phone         Address     By  Fylke   Zip  \
0  150706    Mo    mo@nav.no  +4798079896       moveien 9   Oslo   Oslo  0456   
1  150707  Kari  kari@nav.no  +4798765434    Kari gate 34  Bærum  Viken  1550   
2  150708   Ola   ola@nav.no  +4799887766   Ola parken 17  Asker  Viken  1750   
3  150708  Olga  olga@nav.no  +4745983421  Olga smuget 55   Oslo   Oslo  0954   
4  150709   Ali   ali@nav.no  +4745989933      Ali alé 99   Oslo   Oslo  0134   

    Ans_siden    Lonn          Pnr  
0  2020-01-01  780000  12394596789  
1  2019-01-02  820000  98786594321  
2  2017-01-03  915000  55585585555  
3  2022-01-04  825000  11152233333  
4  2022-01-05  917580  44464484444  


In [26]:
#Initialize a Zipndel object with default values

zipper = Zipndel()

#Zip and encrypt the DataFrame

zipper.zipit(ansatte)

Enter password:  ········


In [23]:
#Zip and encrypt the DataFrame

zipper.zipit(ansatte)

In [21]:
#Unzip and read the DataFrame

df = Unzipndel().unzipit()
df

Password:  ········


Unnamed: 0,Name,Age,SSN
0,John Doe,25,123-45-6789
1,Jane Doe,30,987-65-4321
2,Bob Smith,40,555-55-5555


In [19]:
#from zipminator.zipit import Zipndel

# Initialize a Zipndel object with default values
zipper = Zipndel()

In [None]:
import pandas as pd

# lager en DataFrame med info om ansatte
ansatte = pd.DataFrame({
    'Ident': [150706, 150707, 150708, 150708, 150709],
    'Navn': ['Mo', 'Kari', 'Ola', 'Olga', 'Ali'],
    'Email': ['mo@nav.no', 'kari@nav.no', 'ola@nav.no', 'olga@nav.no', 'ali@nav.no'],
    'Phone': ['+4798079896', '+4798765434', '+4799887766', '+4745983421', '+4745989933'],
    'Address': ['moveien 9', 'Kari gate 34', 'Ola parken 17', 'Olga smuget 55', 'Ali alé 99'],
    'By': ['Oslo', 'Bærum', 'Asker', 'Oslo', 'Oslo'],
    'Fylke': ['Oslo', 'Viken', 'Viken', 'Oslo', 'Oslo'],
    'Zip': ['0456', '1550', '1750', '0954', '0134'],
    'Ans_siden': ['2020-01-01', '2019-01-02', '2017-01-03', '2022-01-04', '2022-01-05'],
    'Lonn': [780000, 820000, 915000, 825000, 917580]
})
Zipndel().zipit(ansatte) Must write DataFrame `ansatte` to `ansatte.csv` and `ansatte.zip` as default!
This is a must, again Zipndel().zipit(ansatte) must and must write DataFrame `ansatte` to `ansatte.csv` and `ansatte.zip` as default!
Other example usages are:
    
zipper = Zipndel(
file_name='ansatte', #name of DataFrame will automatically be the file_name
file_format='csv',
self_destruct_time=(0, 1, 0), # 1min until self-destruct, else default is 672H as usual
#password='my_password',
encryption_algorithm='AES',
mask_columns=['column1', 'column2'],
anonymize_columns=['column3', 'column4'],
compliance_check=True,
audit_trail=True
)

Will .zipit(ansatte) pack ansatte.csv inside ansatte.zip such that I can 
zipper = Zipndel(file_name='ansatte', self_destruct_time=(0, 1, 0))
#Initialize a Zipndel object with default values

zipper = Zipndel()

#Zip and encrypt the DataFrame




zipndel.zipit(ansatte)

df = Unzipndel().unzipit(ansatte)
# Initialize a Zipndel object with custom values
zipper = Zipndel(
    file_name='ansatte',
    file_format='csv',
    self_destruct_time=(0, 1, 0), # 24 hours until self-destruct
    #password='my_password',
    encryption_algorithm='AES',
    mask_columns=['column1', 'column2'],
    anonymize_columns=['column3', 'column4'],
    compliance_check=True,
    audit_trail=True
)

Will 

In [None]:
As defaul; if the name of DataFrame is `df` then Zipndel has to create `df.csv` and encrypt and password protect it inside `df.zip`.
However if the user choose to call their DataFrame for `df0` then Zipndel has to create `df0.csv` and encrypt and password protect it inside `df0.zip`. 
Or for instance they choose to call their DataFrame for `foo`, then Zipndel has to create `foo.csv` and encrypt and password protect it inside `foo.zip`.
Modify Zipndel, only the part that will enable the user to choose whatever naming they want so they can Unzipndel the same zip file
# Example:
#Create a sample DataFrame

df0 = pd.DataFrame({'Name': ['John Doe', 'Jane Doe', 'Bob Smith'], 'Age': [25, 30, 40], 'SSN': ['123-45-6789', '987-65-4321', '555-55-5555']})

#Initialize a Zipndel object with default values
zipper = Zipndel()

#Zip and encrypt the DataFrame
zipper.zipit(df0)

This will pack df0 in `df0.csv` password protect inside `df0.zip`  prompt the user for password such  
#Unzip and read the DataFrame

df0 = Unzipndel().unzipit()
df0

after user prompt same password. Drop docstring in this modification

In [27]:
#zipminator/zipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a  
            
            Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """
        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail

class Zipndel:
    def zipit(self, df: pd.DataFrame, df_name: str) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.
            df_name (str): The name of the DataFrame to use for the file name and the zip file name.

        Returns:
            None

        Raises:
            None
        """
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(f"{df_name}.{self.file_format}", index=False)

        df_zip = f"{df_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(f"{df_name}.{self.file_format}")

        os.remove(f"{df_name}.{self.file_format}")

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()

    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)

    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df

    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A
        list of strings specifying the names of the columns to anonymize.

        less
        Copy code
        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df

    
    
# zipminator/unzipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
import re


class Unzipndel:
    """Class for unzipping and reading a file using Zipminator.

    Attributes:
        file_name (str): The name of the file to be unzipped and read, default is 'df'.
        file_format (str): The file format of the file to be unzipped and read, default is 'csv'.

    Methods:
        unzipit(): Unzip the file, read it using pandas, and delete the unzipped file.
    """

    def __init__(self, file_name='df', file_format='csv'):
        self.file_name = file_name
        self.file_format = file_format

    def unzipit(self):
        """Unzip the file, read it using pandas, and delete the unzipped file.

        Returns:
            pd.DataFrame: A pandas dataframe containing the unzipped and read data.

        Raises:
            RuntimeError: If the password is incorrect or the file cannot be unzipped.
        """
        password = getpass.getpass('Password: ')
        with pyzipper.AESZipFile(f"{self.file_name}.zip") as zf:
            zf.setpassword(password.encode())
            zf.extract(self.file_name)

        read_func = getattr(pd, f'read_{self.file_format}')
        df = read_func(self.file_name)

        os.remove(self.file_name)

        return df

In [28]:
#zipminator/zipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a  
            
            Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """
        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail


    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.

        Returns:
            None

        Raises:
            None
        """
        df_name = df.name if df.name is not None else "df"
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(f"{df_name}.{self.file_format}", index=False)

        df_zip = f"{df_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(f"{df_name}.{self.file_format}")

        os.remove(f"{df_name}.{self.file_format}")

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                    args=self.self_destruct_time)
            t.start()


    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)

    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df

    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A
        list of strings specifying the names of the columns to anonymize.

        less
        Copy code
        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df

    
    
# zipminator/unzipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
import re


class Unzipndel:
    """Class for unzipping and reading a file using Zipminator.

    Attributes:
        file_name (str): The name of the file to be unzipped and read, default is 'df'.
        file_format (str): The file format of the file to be unzipped and read, default is 'csv'.

    Methods:
        unzipit(): Unzip the file, read it using pandas, and delete the unzipped file.
    """

    def __init__(self, file_name='df', file_format='csv'):
        self.file_name = file_name
        self.file_format = file_format

    def unzipit(self):
        """Unzip the file, read it using pandas, and delete the unzipped file.

        Returns:
            pd.DataFrame: A pandas dataframe containing the unzipped and read data.

        Raises:
            RuntimeError: If the password is incorrect or the file cannot be unzipped.
        """
        password = getpass.getpass('Password: ')
        with pyzipper.AESZipFile(f"{self.file_name}.zip") as zf:
            zf.setpassword(password.encode())
            zf.extract(self.file_name)

        read_func = getattr(pd, f'read_{self.file_format}')
        df = read_func(self.file_name)

        os.remove(self.file_name)

        return df

In [31]:
df0 = pd.DataFrame({'Name': ['John Doe', 'Jane Doe', 'Bob Smith'], 'Age': [25, 30, 40], 'SSN': ['123-45-6789', '987-65-4321', '555-55-5555']})

#Initialize a Zipndel object with default values
zipper = Zipndel(self_destruct_time=(0, 0, 15))

#Zip and encrypt the DataFrame
zipper.zipit(df0)

#This will pack df0 in `df0.csv` password protect inside `df0.zip`  prompt the user for password such  


AttributeError: 'DataFrame' object has no attribute 'name'

In [None]:
#Unzip and read the DataFrame

df0 = Unzipndel().unzipit()
df0

In [None]:
# Initialize a Zipndel object with default values
zipper = Zipndel()

# Zip and encrypt the DataFrame with name 'df0'
zipper.zipit(df0)