In [2]:
class Zipndel:
    def __init__(self, file_name: str = 'df', file_format: str = 'csv'):
        """
        Initialize Zipndel object.

        Parameters:
        file_name (str): the name of the file to be written, default is 'df'
        file_format (str): the file format of the file to be written, default is 'csv'
        """
        self.file_name = file_name
        self.file_format = file_format

    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input dataframe to a file, create a zip file with the written file, set a password for the zip file, and delete the written file.

        Parameters:
        df (pd.DataFrame): the input dataframe to be written to file and zipped

        Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel().zipit(df)
        """
        # write dataframe to a pandas supported file
        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        # create zip file and add the written file to it
        df_zip = f"{self.file_name}.zip"
        zf = zipfile.ZipFile(df_zip, mode='w')
        try:
            zf.write(self.file_name, compress_type=zipfile.ZIP_DEFLATED)
        finally:
            zf.close()

        # set password for zip file
        passwd = getpass.getpass('Password:')
        os.system(f'zip --password {passwd} {df_zip} {self.file_name}')

        # delete written file
        os.remove(self.file_name)

        # set self-destruct timer for the zip file
        days = int(input("Enter number of days for zip file to self-destruct: "))
        self_destruct_time = time.time() + days * 24 * 60 * 60
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                print(f"{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - Zip file deleted due to self-destruct timer.")
                break
            time.sleep(5)


In [3]:
from zipminator.zipit import Zipndel
import pandas as pd
import getpass
import zipfile
import os

In [4]:
zipndel = Zipndel(file_name='df', file_format='csv')
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
zipndel.zipit(df)

Password: ········
updating: df (stored 0%)


In [6]:
import zipfile
import os
import getpass
import time
import datetime
import pandas as pd

class Zipndel:
    def __init__(self, file_name: str = 'df', file_format: str = 'csv'):
        """
        Initialize Zipndel object.

        Parameters:
        file_name (str): the name of the file to be written, default is 'df'
        file_format (str): the file format of the file to be written, default is 'csv'
        """
        self.file_name = file_name
        self.file_format = file_format

    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input dataframe to a file, create a zip file with the written file, set a password for the zip file, and delete the written file.

        Parameters:
        df (pd.DataFrame): the input dataframe to be written to file and zipped

        Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel().zipit(df)
        """
        # write dataframe to a pandas supported file
        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        # create zip file and add the written file to it
        df_zip = f"{self.file_name}.zip"
        zf = zipfile.ZipFile(df_zip, mode='w')
        try:
            zf.write(self.file_name, compress_type=zipfile.ZIP_DEFLATED)
        finally:
            zf.close()

        # set password for zip file
        passwd = getpass.getpass('Password:')
        os.system(f'zip --password {passwd} {df_zip} {self.file_name}')

        # delete written file
        os.remove(self.file_name)

    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """
        Set the self-destruct timer for the zip file.

        Parameters:
        hours (int): number of hours until self-destruct
        minutes (int): number of minutes until self-destruct
        seconds (int): number of seconds until self-destruct
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                print(f"{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - Zip file deleted due to self-destruct timer.")
                break
            time.sleep(5)


In [1]:
zipndel = Zipndel(file_name='df', file_format='csv')
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
zipndel.zipit(df)
zipndel.self_destruct(0, 0, 18)


NameError: name 'Zipndel' is not defined

In [2]:
from zipminator import Zipndel, Unzipndel
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
zipndel = Zipndel(file_name='df', file_format='csv', self_destruct_time=(0, 0, 18))
zipndel.zipit(df)
unzipndel = Unzipndel(file_name='df', file_format='csv')
passwd = getpass.getpass('Password:')
df = unzipndel.unzipit(passwd=passwd)
print(df.head())

ModuleNotFoundError: No module named 'zipminator'

# V 0.3.0

In [1]:
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
            password (str): The password to use for the zip file, default is None.
            encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
            mask_columns (list): The list of columns to mask, default is None.
            anonymize_columns (list): The list of columns to anonymize, default is None.
            compliance_check (bool): Whether to perform a compliance check on the data, default is False.
            audit_trail (bool): Whether to keep an audit trail, default is False.
            """

        self.file_name = file_name
        self.file_format = file_format
        self.self_destruct_time = self_destruct_time
        self.password = password
        self.encryption_algorithm = encryption_algorithm
        self.mask_columns = mask_columns
        self.anonymize_columns = anonymize_columns
        self.compliance_check = compliance_check
        self.audit_trail = audit_trail
        
    def zipit(self, df: pd.DataFrame) -> None:
        """
        Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
        and delete the written file.

        Args:
            df (pandas.DataFrame): The DataFrame to compress and encrypt.

        Returns:
            None

        Raises:
            None
        """
        if self.mask_columns is not None:
            df = self.mask_columns(df, self.mask_columns)

        if self.anonymize_columns is not None:
            df = self.anonymize_columns(df, self.anonymize_columns)

        write_func = getattr(df, f'to_{self.file_format}')
        write_func(self.file_name, index=False)

        df_zip = f"{self.file_name}.zip"
        with pyzipper.AESZipFile(df_zip, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=getattr(pyzipper, f'WZ_{self.encryption_algorithm}')) as zf:
            if self.password is None:
                self.password = getpass.getpass('Enter password: ')
            zf.setpassword(self.password.encode('utf-8'))
            zf.write(self.file_name)

        os.remove(self.file_name)

        if self.self_destruct_time and self.self_destruct_time != False:
            t = threading.Thread(target=self.self_destruct,
                                 args=self.self_destruct_time)
            t.start()


    def mask_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        Args:
            df (pandas.DataFrame): The DataFrame to mask sensitive data in.
            columns (list): A list of strings specifying the names of the columns to mask.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns masked.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(
                lambda x: hashlib.sha256(str(x).encode()).hexdigest())
        return df

    def anonymize_columns(self, df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
        """Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        Args:
            df (pandas.DataFrame): The DataFrame to anonymize sensitive data in.
            columns (list): A list of strings specifying the names of the columns to anonymize.

        Returns:
            pandas.DataFrame: A copy of the input DataFrame with the specified columns anonymized.
        """
        df = df.copy()
        for col in columns:
            df[col] = df[col].apply(lambda x: ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=10)))
        return df


    def self_destruct(self, hours: int, minutes: int, seconds: int) -> None:
        """Delete the compressed and encrypted file after a specified amount of time has elapsed.

        Args:
            hours (int): The number of hours until file deletion.
            minutes (int): The number of minutes until file deletion.
            seconds (int): The number of seconds until file deletion.

        Returns:
        None
        """
        df_zip = f"{self.file_name}.zip"
        self_destruct_time = time.time() + hours * 60 * 60 + minutes * 60 + seconds
        while True:
            if time.time() > self_destruct_time:
                os.remove(df_zip)
                break
            time.sleep(5)


# zipminator/unzipit.py
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
import re


class Unzipndel:
    """Class for unzipping and reading a file using Zipminator.

    Attributes:
        file_name (str): The name of the file to be unzipped and read, default is 'df'.
        file_format (str): The file format of the file to be unzipped and read, default is 'csv'.

    Methods:
        unzipit(): Unzip the file, read it using pandas, and delete the unzipped file.
    """

    def __init__(self, file_name='df', file_format='csv'):
        self.file_name = file_name
        self.file_format = file_format

    def unzipit(self):
        """Unzip the file, read it using pandas, and delete the unzipped file.

        Returns:
            pd.DataFrame: A pandas dataframe containing the unzipped and read data.

        Raises:
            RuntimeError: If the password is incorrect or the file cannot be unzipped.
        """
        password = getpass.getpass('Password: ')
        with pyzipper.AESZipFile(f"{self.file_name}.zip") as zf:
            zf.setpassword(password.encode())
            zf.extract(self.file_name)

        read_func = getattr(pd, f'read_{self.file_format}')
        df = read_func(self.file_name)

        os.remove(self.file_name)

        return df

In [4]:
# dummmy dataset

import pandas as pd

data = {
    'Navn': ['Emilie', 'Marius', 'Ingrid', 'Ola', 'Kari', 'Sofie', 'Anders', 'Lise', 'Arne', 'Kristin'],
    'Etternavn': ['Hansen', 'Solberg', 'Johansen', 'Nilsen', 'Andersen', 'Larsen', 'Olsen', 'Madsen', 'Pedersen', 'Bakken'],
    'Personnummer': ['12057840234', '03039918904', '09118747239', '12016246564', '04109130573', '15066312043', '11038427126', '18099646252', '30098719564', '28026746854'],
    'Adresse': ['Storgata 5', 'Hestehagen 12', 'Sentralgata 9', 'Kongens gate 3', 'Langveien 15', 'Brugata 7', 'Karl Johans gate 20', 'Søndre gate 4', 'Kristian IVs gate 2', 'Torggata 15'],
    'Email': ['emilie.hansen@gmail.com', 'marius.solberg@hotmail.com', 'ingrid.johansen@yahoo.com', 'ola.nilsen@gmail.com', 'kari.andersen@outlook.com', 'sofie.larsen@gmail.com', 'anders.olsen@yahoo.com', 'lise.madsen@hotmail.com', 'arne.pedersen@gmail.com', 'kristin.bakken@hotmail.com']
}

df = pd.DataFrame(data) 
df.head()

Unnamed: 0,Navn,Etternavn,Personnummer,Adresse,Email
0,Emilie,Hansen,12057840234,Storgata 5,emilie.hansen@gmail.com
1,Marius,Solberg,3039918904,Hestehagen 12,marius.solberg@hotmail.com
2,Ingrid,Johansen,9118747239,Sentralgata 9,ingrid.johansen@yahoo.com
3,Ola,Nilsen,12016246564,Kongens gate 3,ola.nilsen@gmail.com
4,Kari,Andersen,4109130573,Langveien 15,kari.andersen@outlook.com


In [3]:
Zipndel().zipit(df)

Enter password:  ········


In [3]:
dfu = Unzipndel().unzipit();dfu.head()

Password:  ········


Unnamed: 0,Navn,Etternavn,Personnummer,Adresse,Email
0,Emilie,Hansen,12057840234,Storgata 5,emilie.hansen@gmail.com
1,Marius,Solberg,3039918904,Hestehagen 12,marius.solberg@hotmail.com
2,Ingrid,Johansen,9118747239,Sentralgata 9,ingrid.johansen@yahoo.com
3,Ola,Nilsen,12016246564,Kongens gate 3,ola.nilsen@gmail.com
4,Kari,Andersen,4109130573,Langveien 15,kari.andersen@outlook.com


In [5]:
# write df to csv
df.to_csv('data.csv', index=False)

In [6]:
import pandas as pd
from zipminator import Zipndel

dfs = pd.read_csv('data.csv')
zipper = Zipndel(mask_columns=['Email'])
zipper.zipit(dfs)


TypeError: __init__() got an unexpected keyword argument 'mask_columns'

In [8]:
import pandas as pd
from zipminator import Zipndel

dfs = pd.read_csv('data.csv')
zipper = Zipndel()
zipper.zipit(dfs, mask_columns=['Email'])

TypeError: zipit() got an unexpected keyword argument 'mask_columns'

In [9]:
dfs

Unnamed: 0,Navn,Etternavn,Personnummer,Adresse,Email
0,Emilie,Hansen,12057840234,Storgata 5,emilie.hansen@gmail.com
1,Marius,Solberg,3039918904,Hestehagen 12,marius.solberg@hotmail.com
2,Ingrid,Johansen,9118747239,Sentralgata 9,ingrid.johansen@yahoo.com
3,Ola,Nilsen,12016246564,Kongens gate 3,ola.nilsen@gmail.com
4,Kari,Andersen,4109130573,Langveien 15,kari.andersen@outlook.com
5,Sofie,Larsen,15066312043,Brugata 7,sofie.larsen@gmail.com
6,Anders,Olsen,11038427126,Karl Johans gate 20,anders.olsen@yahoo.com
7,Lise,Madsen,18099646252,Søndre gate 4,lise.madsen@hotmail.com
8,Arne,Pedersen,30098719564,Kristian IVs gate 2,arne.pedersen@gmail.com
9,Kristin,Bakken,28026746854,Torggata 15,kristin.bakken@hotmail.com


In [2]:
#import pandas as pd
#from zipminator import Zipndel

# create a sample DataFrame
df = pd.DataFrame({
    'Navn': ['Ola', 'Kari', 'Per'],
    'Etternavn': ['Nordmann', 'Nordmann', 'Hansen'],
    'Personnummer': [123456789, 987654321, 111111111],
    'Adresse': ['Oslo', 'Bergen', 'Trondheim'],
    'Email': ['ola@example.com', 'kari@example.com', 'per@example.com']
})

# create a Zipndel object and compress the DataFrame with the 'Email' column masked
zipper = Zipndel(file_name='sample_data', mask_columns=['Email'])
zipper.zipit(df)

# read the compressed and encrypted file back to a DataFrame
df2 = zipper.decompress_and_read()

# print the original and the decompressed DataFrames
print(df)
print(df2)


TypeError: 'list' object is not callable

In [13]:
import pandas as pd
#from zipminator import Zipndel

# create a Pandas DataFrame with Norwegian names, surnames, personal numbers, addresses, and emails
df = pd.DataFrame({
    'Navn': ['Ola', 'Kari', 'Per'],
    'Etternavn': ['Nordmann', 'Nordmann', 'Hansen'],
    'Personnummer': ['01020312345', '02030423456', '03040534567'],
    'Adresse': ['Osloveien 12', 'Bergenveien 34', 'Trondheimsveien 56'],
    'Email': ['ola@example.com', 'kari@example.com', 'per@example.com']
})

# create a Zipndel object and compress the DataFrame with the 'Email' column masked
zipper = Zipndel(file_name='sample_data', mask_columns=['Email'])
zipper.zipit(df)

# read the compressed and encrypted file back to a DataFrame
df_decompressed = zipper.decompress_and_read()

# print the decompressed DataFrame
print(df_decompressed)


TypeError: __init__() got an unexpected keyword argument 'mask_columns'

In [3]:
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
from typing import List


ma

In [4]:
#import pandas as pd
#from zipminator import Zipndel

# create a Pandas DataFrame with Norwegian names, surnames, personal numbers, addresses, and emails
df = pd.DataFrame({
    'Navn': ['Ola', 'Kari', 'Per'],
    'Etternavn': ['Nordmann', 'Nordmann', 'Hansen'],
    'Personnummer': ['01020312345', '02030423456', '03040534567'],
    'Adresse': ['Osloveien 12', 'Bergenveien 34', 'Trondheimsveien 56'],
    'Email': ['ola@example.com', 'kari@example.com', 'per@example.com']
})

# create a Zipndel object and compress the DataFrame with the 'Email' column masked
zipper = Zipndel(file_name='sample_data', mask_columns=['Email'])
zipper.zipit(df)

TypeError: 'list' object is not callable

In [None]:
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
import string
from typing import List


class Zipndel:
    """Class for compressing and encrypting Pandas DataFrames and deleting the original file.

    Attributes:
        file_name (str): The name of the file to be written, default is 'df'.
        file_format (str): The file format of the file to be written, default is 'csv'.
        self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0).
        password (str): The password to use for the zip file, default is None.
        encryption_algorithm (str): The encryption algorithm to use for the zip file, default is 'AES'.
        mask_columns (list): The list of columns to mask, default is None.
        anonymize_columns (list): The list of columns to anonymize, default is None.
        compliance_check (bool): Whether to perform a compliance check on the data, default is False.
        audit_trail (bool): Whether to keep an audit trail, default is False.

    Methods:
        mask_sensitive_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Mask sensitive data in the specified DataFrame columns by applying a SHA-256 hash function.

        anonymize_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
            Anonymize sensitive data in the specified DataFrame columns by replacing it with random characters.

        zipit(df: pd.DataFrame) -> None:
            Write the input DataFrame to a file, create a zip file with the written file, set a password for the zip file,
            and delete the written file.

        self_destruct(hours: int, minutes: int, seconds: int) -> None:
            Delete the compressed and encrypted file after a specified amount of time.

        decompress_and_read() -> pd.DataFrame:
            Unzip the file, read it using pandas, and delete the unzipped file.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> Zipndel(file_name='my_file', password='my_password', mask_columns=['B'], anonymize_columns=['C']).zipit(df)
    """

    def __init__(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
        """
        Initialize the Zipndel object.

        Args:
            file_name (str): The name of the file to be written, default is 'df'.
            file_format (str): The file format of the file to be written, default is 'csv'.
            self_destruct_time (tuple): A tuple of (hours, minutes, seconds) until self-destruct, default is (672, 0, 0)
            
import pandas as pd
import pyzipper
import time
import threading
import getpass
import os
import hashlib
import random
from typing import List

class Zipndel:
def init(self, file_name='df', file_format='csv', self_destruct_time=(672, 0, 0), password=None, encryption_algorithm='AES', mask_columns=None, anonymize_columns=None, compliance_check=False, audit_trail=False):
self.file_name = file_name
self.file_format = file_format
self.self_destruct_time = self_destruct_time
self.password = password
self.encryption_algorithm = encryption_algorithm
self.mask_columns = mask_columns
self.anonymize_columns = anonymize_columns
self.compliance_check = compliance_check
self.audit_trail = audit_trail


