# Архивирование и разархивирование файлов

## 1. Модуль ZIPFILE


#### ZIP

In [4]:
import zipfile, os

path = '.'
file_dir = os.listdir(path)
# file_dir

with zipfile.ZipFile('test.zip', mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
    for file in file_dir:
        if file.endswith('ipynb'):
            add_file = os.path.join(path, file)
            zf.write(add_file)


os.system('file test.zip')

1

#### Добавить файл в архив

In [10]:
import zipfile

add_file = './fruits.csv'

with zipfile.ZipFile('test.zip', mode='a', compression=zipfile.ZIP_DEFLATED) as zf:
    zf.write(add_file, arcname='script-add.sql')   # меняем имя файла в архиве

#### Список файлов в архиве

In [15]:
import zipfile

with zipfile.ZipFile('test.zip', mode='a') as zf:
    for file in zf.namelist():
        print(file)

context_manager.ipynb
strings.ipynb
classes_magic.ipynb
data_types.ipynb
class.ipynb
re.ipynb
lists.ipynb
files_zip.ipynb
data_types-defaultdict.ipynb
strings2.ipynb
decorators.ipynb
files.ipynb
files_csv.ipynb
pandas.ipynb
strings3.ipynb
files_json.ipynb
logging.ipynb
script-add.sql


#### Подробная информация о файлах в архиве.

In [19]:
import zipfile, datetime

with zipfile.ZipFile('test.zip', mode='a') as zf:
    for file in zf.infolist():
        # дата файла в архиве
        date = datetime.datetime(*file.date_time)
        # имя файла в архиве без пути
        name = os.path.basename(file.filename)
        # печатаем имя, начальный размер, 
        # размер в архиве, дата файла
        print(f"{name:40}\t{file.file_size}\t{file.compress_size}\t{date.strftime('%H:%M %d.%m.%Y')}")

context_manager.ipynb                   	8900	2546	16:29 25.01.2025
strings.ipynb                           	25527	5069	16:31 10.11.2024
classes_magic.ipynb                     	20566	4403	10:43 26.12.2024
data_types.ipynb                        	8151	2002	10:06 25.12.2024
class.ipynb                             	44042	8442	09:53 10.12.2024
re.ipynb                                	34342	7856	15:46 19.01.2025
lists.ipynb                             	71323	2921	16:15 18.11.2024
files_zip.ipynb                         	2803	1027	15:31 27.01.2025
data_types-defaultdict.ipynb            	3389	967	10:46 26.12.2024
strings2.ipynb                          	4767	1390	16:12 14.11.2024
decorators.ipynb                        	22676	4192	10:32 10.12.2024
files.ipynb                             	8999	2110	17:27 25.01.2025
files_csv.ipynb                         	9592	2152	18:01 25.01.2025
pandas.ipynb                            	2825	849	10:05 29.12.2024
strings3.ipynb                          	243

#### Unzip - Извлечение всех файлов из архива в определенный каталог.

In [21]:
import zipfile, os, glob

extract_dir = 'test_unzipped'

# создаем папку
try:
    os.mkdir('test_unzipped')
except OSError as error:
    print(f'folder "{extract_dir}" already exists')

# извлекаем в нее
with zipfile.ZipFile('test.zip') as zf:
    zf.extractall(extract_dir)

# смотрм что в ней
for file in glob.glob(extract_dir + '/**', recursive=True):
    print(file)

folder "test_unzipped" already exists
test_unzipped/
test_unzipped/context_manager.ipynb
test_unzipped/strings.ipynb
test_unzipped/classes_magic.ipynb
test_unzipped/data_types.ipynb
test_unzipped/class.ipynb
test_unzipped/re.ipynb
test_unzipped/lists.ipynb
test_unzipped/files_zip.ipynb
test_unzipped/script-add.sql
test_unzipped/data_types-defaultdict.ipynb
test_unzipped/strings2.ipynb
test_unzipped/decorators.ipynb
test_unzipped/files.ipynb
test_unzipped/files_csv.ipynb
test_unzipped/pandas.ipynb
test_unzipped/strings3.ipynb
test_unzipped/files_json.ipynb
test_unzipped/logging.ipynb


## 2. Модуль gzip

работает только с единичным файлом

#### Запись в файл

In [24]:
import gzip

# из байт-строки
content = b"Lots of content here"
with gzip.open('test.txt.gz', 'wb') as f:
    f.write(content)

In [25]:
import gzip
import shutil

# компрессия одного файла
with open('fruits.csv', 'rb') as f_in:
    with gzip.open('test.txt.gz', 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

#### Чтение

In [30]:
import gzip

with gzip.open('test.txt.gz', 'rb') as f:
    file_content = f.read()

file_content

b'country,fruit,count,price,sum\nEquador,banana,123000,1.25,153750\nEquador,avocado,22000,2.55,56100\nEquador,mango,36000,3.8,136800\nChina,banana,88000,1.2,105600\nChina,mango,45000,3.2,144000\nMarocco,orange,56000,1.9,106400\nEgypt,orange,23000,1.85,42550\nThailand,avocado,18000,2.44,43920\nThailand,mango,42000,3.3,138600\n'

## 3. Модуль tarfile

#### Запись

In [39]:
import tarfile

path = '.'
files = [i for i in os.listdir(path) if i.endswith('ipynb')]

# 'w' без сжатия
# 'w:gz' Open for gzip compressed writing.
# 'w:bz2' Open for bzip2 compressed writing.
# 'w:xz' Open for lzma compressed writing.

with tarfile.open("test.tar.gz", "w:gz") as tar:
    for name in files:
        tar.add(name)

#### прочитать сжатый архивом tar.gz и показать некоторую информацию об элементах архива

In [40]:
import tarfile

with tarfile.open("test.tar.gz", "r:gz") as tar:
    for tarinfo in tar:
        print(tarinfo.name, "is", tarinfo.size, "bytes in size and is", end="")
        if tarinfo.isreg():
            print("a regular file.")
        elif tarinfo.isdir():
            print("a directory.")
        else:
            print("something else.")

context_manager.ipynb is 8900 bytes in size and isa regular file.
strings.ipynb is 25527 bytes in size and isa regular file.
classes_magic.ipynb is 20566 bytes in size and isa regular file.
data_types.ipynb is 8151 bytes in size and isa regular file.
class.ipynb is 44042 bytes in size and isa regular file.
re.ipynb is 34342 bytes in size and isa regular file.
lists.ipynb is 71323 bytes in size and isa regular file.
files_zip.ipynb is 20023 bytes in size and isa regular file.
data_types-defaultdict.ipynb is 3389 bytes in size and isa regular file.
strings2.ipynb is 4767 bytes in size and isa regular file.
decorators.ipynb is 22676 bytes in size and isa regular file.
files.ipynb is 8999 bytes in size and isa regular file.
files_csv.ipynb is 9592 bytes in size and isa regular file.
pandas.ipynb is 2825 bytes in size and isa regular file.
strings3.ipynb is 2431 bytes in size and isa regular file.
files_json.ipynb is 9250 bytes in size and isa regular file.
logging.ipynb is 7811 bytes in si

#### Декомпрессия

In [42]:
import tarfile, os

extract_dir = 'test_unzipped_tar'

# создаем папку
try:
    os.mkdir(extract_dir)
except OSError as error:
    print(f'folder "{extract_dir}" already exists')


with tarfile.open("test.tar.gz") as tar:
    tar.extractall(extract_dir, filter='data')