# zip

### first lets create zip archive

In [2]:
!ls

 Volume in drive C is OS
 Volume Serial Number is 1491-1811

 Directory of C:\Projects\ixam\unzipping

26.08.2025  10:10    <DIR>          .
26.08.2025  10:10    <DIR>          ..
26.08.2025  10:10    <DIR>          .ipynb_checkpoints
26.08.2025  10:10               337 Untitled.ipynb
               1 File(s)            337 bytes
               3 Dir(s)  117˙356˙199˙936 bytes free


In [5]:
import os
import zipfile
import shutil

# 📁 Nazwy folderów i plików
folders = {
    "kat1": ["x.txt", "y.txt"],
    "kat2": ["a.txt", "b.txt"]
}

# 📝 Treść plików
def generate_content(filename):
    return [f"{filename} - linia {i+1}\n" for i in range(100)]

# 🏗️ Tworzenie folderów i plików
for folder, files in folders.items():
    os.makedirs(folder, exist_ok=True)
    for file in files:
        path = os.path.join(folder, file)
        with open(path, "w", encoding="utf-8") as f:
            f.writelines(generate_content(file))

# 📦 Tworzenie archiwum ZIP
zip_filename = "archiwum.zip"
with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
    for folder, files in folders.items():
        for file in files:
            filepath = os.path.join(folder, file)
            zipf.write(filepath)

print(f"✅ Utworzono archiwum: {zip_filename}")

# 🧹 Usuwanie folderów i plików
for folder in folders:
    shutil.rmtree(folder)
    print(f"🗑️ Usunięto folder: {folder}")

✅ Utworzono archiwum: archiwum.zip
🗑️ Usunięto folder: kat1
🗑️ Usunięto folder: kat2


In [16]:
for char in "🗑️":
    print(f"U+{ord(char):04X}")

U+1F5D1
U+FE0F


In [20]:
"""
    To Variation Selector-16 (VS16) — specjalny znak, który mówi systemowi: 
    ➡️ „Wyświetl ten znak jako emoji graficzne, a nie jako zwykły symbol tekstowy.”

    🗑️ Przykład: \U0001F5D1 vs \U0001F5D1\U0000FE0F
    Kod Unicode	Wygląd	Opis
    \U0001F5D1	🗑	Może być wyświetlone jako czarno-biały symbol (tekstowy), zależnie od systemu
    \U0001F5D1\U0000FE0F	🗑️	Wymusza pełną wersję emoji (kolorową, graficzną)
"""

"\U0001F5D1\U0000FE0F", "\U0001F5D1"  # 🗑

('🗑️', '🗑')

In [21]:
!ls

 Volume in drive C is OS
 Volume Serial Number is 1491-1811

 Directory of C:\Projects\ixam\unzipping

26.08.2025  10:21    <DIR>          .
26.08.2025  10:21    <DIR>          ..
26.08.2025  10:10    <DIR>          .ipynb_checkpoints
26.08.2025  10:13             1˙320 archiwum.zip
26.08.2025  10:21             5˙166 Untitled.ipynb
               2 File(s)          6˙486 bytes
               3 Dir(s)  117˙289˙824˙256 bytes free


# unzip

In [53]:
import zipfile
from io import BytesIO
import time

start = time.time()

# Załóżmy, że masz zawartość ZIP jako bajty (np. wczytane z pliku lub pobrane z sieci)
with open("archiwum.zip", "rb") as f:
    zip_bytes = f.read()

# Wczytanie ZIP do pamięci
zip_stream = BytesIO(zip_bytes)

# Otwieranie archiwum ZIP z pamięci
with zipfile.ZipFile(zip_stream) as zf:
    # Lista plików w archiwum
    print("Pliki w archiwum:", zf.namelist())

    # Wczytanie konkretnego pliku do pamięci
    for filename in zf.namelist():
        with zf.open(filename) as file:
            content = file.read()
            # print(f"{filename = }")
            # print("Zawartość pliku:", content.decode("utf-8"))

time_elapsed = time.time() - start

Pliki w archiwum: ['archiwum/', 'archiwum/data/', 'archiwum/data/daily/', 'archiwum/data/daily/pl/', 'archiwum/data/daily/pl/wse stocks/', 'archiwum/data/daily/pl/wse stocks/06n.txt', 'archiwum/data/daily/pl/wse stocks/08n.txt', 'archiwum/data/daily/pl/wse stocks/11b.txt', 'archiwum/data/daily/pl/wse stocks/1at.txt', 'archiwum/data/daily/pl/wse stocks/1ata.txt', 'archiwum/data/daily/pl/wse stocks/3rg.txt', 'archiwum/data/daily/pl/wse stocks/4ms.txt', 'archiwum/data/daily/pl/wse stocks/aat.txt', 'archiwum/data/daily/pl/wse stocks/abe.txt', 'archiwum/data/daily/pl/wse stocks/abs.txt', 'archiwum/data/daily/pl/wse stocks/acg.txt', 'archiwum/data/daily/pl/wse stocks/acp.txt', 'archiwum/data/daily/pl/wse stocks/act.txt', 'archiwum/data/daily/pl/wse stocks/adv.txt', 'archiwum/data/daily/pl/wse stocks/ago.txt', 'archiwum/data/daily/pl/wse stocks/agt.txt', 'archiwum/data/daily/pl/wse stocks/ale.txt', 'archiwum/data/daily/pl/wse stocks/alg.txt', 'archiwum/data/daily/pl/wse stocks/ali.txt', 'arch

In [30]:
print(f"{time_elapsed = }")

time_elapsed = 13.241687297821045


In [46]:
# content

In [68]:
import zipfile
from io import BytesIO
import os
import time

start = time.time()

# Parametr: ile ostatnich wierszy zachować
k = 10

# Wczytanie ZIP do pamięci
with open("archiwum.zip", "rb") as f:
    zip_bytes = f.read()

zip_stream = BytesIO(zip_bytes)

# Przetwarzanie archiwum
with zipfile.ZipFile(zip_stream) as zf:
    for name in zf.namelist():
        # Pomijamy katalogi
        if name.endswith("/"):
            continue

        # Wczytanie zawartości pliku
        with zf.open(name) as file:
            content = file.read().decode("utf-8")

        if content:
            # Podział na linie
            lines = content.strip().splitlines()
    
            # Zachowanie nagłówka + ostatnich k wierszy
            # k = min(k, len(lines) - 1)
            # if k > 0:
            #     selected_lines = [lines[0]] + lines[-k:]
            # else:
            #     selected_lines = [lines[0]]
            
            selected_lines = [lines[0]] + lines[-k:]

            # Upewnij się, że ścieżka jest znormalizowana
            output_path = os.path.normpath(output_path)
            
            # Tworzenie brakujących katalogów
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            # Tworzenie ścieżki do zapisu
            output_path = os.path.join("output", name)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
            # Zapis do pliku
            with open(output_path, "w", encoding="utf-8") as out_file:
                out_file.write("\n".join(selected_lines) + "\n")
            
            prev_output_path = output_path
            
time_elapsed = time.time() - start

print(f"Gotowe! Pliki zapisane w folderze 'output'.\n{time_elapsed = :.3f}")

FileNotFoundError: [Errno 2] No such file or directory: 'output\\data/daily/pl/nc stocks/aux.txt'

In [73]:
with open(output_path, "w", encoding="utf-8") as out_file:
    ...

FileNotFoundError: [Errno 2] No such file or directory: 'output\\data/daily/pl/nc stocks/aux.txt'

In [74]:
prev_output_path, output_path

('output\\data/daily/pl/nc stocks/ato.txt',
 'output\\data/daily/pl/nc stocks/aux.txt')

In [70]:
prev_output_path, output_path, selected_lines

('output\\data/daily/pl/nc stocks/ato.txt',
 'output\\data/daily/pl/nc stocks/aux.txt',
 ['<TICKER>,<PER>,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>,<OPENINT>',
  'AUX,D,20250811,000000,1.24,1.24,1.21,1.24,59,0',
  'AUX,D,20250812,000000,1.235,1.24,1.21,1.24,433,0',
  'AUX,D,20250813,000000,1.235,1.235,1.2,1.22,144,0',
  'AUX,D,20250814,000000,1.21,1.21,1.18,1.2,12757,0',
  'AUX,D,20250818,000000,1.18,1.2,1.13,1.18,23026,0',
  'AUX,D,20250819,000000,1.18,1.24,1.18,1.24,5208,0',
  'AUX,D,20250820,000000,1.24,1.775,1.24,1.73,182642,0',
  'AUX,D,20250821,000000,1.68,2.11,1.56,2.09,223579,0',
  'AUX,D,20250822,000000,2.05,2.46,1.88,2.46,222534,0',
  'AUX,D,20250825,000000,2.36,3.34,2.24,3.25,316428,0'])

In [51]:
name, lines, content

('archiwum/data/daily/pl/wse stocks/arha.txt', [], '')

In [35]:
content = b'<TICKER>,<PER>,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>,<OPENINT>\r\nZWC_PE,D,20070813,000000,6.2998,6.2998,6.2998,6.2998,0,0\r\nZWC_PE,D,20070814,000000,5.3364,5.3364,5.3364,5.3364,0,0\r\nZWC_PE,D,20070816,000000,5.0943,5.0943,5.0943,5.0943,0,0\r\nZWC_PE,D,20070817,000000,5.2983,5.2983,5.2983,5.2983,0,0\r\nZWC_PE,D,20070820,000000,5.3364,5.3364,5.3364,5.3364,0,0\r\nZWC_PE,D,20070821,000000,5.3364,5.3364,5.3364,5.3364,0,0\r\nZWC_PE,D,20070822,000000,5.2176,5.2176,5.2176,5.2176,0,0\r\nZWC_PE,D,20070823,000000,5.3364,5.3364,5.3364,5.3364,0,0\r\nZWC_PE,D,20070824,000000,5.3789,5.3789,5.3789,5.3789,0,0\r\nZWC_PE,D,20070827,000000,5.6972,5.6972,5.6972,5.6972,0,0\r\nZWC_PE,D,20070828,000000,5.6167,5.6167,5.6167,5.6167,0,0\r\nZWC_PE,D,20070829,000000,5.7397,5.7397,5.7397,5.7397,0,0\r\nZWC_PE,D,20070830,000000,6.1387,6.1387,6.1387,6.1387,0,0\r\nZWC_PE,D,20070831,000000,6.6232,6.6232,6.6232,6.6232,0,0\r\nZWC_PE,D,20070903,000000,7.0207,7.0207,7.0207,7.0207,0,0\r\nZWC_PE,D,20070904,000000,7.1824,7.1824,7.1824,7.1824,0,0\r\nZWC_PE,D,20070905,000000,6.6232,6.6232,6.6232,6.6232,0,0\r\nZWC_PE,D,20070906,000000,6.4618,6.4618,6.4618,6.4618,0,0\r\n'
content = content.decode("utf-8")
lines = content.strip().splitlines()
len(lines)

19

In [57]:
!ls

 Volume in drive C is OS
 Volume Serial Number is 1491-1811

 Directory of C:\Projects\ixam\unzipping

26.08.2025  11:08    <DIR>          .
26.08.2025  11:08    <DIR>          ..
26.08.2025  10:10    <DIR>          .ipynb_checkpoints
26.08.2025  10:33       198˙715˙980 archiwum - kopia.zip
26.08.2025  11:06        29˙975˙952 archiwum.zip
26.08.2025  11:08    <DIR>          output
26.08.2025  11:05           240˙306 Untitled.ipynb
               3 File(s)    228˙932˙238 bytes
               4 Dir(s)  117˙036˙584˙960 bytes free


In [38]:
lines[-18:]

['ZWC_PE,D,20070813,000000,6.2998,6.2998,6.2998,6.2998,0,0',
 'ZWC_PE,D,20070814,000000,5.3364,5.3364,5.3364,5.3364,0,0',
 'ZWC_PE,D,20070816,000000,5.0943,5.0943,5.0943,5.0943,0,0',
 'ZWC_PE,D,20070817,000000,5.2983,5.2983,5.2983,5.2983,0,0',
 'ZWC_PE,D,20070820,000000,5.3364,5.3364,5.3364,5.3364,0,0',
 'ZWC_PE,D,20070821,000000,5.3364,5.3364,5.3364,5.3364,0,0',
 'ZWC_PE,D,20070822,000000,5.2176,5.2176,5.2176,5.2176,0,0',
 'ZWC_PE,D,20070823,000000,5.3364,5.3364,5.3364,5.3364,0,0',
 'ZWC_PE,D,20070824,000000,5.3789,5.3789,5.3789,5.3789,0,0',
 'ZWC_PE,D,20070827,000000,5.6972,5.6972,5.6972,5.6972,0,0',
 'ZWC_PE,D,20070828,000000,5.6167,5.6167,5.6167,5.6167,0,0',
 'ZWC_PE,D,20070829,000000,5.7397,5.7397,5.7397,5.7397,0,0',
 'ZWC_PE,D,20070830,000000,6.1387,6.1387,6.1387,6.1387,0,0',
 'ZWC_PE,D,20070831,000000,6.6232,6.6232,6.6232,6.6232,0,0',
 'ZWC_PE,D,20070903,000000,7.0207,7.0207,7.0207,7.0207,0,0',
 'ZWC_PE,D,20070904,000000,7.1824,7.1824,7.1824,7.1824,0,0',
 'ZWC_PE,D,20070905,0000

In [None]:
# du -sh */ | sort -hr