## 内存字符串

In [1]:
from io import StringIO
# Build a string piece by piece in an in-memory text buffer, then show the
# accumulated content.
f = StringIO()
for piece in ('hello', ' ', 'world!'):
    f.write(piece)
print(f.getvalue())

# A StringIO can also be pre-filled and consumed line by line like a file.
# readline() returns '' only once the buffer is exhausted, which ends the loop.
f = StringIO('Hello!\nHi!\nGoodbye!')
for s in iter(f.readline, ''):
    print(s.strip())

hello world!
Hello!
Hi!
Goodbye!


## 内存字节流

In [1]:
from io import BytesIO
# Start with an empty in-memory binary buffer.
f = BytesIO()
# The text is encoded to UTF-8 bytes before being written to the binary buffer.
f.write('中文'.encode('utf-8'))
# getvalue() shows the whole buffer (see the first output line).
print(f.getvalue())
type(f)
print(f.read()) # read() starts at the current cursor, which sits at the end of f after the write, so nothing is left to read
f.tell() # the cursor is at the end of f (not displayed: only the cell's last expression is echoed)
f.seek(0)  # rewind the cursor to the start of the buffer
f.read(2)  # last expression of the cell: the first two bytes are displayed

b'\xe4\xb8\xad\xe6\x96\x87'
b''


b'\xe4\xb8'

## 以流的形式读入转化成 ndarray 对象

In [24]:
import cv2
import numpy as np

# Raw bytes standing in for the contents of an encoded image file.
# NOTE(review): this placeholder is presumably not a decodable image - replace
# it with real file bytes (e.g. open(path, 'rb').read()) before relying on the result.
b = b'aaaaaaaaa'

# View the bytes as a one-dimensional uint8 array without copying them.
image_array = np.frombuffer(b, dtype=np.uint8)

# Decode the in-memory buffer as a colour image; same effect as cv2.imread(),
# but reading from memory instead of from a file path.
img_decode = cv2.imdecode(image_array, cv2.IMREAD_COLOR)

## zip 压缩解压文件
### 压缩

In [76]:
import os
import zipfile
import shutil
from pathlib import Path

def zipDir(dirPath, outDir=None):
    """
    Compress every file below a folder into a zip archive.

    Member names inside the archive are relative to ``dirPath``, so the
    archive holds the folder's content rather than the folder itself.
    Empty sub-folders are not stored.

    Parameters
    ---------
    dirPath: Path of the folder to compress
    outDir: Path of the archive to create, including the ``.zip`` file name.
        Defaults to ``dirPath`` + '.zip' (next to the folder).

    Returns
    ---------
    None
    """
    # Normalise first so a trailing separator ('data/') does not turn the
    # default archive name into 'data/.zip', i.e. a file inside the source.
    source_root = os.path.normpath(dirPath)
    if outDir is None:
        outDir = source_root + '.zip'
    archive_abs = os.path.abspath(outDir)

    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(outDir, "w", zipfile.ZIP_DEFLATED) as archive:
        for path, _dirnames, filenames in os.walk(source_root):
            for filename in filenames:
                file_path = os.path.join(path, filename)
                # Never add the archive to itself when it lives inside the source folder.
                if os.path.abspath(file_path) == archive_abs:
                    continue
                # relpath strips only the leading source folder; a plain
                # str.replace would also eat identical text deeper in the path.
                archive.write(file_path, os.path.relpath(file_path, source_root))

### 解压zip

In [78]:
def _decode_zip_name(info):
    """Return the member name of ``info`` with legacy (non-UTF-8) encodings repaired."""
    name = info.filename
    # Bit 11 of the general-purpose flags marks the stored name as UTF-8.
    # zipfile has then already decoded it correctly; re-decoding would corrupt it.
    if info.flag_bits & 0x800:
        return name
    try:
        # Unflagged names were decoded by zipfile as cp437; recover the raw bytes.
        raw = name.encode('cp437')
    except UnicodeEncodeError:
        return name
    for encoding in ('utf-8', 'gbk'):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    return name


def un_zip(zipPath, dp=None):
    """
    Unzip a zip file into a folder named after the archive.

    ``xxx.zip`` is extracted to ``dp/xxx``. If the archive already wraps its
    content in a top-level ``xxx`` folder, that folder is reused instead of
    nesting ``xxx/xxx``. Member names stored in a legacy encoding
    (UTF-8 or GBK without the UTF-8 flag) are repaired. Existing files are
    overwritten (their path is printed), existing folders are merged into,
    and ``__MACOSX`` folders are removed from the result.

    Parameters
    ---------
    zipPath: Zip file path
    dp: Decompression path; defaults to the folder that contains the zip file

    Returns
    ---------
    dir_path: The path of the extracted folder (POSIX style)

    Raises
    ---------
    ValueError: a member name is absolute or contains '..' and would be
        written outside the decompression path
    """
    import tempfile  # only needed here; keeps the block self-contained

    zip_parent, zip_file = os.path.split(zipPath)
    folder_name = os.path.splitext(zip_file)[0]
    if dp is None:
        # By default, decompress next to the zip file
        dp = zip_parent
    dest_root = Path(dp or '.').as_posix()
    dir_path = Path(os.path.join(dest_root, folder_name)).as_posix()
    os.makedirs(dest_root, exist_ok=True)

    # Members are extracted under their stored (possibly mis-decoded) name into
    # a private staging folder and then moved to their repaired name. A unique
    # temporary folder cannot collide with archive content and is always removed.
    staging = tempfile.mkdtemp(prefix='.un_zip_', dir=dest_root)
    try:
        with zipfile.ZipFile(zipPath, 'r') as archive:
            for info in archive.infolist():
                new_fn = _decode_zip_name(info)
                parts = [part for part in new_fn.split('/') if part]
                if not parts:
                    continue
                if os.path.isabs(new_fn) or '..' in parts:
                    raise ValueError('Unsafe member name in zip file: %r' % new_fn)

                # Reuse a top-level folder that already carries the archive's name.
                real_dp = dest_root if parts[0] == folder_name else dir_path
                new_fp = Path(os.path.join(real_dp, *parts)).as_posix()

                if info.is_dir():
                    if os.path.isfile(new_fp):
                        os.remove(new_fp)
                    os.makedirs(new_fp, exist_ok=True)
                    continue

                old_fp = archive.extract(info, staging)
                # Archives without explicit directory entries still need the parent folder.
                os.makedirs(os.path.dirname(new_fp) or '.', exist_ok=True)
                # Overwrite when the target already exists
                if os.path.exists(new_fp):
                    if os.path.isfile(new_fp):
                        print(new_fp)
                        os.remove(new_fp)
                    else:
                        shutil.rmtree(new_fp)
                os.replace(old_fp, new_fp)
    finally:
        shutil.rmtree(staging, ignore_errors=True)

    # Delete the folders named __MACOSX if they exist
    for root, dirs, _files in os.walk(dir_path, topdown=False):
        for name in dirs:
            if name == '__MACOSX':
                shutil.rmtree(os.path.join(root, name), ignore_errors=True)
    return dir_path

    
# Unpack the sample archive; with no decompression path given it is extracted
# next to the zip file. As the cell's last expression, the returned folder
# path is displayed.
zp = 'data/8.12.zip'
un_zip(zp)

data/8.12/哈森股份首次公开发行股票招股说明书_603958_20160616_1.json
data/8.12/国投中鲁招股说明书_600962_20040602_1.json
data/8.12/国投新集招股说明书_601918_20071217_2.json
data/8.12/国药股份招股说明书_600511_20021108_1.json
data/8.12/国阳新能招股说明书_600348_20030801_1.json
data/8.12/海兴电力首次公开发行A股股票招股说明书_603556_20161028_3.json
data/8.12/海利尔首次公开发行股票招股说明书_603639_20161229_1.json
data/8.12/海利生物首次公开发行股票招股说明书_603718_20150505_3.json
data/8.12/海南矿业首次公开发行股票招股说明书_601969_20141124_3.json
data/8.12/海天味业首次公开发行股票招股说明书（更新版）_603288_20140124_2.json
data/8.12/海天味业首次公开发行股票招股说明书_603288_20140123_2.json
data/8.12/海天精工首次公开发行A股股票招股说明书_601882_20161024_1.json
data/8.12/海峡环保首次公开发行股票招股说明书_603817_20170207_4.json
data/8.12/海星股份首次公开发行股票招股说明书_603115_20190730_2.json
data/8.12/海汽集团首次公开发行股票招股说明书_603069_20160628_2.json
data/8.12/海油发展首次公开发行股票招股说明书_600968_20190613_3.json
data/8.12/海油工程招股说明书_600583_20020117_1.json
data/8.12/海通集团招股说明书_600537_20030103_1.json
data/8.12/海鸥股份首次公开发行股票招股说明书_603269_20170504_3.json


'data/8.12'

In [46]:
cd mnt/demo

/mnt/demo
