## 文件操作（file operation）

In [4]:
import os

def read_file(filename):
    """Return the whole content of ``filename`` decoded as UTF-8.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file content as one string.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        raise FileNotFoundError('%s not exists'%(filename,))
    # The context manager closes the handle even when read() raises
    # (for instance on a decoding error); a manual open()/close() pair
    # would skip close() in that case.
    with open(filename, encoding='UTF-8') as f:
        return f.read()

In [8]:
# NOTE: the backslash right before the closing quote escapes that quote, so the
# string literal is never terminated; this is what triggers the SyntaxError below.
read_file('D:\Learn\Code\')

SyntaxError: EOL while scanning string literal (<ipython-input-8-1b9fe2862a93>, line 1)

In [6]:
# Raw string: "\L", "\C" and "\D" are not valid escape sequences, so a plain
# literal only works by accident (and warns on newer Python versions).
read_file(r'D:\Learn\Code\Demo.c')

'#include<stdio.h>\n\nint removeDuplicates(int* nums, int numsSize){\n    \n    int start = 0; \n    int end = 0;\n    int tmpCnt = 0;\n    int newValue = 0;\n\n    for (; start < numsSize - 1; start){\n        \n        /* 统计不符合数组的第一个元素的个数*/\n        newValue = nums[start];\n        while (nums[start++] == nums[start]){\n            tmpCnt++;\n            if(1 == numsSize - start){\n                start = numsSize;\n                break;\n            }\n        }\n\n        /* 向符合数组添加元素 */\n        if (tmpCnt){\n            nums[end++] = newValue;\n            nums[end++] = newValue;\n\n            tmpCnt = 0;\n        }\n        else{\n            nums[end++] = newValue;\n        }\n\n        /* 获取不符合数组的起始端下标 */\n        //start = end + tmpCnt - 1;\n        //tmpCnt = 0;\n    }\n\n    if (1 == (numsSize - start)){\n        nums[end++] = nums[start];\n    }\n    \n    return end;\n}\n\nint main()\n{\n\n    int test[7] = {1,1,1,2,2,3,3};\n    int N = 0;\n\n    printf("before:\\n");\n

In [14]:
def read__file(filename):
    """Read ``filename`` as UTF-8 text and return everything in it.

    Raises:
        FileNotFoundError: If the path does not exist.
    """
    if not os.path.exists(filename):
        raise FileNotFoundError('%s not exists' % (filename,))
    with open(filename, encoding='UTF-8') as handle:
        return handle.read()

In [15]:
# Raw string keeps the Windows backslashes literal instead of relying on
# invalid escape sequences ("\L", "\C", "\D") being passed through.
CCC = read__file(r"D:\Learn\Code\Demo.c")

In [20]:
from collections import defaultdict
import re

# Count how often each whitespace-separated token occurs in a.txt.
rec = re.compile(r'\s+')  # raw string: '\s' is not a valid escape in a plain literal
word_counts = defaultdict(int)
# The file is only read here, so open it read-only; 'r+' would additionally
# require write permission on the file.
with open('a.txt', 'r', encoding='UTF-8') as f:
    for line in f:
        clean_line = line.strip()
        if clean_line:  # skip blank lines
            for word in rec.split(clean_line):
                word_counts[word] += 1

# Most frequent first. sorted() is stable, so words with equal counts keep
# the order in which they were first seen.
dd = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
print('----print stat----')
print(dd)
print('----words stat done----')

----print stat----
[('Python', 3), ('and', 2), ('Hey,', 1), ('I', 1), ('just', 1), ('love', 1), ('so', 1), ('much,', 1), ('want', 1), ('to', 1), ('get', 1), ('the', 1), ('whole', 1), ('stack', 1), ('by', 1), ('this', 1), ('60-days', 1), ('column', 1), ('believe', 1), ('Python!', 1)]
----words stat done----


In [23]:
def write_file(file_path, file_name):
    """Write a fixed sample text to ``file_path/file_name`` and read it back.

    The target directory is created when it does not exist, including any
    missing parent directories. After writing, the file is re-opened, its
    content is printed and compared with what was written, and the outcome
    of that comparison is printed.

    Args:
        file_path: Directory that should contain the file.
        file_name: Name of the file inside ``file_path``.

    Returns:
        None. All results are reported on standard output.
    """
    if not os.path.exists(file_path):
        # makedirs (unlike mkdir) also creates missing parents; exist_ok
        # avoids a failure if the directory appears between check and call.
        os.makedirs(file_path, exist_ok=True)

    whole_path_filename = os.path.join(file_path, file_name)
    to_write_content = '''
                        Hey, Python
                        I just love Python so much,
                        and want to get the whole python stack by this 60-days column
                        and believe
                        '''
    with open(whole_path_filename, mode="w", encoding = 'UTF-8') as f:
        f.write(to_write_content)

    print('----write done-----')

    print('----begin reading----')
    with open(whole_path_filename, encoding = 'UTF-8') as f:
        content = f.read()
        print(content)
        # Round-trip check: what was read must equal what was written.
        if to_write_content == content:
            print('file is wrote sucessfully')
        else:
            print('file isn\'t wrote sucessfully')

In [24]:
# Raw string keeps the Windows backslashes literal ("\L" and "\C" are not
# valid escape sequences in a plain string literal).
write_file(r"D:\Learn\Code", "b.txt")

----write done-----
----begin reading----

                        Hey, Python
                        I just love Python so much,
                        and want to get the whole python stack by this 60-days column
                        and believe
                        
file is wrote sucessfully


In [29]:
# NOTE(review): the path is not a raw string, so the "\t" in "\trie.java" is
# parsed as a TAB character. The last real backslash is then the one before
# "Code", which is why the split below yields ('D:\\Learn', 'Code<TAB>rie.java').
# Writing the literal as r"D:\Learn\Code\trie.java" keeps every backslash and
# should give the intended directory/file split on Windows.
file_ext = os.path.split("D:\Learn\Code\trie.java")
ipath,ifile = file_ext

In [30]:
file_ext

('D:\\Learn', 'Code\trie.java')

In [27]:
ipath

'D:\\Learn'

In [28]:
ifile

'Code\trie.java'

In [31]:
# NOTE(review): not a raw string, so "\t" in "\trie.java" becomes a TAB; the
# root shown below therefore contains a TAB instead of a "\t" path separator.
# Use r"D:\Learn\Code\trie.java" to keep the backslash literal.
f_ext = os.path.splitext("D:\Learn\Code\trie.java")

In [32]:
f_ext[0]

'D:\\Learn\\Code\trie'

In [33]:
f_ext[1]

'.java'

In [1]:
## 小案例：批量修改后缀名
import argparse
import os

def get_parser():
    """Create the argument parser for the batch-rename command line.

    Three required positionals are declared, each with ``nargs=1`` so the
    parsed value is a one-element list: ``work_dir``, ``old_ext``, ``new_ext``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(description='工作目录中文件后缀名修改')
    # (dest, metavar, help) for each positional, in command-line order.
    positionals = (
        ('work_dir', 'WORK_DIR', '修改后缀名的文件目录'),
        ('old_ext', 'OLD_EXT', '原来的后缀'),
        ('new_ext', 'NEW_EXT', '新的后缀'),
    )
    for dest, meta, help_text in positionals:
        cli.add_argument(dest, metavar=meta, type=str, nargs=1, help=help_text)
    return cli

def batch_rename(work_dir, old_ext, new_ext):
    """Rename every entry in ``work_dir`` whose extension equals ``old_ext``.

    Args:
        work_dir: Directory whose direct entries are inspected (no recursion).
        old_ext: Extension to replace, including the leading dot (e.g. '.txt').
        new_ext: Replacement extension, including the leading dot.

    Returns:
        None. A completion message and the resulting directory listing are
        printed once, after all renames are done.
    """
    for filename in os.listdir(work_dir):
        # Split "name.ext" into ("name", ".ext").
        stem, file_ext = os.path.splitext(filename)
        if old_ext == file_ext:  # only entries carrying the old extension
            newfile = stem + new_ext
            os.rename(
                os.path.join(work_dir, filename),
                os.path.join(work_dir, newfile)
            )
    # Report once at the end instead of once per directory entry.
    print("完成重命名")
    print(os.listdir(work_dir))
            
def main():
    """Command-line entry point: parse the arguments and run the batch rename.

    Reads WORK_DIR, OLD_EXT and NEW_EXT through ``get_parser()``, adds a
    leading '.' to either extension when it is missing, then calls
    ``batch_rename(work_dir, old_ext, new_ext)``.
    """
    args = vars(get_parser().parse_args())
    # Each positional is declared with nargs=1, so every value is a
    # one-element list.
    work_dir = args['work_dir'][0]
    old_ext = args['old_ext'][0]
    new_ext = args['new_ext'][0]

    # startswith() is safe for an empty string, where indexing [0] would
    # raise IndexError.
    if not old_ext.startswith('.'):
        old_ext = '.' + old_ext
    if not new_ext.startswith('.'):
        new_ext = '.' + new_ext

    batch_rename(work_dir, old_ext, new_ext)

In [7]:
## XLS 批量转换成XLSX
import os

def xls_to_xlsx(work_dir):
    """Rename every ``*.xls`` entry directly inside ``work_dir`` to ``*.xlsx``.

    Only the file name changes; file contents are not touched. When done,
    a completion message and the directory listing are printed.
    """
    source_suffix = '.xls'
    target_suffix = '.xlsx'
    for entry in os.listdir(work_dir):
        stem, suffix = os.path.splitext(entry)
        if suffix != source_suffix:
            continue  # leave entries with any other extension alone
        os.rename(
            os.path.join(work_dir, entry),
            os.path.join(work_dir, stem + target_suffix),
        )
    print("完成重命名")
    print(os.listdir(work_dir))

In [8]:
# Raw string: "\T" is not a valid escape sequence in a plain literal.
xls_to_xlsx(r'.\Test')

完成重命名
['1 - Copy (2).xlsx', '1 - Copy (3).xlsx', '1 - Copy (4).xlsx', '1 - Copy (5) - Copy.xlsx', '1 - Copy (5).xlsx', '1 - Copy (6) - Copy.xlsx', '1 - Copy (6).xlsx', '1 - Copy (7) - Copy.xlsx', '1 - Copy.xlsx', '1.xlsx', '2 - Copy (2).pptx', '2 - Copy (3).pptx', '2 - Copy.pptx', '2.pptx', '3 - Copy (2).docx', '3 - Copy.docx', '3.docx']


In [11]:
## 获取目录下文件的修改时间
import os
from datetime import datetime

print(f"当前时间：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

def get_modify_time(indir):
    """Print the last-modification time of every file under ``indir``.

    Walks ``indir`` and all of its sub-directories. For each file, one line
    with the bare file name and its modification time (as a local
    ``datetime``) is printed.
    """
    for folder, _subdirs, names in os.walk(indir):
        for name in names:
            stamp = os.path.getmtime(os.path.join(folder, name))
            # fromtimestamp() turns the epoch seconds into a readable datetime.
            readable = datetime.fromtimestamp(stamp)
            print('文件 %s 最后一次修改时间：%s' %(name, readable))

当前时间：2020-06-25 22:25:56


In [12]:
get_modify_time('.')

文件 a.txt 最后一次修改时间：2020-06-23 23:23:10.053990
文件 batch_rename.py 最后一次修改时间：2020-06-25 22:05:08.002835
文件 Day01.ipynb 最后一次修改时间：2020-05-31 10:11:31.902367
文件 Day02.ipynb 最后一次修改时间：2020-06-03 00:32:57.961112
文件 Day03.ipynb 最后一次修改时间：2020-06-05 00:20:07.713881
文件 Day04.ipynb 最后一次修改时间：2020-06-07 16:04:18.678303
文件 Day05.ipynb 最后一次修改时间：2020-06-12 21:47:38.864003
文件 Day06.ipynb 最后一次修改时间：2020-06-13 00:25:00.048095
文件 Day07.ipynb 最后一次修改时间：2020-06-15 23:43:37.176372
文件 Day08.ipynb 最后一次修改时间：2020-06-17 23:36:45.078743
文件 Day09.ipynb 最后一次修改时间：2020-06-21 23:22:44.089658
文件 Day10.ipynb 最后一次修改时间：2020-06-25 22:25:01.142059
文件 render.html 最后一次修改时间：2020-06-07 16:03:03.020687
文件 COMMIT_EDITMSG 最后一次修改时间：2020-06-07 16:24:45.107172
文件 config 最后一次修改时间：2020-06-07 19:54:41.855904
文件 description 最后一次修改时间：2020-06-07 16:18:50.868253
文件 FETCH_HEAD 最后一次修改时间：2020-06-08 06:52:05.624976
文件 HEAD 最后一次修改时间：2020-06-07 16:18:50.877232
文件 index 最后一次修改时间：2020-06-07 16:24:45.104142
文件 applypatch-msg.sample 最后一次修改时间：2020-06-07 16:1

In [13]:
## 批量压缩文件
import zipfile
import os 
import time

def batch_zip(start_dir):
    """Compress all files under ``start_dir`` into ``start_dir + '.zip'``.

    The directory is walked recursively. Each file is stored under its path
    relative to ``start_dir``, so the archive does not repeat the leading
    directories of ``start_dir`` itself.

    Args:
        start_dir: Path of the directory to compress.

    Returns:
        The path of the archive that was written (``start_dir + '.zip'``).
    """
    file_news = start_dir + '.zip'  # archive path
    archive_abspath = os.path.abspath(file_news)

    # The context manager closes the archive exactly once, after the whole
    # tree has been walked, and also when an error interrupts the walk.
    with zipfile.ZipFile(file_news, 'w', zipfile.ZIP_DEFLATED) as z:
        for dir_path, _dir_names, file_names in os.walk(start_dir):
            for filename in file_names:
                full_path = os.path.join(dir_path, filename)
                # The archive can live inside the tree being walked (for
                # example start_dir='.'); never add the archive to itself.
                if os.path.abspath(full_path) == archive_abspath:
                    continue
                # relpath gives the in-archive name; a plain str.replace on
                # the directory path would also strip later occurrences of
                # start_dir's text.
                z.write(full_path, os.path.relpath(full_path, start_dir))
    return file_news

In [14]:
# Raw string: "\T" is not a valid escape sequence in a plain literal.
batch_zip(r'.\Test')

'.\\Test.zip'

In [15]:
## 32位文件加密
import hashlib

def hash_cry32(s):
    """Return the MD5 digest of ``str(s)`` as 32 hexadecimal characters.

    The value is converted with ``str()`` and encoded as UTF-8 before
    hashing. Note that MD5 is a one-way hash, not reversible encryption.
    """
    payload = str(s).encode('utf-8')
    return hashlib.md5(payload).hexdigest()

print(hash_cry32(1))
print(hash_cry32('hello'))

c4ca4238a0b923820dcc509a6f75849b
5d41402abc4b2a76b9719d911017c592


In [17]:
## 定制文件不同行

# 统计文件行数
def statLineCnt(statfile):
    """Print the file name, then return how many lines ``statfile`` contains.

    The file is read as UTF-8 text; a final line without a trailing newline
    still counts as a line.
    """
    print('文件名'+statfile)
    with open(statfile, encoding = 'utf-8') as handle:
        return sum(1 for _ in handle)

# more表示含有更多行数的文件
def diff(more, cnt, less):
    """Return the 1-based numbers of the lines that differ between two files.

    Lines are compared position by position after stripping surrounding
    whitespace. Every line of ``more`` beyond the end of ``less`` is
    reported as different.

    Args:
        more: Path of the file with more (or equally many) lines.
        cnt: Number of lines in ``more``.
        less: Path of the file with fewer (or equally many) lines.

    Returns:
        A list of 1-based line numbers, in ascending order.
    """
    difflist = []
    with open(less, encoding = 'utf-8') as l, open(more, encoding = 'utf-8') as m:
        lines = l.readlines()
        for i, line in enumerate(lines):
            if line.strip() != m.readline().strip():
                difflist.append(i)
    # Lines that exist only in the longer file. Using len(lines) instead of
    # the loop variable keeps this correct when `less` is empty (the loop
    # body never runs, so the loop variable would be unbound).
    difflist.extend(range(len(lines), cnt))
    return [no + 1 for no in difflist]

# 返回的结果行号从1开始
# list 表示 fileA 和 fileB 不同的行的编号
def file_diff_line_nos(fileA, fileB):
    """Return the 1-based numbers of the lines that differ between two files.

    Counts the lines of both files with ``statLineCnt`` and hands the longer
    file, its line count and the shorter file to ``diff``.

    Args:
        fileA: Path of the first file.
        fileB: Path of the second file.

    Returns:
        The list produced by ``diff``, or None when any exception occurred
        (the exception is printed instead of being raised).
    """
    try:
        cntA = statLineCnt(fileA)
        cntB = statLineCnt(fileB)
        if cntA > cntB:
            # diff() expects the *path* of the shorter file as its third
            # argument. Passing the integer line count would make open()
            # treat it as a file descriptor.
            return diff(fileA, cntA, fileB)
        return diff(fileB, cntB, fileA)
    except Exception as e:
        # Best-effort helper: report the problem and implicitly return None.
        print(e)

if __name__ == '__main__':
    import os
    print(os.getcwd())
    '''
    例子：
    fileA = "'Hello world!!!!''\
            'nice to meet you'\
            'yes'\
            'no1'\
            'jack'"
    fileB = "'hello world!!!!''\
            'nice to meet you'\
            'yes'"
    '''
    # Keep the result under its own name: binding it to `diff` would replace
    # the module-level diff() helper that file_diff_line_nos() calls, breaking
    # every later call in the same session.
    diff_line_nos = file_diff_line_nos('./a.txt', './b.txt')
    print(diff_line_nos)

D:\Learn\Code\py_60d
文件名./a.txt
文件名./b.txt
'_io.TextIOWrapper' object has no attribute 'readines'
None
