In [1]:
import pandas as pd
import os

# 工作目录信息

In [2]:
# Current working directory of the kernel process
os.getcwd()

'/Users/yijiu/Young/GitHub/Triceratops-org/myPython'

In [3]:
# '.' and '..' are resolved against the current working directory, which is
# not necessarily the folder that holds this notebook.
path1 = os.path.abspath('.')   # absolute path of the current working directory
path2 = os.path.abspath('..')  # absolute path of its parent directory

print(path1)
print(path2)

/Users/yijiu/Young/GitHub/Triceratops-org/myPython
/Users/yijiu/Young/GitHub/Triceratops-org


# 数据准备
## 文件夹创建
用于存放演示数据

In [4]:
# Base directory under which the demo folders are created
cwd = os.getcwd()
data_filefolder_name = 'data'         # data: holds the raw input data
result_filefolder_name = 'result'     # result: holds the output data


def mk_file_folders(file_folder_names, base_dir=None):
    '''Create each named folder under ``base_dir`` and report what happened.

    Args:
        file_folder_names: iterable of folder names (relative to ``base_dir``).
        base_dir: directory in which the folders are created. Defaults to the
            current working directory, so the function no longer depends on a
            notebook-level ``cwd`` variable having been defined beforehand.

    Returns:
        None. One status line is printed per folder name.

    Raises:
        OSError: if a folder cannot be created for a reason other than it
            already existing (e.g. missing parent directory, no permission).
    '''
    if base_dir is None:
        base_dir = os.getcwd()

    for file_folder_name in file_folder_names:
        folder_path = os.path.join(base_dir, file_folder_name)
        # Attempt the creation directly instead of "check, then create": the
        # existence check and the mkdir can otherwise race with other processes.
        try:
            os.mkdir(folder_path)
        except FileExistsError:
            print(f'文件夹{file_folder_name}已经存在!')
        else:
            print(f'文件夹{file_folder_name}创建完成!')
        
    
# Create the demo input ('data') and output ('result') folders
mk_file_folders([data_filefolder_name, result_filefolder_name])   

文件夹data已经存在!
文件夹result已经存在!


## 各类型数据demo

In [5]:
import seaborn as sns

# Load the 'penguins' example dataset through seaborn as the demo DataFrame
df = sns.load_dataset('penguins')

# Shared path stem; each writer below appends its own file extension
file_name = './data/penguins'

# Export the same frame as csv, xlsx and json (row index omitted for csv/xlsx)
df.to_csv(file_name+'.csv', index=False)
df.to_excel(file_name+'.xlsx', index=False)
df.to_json(file_name+'.json', orient='columns')

# 数据读取
## 文件地址形式

In [6]:
# Different ways to spell a file path
# Relative path, resolved against the working directory
file_path0 = 'data/penguins.csv'

# Same relative path with an explicit './' prefix
file_path1 = './data/penguins.csv'
# file_path2 = '/Users/...data/penguins.csv'              # absolute path
# file_path2 = 'C:/Users/.../data/penguins.csv'           # windows, forward slashes

# file_path3 = r'C:\Users\...data\penguins.csv'          # windows, raw string
# file_path4 = 'C:\\Users\\...data\\penguins.csv'        # windows, escaped backslashes


# Read the csv through the first spelling and preview the first rows
data = pd.read_csv(file_path0)
data.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


## 不同数据类型读取为DF
### csv

In [7]:
# Read the csv export into a DataFrame and preview the first rows
file_path_csv = './data/penguins.csv'
data = pd.read_csv(file_path_csv)

data.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


### excel

In [8]:
# Read the xlsx export into a DataFrame and preview the first rows
file_path_excel = './data/penguins.xlsx'
data = pd.read_excel(file_path_excel)

data.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


### json

In [9]:
# Read the json export; orient='columns' matches the orient used when writing it
file_path_json = './data/penguins.json'
data = pd.read_json(file_path_json, orient='columns')

data.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


# 数据保存

In [10]:
# Save as csv (row index not written)
data.to_csv('./result/penguins.csv', index=False)

# Save as Excel workbook (row index not written)
data.to_excel('./result/penguins.xlsx', index=False)

# Save as json, column-oriented
data.to_json('./result/penguins.json', orient='columns')

# 其他场景
## 合并文件夹下多个csv文件

In [11]:
def combine_files(path, key_word):
    '''Combine the csv files in a folder whose names contain a keyword.

    Args:
        path: folder to scan; a trailing path separator is optional.
        key_word: substring that a file name must contain to be included.

    Returns:
        A DataFrame with the rows of every matching csv file stacked
        vertically (each file keeps its own row index), in sorted file-name
        order. An empty DataFrame is returned when nothing was combined.

    Files that cannot be opened (OSError) are reported and skipped; a
    progress line is printed per combined file, followed by a summary line.
    '''
    # Filter on the real extension rather than on the substring 'csv', and
    # sort so the row order does not depend on the directory listing order.
    files_list = sorted(
        file_name for file_name in os.listdir(path)
        if os.path.splitext(file_name)[1].lower() == '.csv' and key_word in file_name
    )

    frames = []
    for file_name in files_list:
        try:
            # os.path.join works whether or not `path` ends with a separator
            data = pd.read_csv(os.path.join(path, file_name))    # adjust reader options as needed
            frames.append(data)
            print(f'文件已合并：{file_name}')
        except OSError as e:
            print(f'Error: {e.strerror}')
    print(f'合并了{len(frames)}个文件!')

    # Concatenate once at the end; pd.concat rejects an empty list, so fall
    # back to an empty frame when no file was read.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0)

In [12]:
path = './data/'
fl = os.listdir(path)
# Select by file extension; a plain substring test would also pick up names
# such as 'csv_notes.txt'.
csv_fl = [file_name for file_name in fl if os.path.splitext(file_name)[1].lower() == '.csv']

csv_fl

['penguins.csv']

In [13]:
path = './data/'
file_name_key_txt = 'penguins'

# Combine the csv files whose names contain the keyword; the keyword variable
# is passed instead of repeating the literal so the two cannot drift apart.
df = combine_files(path, file_name_key_txt)
df.head()

文件已合并：penguins.csv
合并了1个文件!


Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


## txt文件
### 读

In [None]:
# # Path of the txt file to open
# target_file = './data/a_txt_org_file.txt'
# # Open the file in a with-block so it is closed automatically
# with open(target_file, 'r', encoding='utf-8') as file:
#     # Read the whole file content with file.read()
#     content = file.read()
# # Print the file content
# print(content)

### 写

In [None]:
# # Target file: a txt file inside the 'result' folder created earlier
# target_file = './result/a_target_txt_file.txt'
# # Lines to append to the file
# content_list = ["会议记录：陈知枫", "会议复盘：徐小刚", "会议室清洁：廖雨"]
# # Open in append mode 'a' with utf-8 encoding; the with-block closes the file
# with open(target_file, 'a', encoding='utf-8') as file:
#     # Write each entry on its own line
#     for content in content_list:
#         file.write(content + '\n')

# 文件删除
删除演示文件

In [14]:
# Folder contents before deletion
os.listdir('./data/')

['penguins.xlsx', 'penguins.json', 'penguins.csv']

In [15]:
def delete_files(path, key_word):
    '''Remove every entry of ``path`` whose name contains ``key_word``.

    Args:
        path: folder to scan.
        key_word: substring an entry name must contain to be removed.

    Returns:
        None. A line is printed per removed file, an error line per entry
        that could not be removed (OSError), then a short summary.
    '''
    removed = 0
    matching = list(filter(lambda name: key_word in name, os.listdir(path)))

    for name in matching:
        target = os.path.join(path, name)
        try:
            os.remove(target)
            removed += 1
            print(f'文件已删除：{target}')
        except OSError as err:
            print(f'Error: {err.strerror}')

    # Summary block: short rule, folder, count, long rule
    summary = ('-'*40, f'文件夹{path}:', f'合计删除{removed}个文件！', '-'*60)
    for line in summary:
        print(line)

In [16]:
key_word = 'penguins'

# Remove the demo files, first from the input folder and then from the output folder
for path in ('./data/', './result/'):
    delete_files(path, key_word)

文件已删除：./data/penguins.xlsx
文件已删除：./data/penguins.json
文件已删除：./data/penguins.csv
----------------------------------------
文件夹./data/:
合计删除3个文件！
------------------------------------------------------------
文件已删除：./result/penguins.xlsx
文件已删除：./result/penguins.json
文件已删除：./result/penguins.csv
----------------------------------------
文件夹./result/:
合计删除3个文件！
------------------------------------------------------------


In [17]:
# Folder contents after deletion
os.listdir('./data/')

[]