# 划分训练-测试集



## 导入工具包

In [1]:
import os
import shutil
import random

from tqdm import tqdm

## 指定数据集路径

In [2]:
# Dataset root folder (relative path); later cells list it and chdir into it.
Dataset_Path = 'LSR16F656'

## 查看数据集目录结构

In [9]:
# os.chdir('mmsegmentation/LSR16F656')
# This value is not echoed: it is not the last expression of the cell.
os.getcwd()
# Step up one directory. NOTE(review): presumably so that Dataset_Path resolves
# from the working directory -- this depends on where the notebook was started.
os.chdir('../')

In [10]:
# seedir (third-party) is used here only to print the directory tree.
import seedir as sd
# Show just the top level of the dataset folder, drawn with emoji icons.
sd.seedir(Dataset_Path, style='emoji', depthlimit=1)

📁 LSR16F656/
├─📁 img_dir/
└─📁 ann_dir/


## 创建文件夹

In [11]:
# Enter the dataset root, then create the two (initially empty) staging
# folders for the split. os.mkdir raises if either folder already exists.
os.chdir(Dataset_Path)
for split_dir in ('train', 'val'):
    os.mkdir(split_dir)

In [12]:
# Number of entries in the image folder.
len(os.listdir('img_dir'))

656

In [13]:
# Number of entries in the annotation folder.
len(os.listdir('ann_dir'))

656

## 删除系统自动生成的多余文件

### 查看待删除的多余文件

In [14]:
# List (case-insensitively) any `__MACOSX` entries below the current directory.
!find . -iname '__MACOSX'

In [15]:
# List any `.DS_Store` entries below the current directory.
!find . -iname '.DS_Store'

In [16]:
# List any `.ipynb_checkpoints` entries below the current directory.
!find . -iname '.ipynb_checkpoints'

### 删除多余文件

In [17]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [18]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [19]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### 验证多余文件已删除

In [20]:
# Re-run the same searches; no output means nothing is left to delete.
!find . -iname '__MACOSX'

In [21]:
!find . -iname '.DS_Store'

In [22]:
!find . -iname '.ipynb_checkpoints'

## 在图像文件夹中，划分训练集和测试集

In [23]:
test_frac = 0.2  # fraction of the files held out for the test (val) split
random.seed(123) # fixed seed for the global RNG so the shuffle can be repeated

In [24]:
# Source folder for the image split; read by the listing and move cells below.
folder = 'img_dir'

In [25]:
# Build the train / held-out file-name lists from the contents of `folder`.
#
# os.listdir() returns entries in arbitrary order, so the listing is sorted
# first; otherwise the seeded shuffle could produce a different split on
# another machine or file system.
img_paths = sorted(os.listdir(folder))
random.shuffle(img_paths)  # in-place shuffle using the global RNG

val_number = int(len(img_paths) * test_frac)  # held-out file count, rounded down
train_files = img_paths[val_number:]          # file names for the training split
val_files = img_paths[:val_number]            # file names for the held-out split

print('数据集文件总数', len(img_paths))
print('训练集文件个数', len(train_files))
print('测试集文件个数', len(val_files))

数据集文件总数 656
训练集文件个数 525
测试集文件个数 131


## 将训练集图像移动至`train`目录

In [26]:
# Relocate each training image from the source folder into `train/`,
# keeping its file name; tqdm draws the progress bar.
for file_name in tqdm(train_files):
    shutil.move(os.path.join(folder, file_name), os.path.join('train', file_name))

100%|██████████| 525/525 [00:00<00:00, 976.14it/s] 


## 将测试集图像移动至`val`目录

In [27]:
# Relocate each held-out image from the source folder into `val/`,
# keeping its file name; tqdm draws the progress bar.
for file_name in tqdm(val_files):
    shutil.move(os.path.join(folder, file_name), os.path.join('val', file_name))

100%|██████████| 131/131 [00:00<00:00, 2792.04it/s]


In [28]:
# Combined number of entries across both split folders.
sum(len(os.listdir(split_dir)) for split_dir in ('train', 'val'))

656

## 将`train`和`val`剪切至`img_dir`

In [29]:
# Move the two populated split folders inside img_dir.
shutil.move('train', 'img_dir/train')
# Last expression of the cell, so its return value is what the notebook echoes.
shutil.move('val', 'img_dir/val')

'img_dir/val'

## 在标注文件夹中，划分训练集和测试集

In [30]:
# Switch the source folder to the annotations; the move loops below read it.
folder = 'ann_dir'

In [31]:
# Create fresh staging folders for the annotation split.
# os.mkdir raises if either folder already exists.
for split_dir in ('train', 'val'):
    os.mkdir(split_dir)

## 将训练集标注移动至`train`目录

In [32]:
# Move the annotation that matches each training image into `train/`.
# The annotation name is the image name with its extension replaced by '.png'.
# os.path.splitext strips only the final extension, so names that contain
# extra dots (e.g. 'scan.v2.jpg') still map to the right file ('scan.v2.png').
for each in tqdm(train_files):
    mask_name = os.path.splitext(each)[0] + '.png'
    src_path = os.path.join(folder, mask_name)
    dst_path = os.path.join('train', mask_name)
    shutil.move(src_path, dst_path)

100%|██████████| 525/525 [00:01<00:00, 372.29it/s]


## 将测试集标注移动至`val`目录

In [33]:
# Move the annotation that matches each held-out image into `val/`.
# The annotation name is the image name with its extension replaced by '.png'.
# os.path.splitext strips only the final extension, so names that contain
# extra dots (e.g. 'scan.v2.jpg') still map to the right file ('scan.v2.png').
for each in tqdm(val_files):
    mask_name = os.path.splitext(each)[0] + '.png'
    src_path = os.path.join(folder, mask_name)
    dst_path = os.path.join('val', mask_name)
    shutil.move(src_path, dst_path)

100%|██████████| 131/131 [00:00<00:00, 2122.20it/s]


In [34]:
# Combined number of entries across both split folders.
sum(len(os.listdir(split_dir)) for split_dir in ('train', 'val'))

656

## 将`train`和`val`剪切至`ann_dir`

In [35]:
# Move the two populated split folders inside ann_dir.
shutil.move('train', 'ann_dir/train')
# Last expression of the cell, so its return value is what the notebook echoes.
shutil.move('val', 'ann_dir/val')

'ann_dir/val'

## 删除系统自动生成的多余文件

In [36]:
# Step up one directory; the `find .` cells that follow therefore search from
# this new working directory rather than from inside the dataset folder.
os.chdir('../')

### 查看待删除的多余文件

In [38]:
# List (case-insensitively) any `__MACOSX` entries below the current directory.
!find . -iname '__MACOSX'

In [39]:
# List any `.DS_Store` entries below the current directory.
!find . -iname '.DS_Store'

In [40]:
# List any `.ipynb_checkpoints` entries below the current directory.
!find . -iname '.ipynb_checkpoints'

### 删除多余文件

In [41]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [42]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [43]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### 验证多余文件已删除

In [44]:
# Re-run the same searches; no output means nothing is left to delete.
!find . -iname '__MACOSX'

In [45]:
!find . -iname '.DS_Store'

In [46]:
!find . -iname '.ipynb_checkpoints'

## 得到已划分好训练集和测试集的完整语义分割数据集