# Description

This is an automation script to categorize raw data captured in each experiment.

This script can be divided into four parts:

- Stage 0: User Settings: Modify settings to fit your experiment data.
- Stage 1: Collect metadata.
- Stage 2: Check files to make sure the number of files of each type is reasonable.
- Stage 3: Categorize files in chronological order, and organize with experiment names.

Before you go to Stage 3, make sure that all the raw data has already been uploaded to the database (manually or with the py-script) into the specific directories, and that redundant files have been removed manually.

In [1]:
import os
import yaml
import shutil

# ===================== User Settings =====================
# Experiment dates to process (YYYY-MM-DD strings). Stage 1 looks each one up
# as a sub-directory of every database root; comment a date out to skip it.
dates = [
    # "2024-03-19",
    # "2024-03-20",
    # "2024-05-14",
    "2024-05-20"
]

# Per-date folders whose files Stage 3 moves into
# <date>/<exp_name>/<device>/<trip>/raw and then removes once they are empty.
target_folders = [
    "mi2log",
    "mi2log_xml",
    "server_pcap",
    "client_pcap",
]

# Per-date folders that Stage 2 only lists (if present); the visible code
# never moves or deletes them.
common_folders = [
    "gps",
    "sync",
]

# ===================== Utils =====================
def makedir(dirpath):
    """Create ``dirpath`` (with any missing parents) unless it is already a directory.

    A ``makedir:`` trace line is printed only when the directory is created.
    """
    if os.path.isdir(dirpath):
        return
    print('    makedir:', dirpath)
    os.makedirs(dirpath)

def savecopy(filepath, targetdir, filename):
    """Copy ``filepath`` into ``targetdir`` under the name ``filename``.

    ``targetdir`` is created first if it does not exist. ``filename`` may
    differ from the basename of ``filepath``, so the copy can be renamed in
    the same step.
    """
    makedir(targetdir)
    destination = os.path.join(targetdir, filename)
    print("    cp -p", filepath, destination)
    # copy2 keeps the last-modified time and other file metadata.
    shutil.copy2(filepath, destination)

def savemove(filepath, targetdir, filename):
    """Move ``filepath`` into ``targetdir`` under the name ``filename``.

    ``targetdir`` is created first if it does not exist. ``filename`` may
    differ from the basename of ``filepath``, so the file can be renamed in
    the same step.
    """
    makedir(targetdir)
    destination = os.path.join(targetdir, filename)
    print("    mv", filepath, destination)
    shutil.move(filepath, destination)

# def delete(filepath):
#     if not os.path.exists(filepath):
#         print("rm: '{}': No such file or directory".format(filepath))
#         return
#     print("rm", filepath)
#     os.remove(filepath)

# def movedir(dirpath, targetdir, dirname):
#     # dirname can be different from basename of dirpath, can be used to rename a directory.
#     makedir(targetdir)
#     print("mv", dirpath, os.path.join(targetdir, dirname))
#     shutil.move(dirpath, os.path.join(targetdir, dirname))

# def copydir(dirpath, targetdir, dirname):
#     # dirname can be different from basename of dirpath, can be used to rename a directory.
#     makedir(targetdir)
#     print("copy", dirpath, os.path.join(targetdir, dirname))
#     shutil.copytree(dirpath, os.path.join(targetdir, dirname))

def deletedir_empty(dirpath):
    """Remove the directory ``dirpath`` with ``os.rmdir``.

    A missing path is reported with an ``rmdir``-style message and otherwise
    ignored. ``os.rmdir`` itself raises ``OSError`` when the directory still
    has content, so only empty directories are ever removed.
    """
    if os.path.exists(dirpath):
        print("rmdir", dirpath)
        os.rmdir(dirpath)
    else:
        print("rmdir: '{}': No such file or directory".format(dirpath))

# def deletedir_nonempty(dirpath):
#     if not os.path.exists(dirpath):
#         print("rm -rf: '{}': No such file or directory".format(dirpath))
#         return
#     print("rm -rf", dirpath)
#     shutil.rmtree(dirpath, ignore_errors=True)

# Decimal (1000-based) units, smallest first.
_SIZE_UNITS = ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')


def _format_size(num_bytes):
    """Render a byte count as '<value> <unit>' using 1000-based units."""
    size = num_bytes
    unit_index = 0
    # ">=" so that exactly 1000 bytes becomes "1.0 KB"; the index bound keeps
    # values beyond the PB range from running off the end of the unit table.
    while size >= 1000 and unit_index < len(_SIZE_UNITS) - 1:
        size /= 1000
        unit_index += 1
    return f'{round(size, 2)} {_SIZE_UNITS[unit_index]}'


def get_folder_size(folder_path):
    """Return the total size of all regular files below ``folder_path``.

    The tree is walked recursively; symbolic links are not counted. The
    result is a human-readable string such as ``'17.33 MB'``.
    """
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            fp = os.path.join(dirpath, name)
            # Skip symbolic links.
            if not os.path.islink(fp):
                total_size += os.path.getsize(fp)
    return _format_size(total_size)


def get_file_size(file_path):
    """Return the size of the file ``file_path`` as a human-readable string.

    Raises:
        ValueError: if ``file_path`` is not an existing regular file.
    """
    if not os.path.isfile(file_path):
        raise ValueError(f"{file_path} is not a valid file.")
    return _format_size(os.path.getsize(file_path))


def get_size(path):
    """Return the human-readable size of ``path``, a file or a directory.

    Raises:
        ValueError: if ``path`` is neither an existing file nor a directory.
    """
    if os.path.isdir(path):
        return get_folder_size(path)
    if os.path.isfile(path):
        return get_file_size(path)
    raise ValueError(f"{path} is not a valid file or directory.")

# Stage 1: Collect Metadata

In [2]:
# Process the dates in chronological order.
dates = sorted(dates)

# db_path.txt (in the current working directory) lists one database root per line.
with open(os.path.join(os.getcwd(), "db_path.txt"), "r") as f:
    # Skip blank lines: an empty root would make os.path.join('', date) resolve
    # relative to the current working directory and could match by accident.
    PATHS_TO_DATABASE = [root for root in (line.strip() for line in f) if root]

# Every existing <database root>/<date> directory, in date order.
date_paths = []

for date in dates:
    not_found_in_database = []

    for db_path in PATHS_TO_DATABASE:
        date_path = os.path.join(db_path, date)
        if os.path.isdir(date_path):
            date_paths.append(date_path)
        else:
            not_found_in_database.append(date_path)

    # A date may be absent from some roots, but it is an error if no root has it.
    if len(not_found_in_database) == len(PATHS_TO_DATABASE):
        error_message = "[Errno 2] No such file or directory:\n" + "".join(
            "  '{}'\n".format(missing_path) for missing_path in not_found_in_database
        )
        raise FileNotFoundError(error_message.strip())

# Maps each date directory to [total trip count, experiment record, experiment record, ...].
metadatas = {}

for i, date_path in enumerate(date_paths):
    date_name = os.path.basename(date_path)
    print('------------------')
    print(f"{date_name}: {os.path.dirname(date_path)}")

    # The experiment description is a YAML file named after the date directory itself.
    yml_file = os.path.join(date_path, date_name + '.yml')
    with open(yml_file, 'r') as f:
        data = yaml.safe_load(f)

    # Slot 0 holds the running trip count; experiment records are appended after it.
    record = [0]
    metadatas[date_path] = record

    for j, (exp_name, exp) in enumerate(data.items()):
        if j > 0:
            print('  ------------------')
        print(' ', exp_name, '->', 'Skip:', exp['skip'], '|', 'UE:', exp['ue'], '|', 'Laptop:', exp['laptop'], '|', 'Route:', exp['route'])
        print('   ', exp['devices'])
        if exp['skip']:
            continue

        trips = []
        for key, value in exp['ods'].items():
            print('     ', key, value)
            # Only positive keys become trips; in the recorded output, key 0 is
            # the column-header row of the table.
            if key > 0:
                trips.append(f'#{key:02d}')

        record.append({
            'exp_name': exp_name,
            'ue': exp['ue'],
            'laptop': exp['laptop'],
            'route': exp['route'],
            'devices': list(exp['devices']),
            'trips': list(trips),
        })
        record[0] += len(trips)

    if i == len(date_paths) - 1:
        print('------------------')

------------------
2024-05-20: /home/wmnlab/E/database
  Modem_Action_Test_v6_A -> Skip: False | UE: Modem | Laptop: lpt3 | Route: BR
    {'qc00': 'B1B8', 'qc01': 'LTE', 'qc02': 'DBL_v6_NSA', 'qc05': 'DBL_v6_LTE'}
      0 ['起站', '迄站', '出發時間(hh:mm)', '備註']
      1 ['萬芳醫院', '動物園', '15:31', []]
      2 ['動物園', '萬芳醫院', '15:39', []]
      3 ['萬芳醫院', '動物園', '15:50', []]
      4 ['動物園', '萬芳醫院', '15:58', []]
      5 ['萬芳醫院', '動物園', '16:05', []]
      6 ['動物園', '萬芳醫院', '16:14', []]
      7 ['萬芳醫院', '動物園', '16:25', []]
      8 ['動物園', '萬芳醫院', '16:35', []]
      9 ['萬芳醫院', '動物園', '16:43', []]
      10 ['動物園', '萬芳醫院', '16:51', []]
------------------


# Stage 2: Check Files

檢查資料的數量是否正確

- mi2log, mi2log_xml: 機台數量*總趟次
- client_pcap: 機台數量*總趟次 (Bi-link)
- server_pcap: 機台數量*總趟次\*2 or 1 (Uplink/Downlink, Bi-link)

In [3]:
# Stage 2 report: for every date directory print the experiments found in
# Stage 1, then the content and size of each target and common folder so the
# file counts can be checked by eye before anything is moved.
for i, (date_path, metadata) in enumerate(metadatas.items()):
    print('-----------------------------')
    print(date_path, '({})'.format(metadata[0]), get_size(date_path))

    # metadata[0] is the total trip count; the experiment records follow it.
    for j, exp in enumerate(metadata[1:]):
        if j > 0:
            print('  -----------------------------')
        print(' ', exp['exp_name'], '->', 'UE:', exp['ue'], '|', 'Laptop:', exp['laptop'], '|', 'Route:', exp['route'])
        print('   ', len(exp['devices']), exp['devices'])
        print('     ', len(exp['trips']), exp['trips'])

    # Target and common folders are reported the same way. A missing folder of
    # either kind is reported instead of aborting the whole check with
    # FileNotFoundError.
    for label, folders in (('target folder', target_folders), ('common folder', common_folders)):
        for folder in folders:
            folder_path = os.path.join(date_path, folder)
            print('  ------------------', label)
            if not os.path.isdir(folder_path):
                print(' ', folder, 'does not exist!')
                continue
            filenames = sorted(os.listdir(folder_path))
            print(' ', folder, '({})'.format(len(filenames)), get_size(folder_path))
            for filename in filenames:
                print('   ', filename, get_size(os.path.join(folder_path, filename)))

    # Close the report after the last date (keyed on the dict being iterated).
    if i == len(metadatas) - 1:
        print('-----------------------------')

-----------------------------
/home/wmnlab/E/database/2024-05-20 (10) 8.75 GB
  Modem_Action_Test_v6_A -> UE: Modem | Laptop: lpt3 | Route: BR
    4 ['qc00', 'qc01', 'qc02', 'qc05']
      10 ['#01', '#02', '#03', '#04', '#05', '#06', '#07', '#08', '#09', '#10']
  ------------------ target folder
  mi2log (40) 17.33 MB
    diag_log_qc00_2024-05-20_15-30-55.mi2log 589.09 KB
    diag_log_qc00_2024-05-20_15-39-09.mi2log 563.92 KB
    diag_log_qc00_2024-05-20_15-50-24.mi2log 680.91 KB
    diag_log_qc00_2024-05-20_15-58-27.mi2log 573.81 KB
    diag_log_qc00_2024-05-20_16-05-22.mi2log 656.75 KB
    diag_log_qc00_2024-05-20_16-14-19.mi2log 663.6 KB
    diag_log_qc00_2024-05-20_16-25-06.mi2log 598.42 KB
    diag_log_qc00_2024-05-20_16-35-51.mi2log 520.74 KB
    diag_log_qc00_2024-05-20_16-43-18.mi2log 648.52 KB
    diag_log_qc00_2024-05-20_16-51-20.mi2log 566.22 KB
    diag_log_qc01_2024-05-20_15-30-55.mi2log 337.87 KB
    diag_log_qc01_2024-05-20_15-39-09.mi2log 315.86 KB
    diag_log_qc01_202

# Stage 3: Categorize

In [4]:
# Stage 3: move every raw file into <date>/<exp_name>/<device>/<trip>/raw.
# Within each target folder, the files whose name contains the device name are
# sorted by name and handed out to the trips in order, one file per trip.
# NOTE(review): the Stage 2 notes say server_pcap may hold two files per device
# per trip (uplink/downlink); this loop assigns exactly one file per trip.
# Confirm the intended handling for that case.
for i, (date_path, metadata) in enumerate(metadatas.items()):
    print('------------------')
    print(date_path, '({})'.format(metadata[0]))

    # metadata[0] is the total trip count; the experiment records follow it.
    for j, exp in enumerate(metadata[1:]):
        if j > 0:
            print('  ------------------')
        print(' ', exp['exp_name'], '->', 'UE:', exp['ue'], '|', 'Laptop:', exp['laptop'], '|', 'Route:', exp['route'])
        print('   ', len(exp['devices']), exp['devices'])
        print('     ', len(exp['trips']), exp['trips'])

        # Pass 1: plan every move of this experiment and validate the file
        # counts before touching the disk, so a shortage cannot leave the
        # experiment half-moved.
        plan = []
        for target_folder in target_folders:
            src_dirpath = os.path.join(date_path, target_folder)
            remaining = sorted(os.listdir(src_dirpath))
            file_count = len(remaining)
            assignments = []
            for dev in exp['devices']:
                candidates = [s for s in remaining if dev in s]
                if len(candidates) < len(exp['trips']):
                    raise RuntimeError(
                        "Not enough files for device '{}' in '{}': expected {}, found {}. "
                        "No file of experiment '{}' has been moved.".format(
                            dev, src_dirpath, len(exp['trips']), len(candidates), exp['exp_name']
                        )
                    )
                chosen = candidates[:len(exp['trips'])]
                assignments.append((dev, list(zip(exp['trips'], chosen))))
                # A file handed to one device is no longer available to the next
                # one, exactly as if it had already been moved away.
                taken = set(chosen)
                remaining = [s for s in remaining if s not in taken]
            plan.append((target_folder, src_dirpath, file_count, assignments))

        # Pass 2: carry out the planned moves.
        for target_folder, src_dirpath, file_count, assignments in plan:
            print('  ------------------')
            print(' ', target_folder, '({})'.format(file_count))
            for dev, pairs in assignments:
                print('   ---', dev)
                for trip, filename in pairs:
                    dest_dirpath = os.path.join(date_path, exp['exp_name'], dev, trip, 'raw')
                    savemove(os.path.join(src_dirpath, filename), dest_dirpath, filename)

    # Close the report after the last date (keyed on the dict being iterated).
    if i == len(metadatas) - 1:
        print('------------------')

# Remove each date's target folders; deletedir_empty() uses os.rmdir, which
# raises if a folder still contains files.
for date_path in metadatas:
    for src_dirpath in (os.path.join(date_path, name) for name in target_folders):
        deletedir_empty(src_dirpath)

------------------
/home/wmnlab/E/database/2024-05-20 (10)
  Modem_Action_Test_v6_A -> UE: Modem | Laptop: lpt3 | Route: BR
    4 ['qc00', 'qc01', 'qc02', 'qc05']
      10 ['#01', '#02', '#03', '#04', '#05', '#06', '#07', '#08', '#09', '#10']
  ------------------
  mi2log (40)
   --- qc00
    makedir: /home/wmnlab/E/database/2024-05-20/Modem_Action_Test_v6_A/qc00/#01/raw
    mv /home/wmnlab/E/database/2024-05-20/mi2log/diag_log_qc00_2024-05-20_15-30-55.mi2log /home/wmnlab/E/database/2024-05-20/Modem_Action_Test_v6_A/qc00/#01/raw/diag_log_qc00_2024-05-20_15-30-55.mi2log
    makedir: /home/wmnlab/E/database/2024-05-20/Modem_Action_Test_v6_A/qc00/#02/raw
    mv /home/wmnlab/E/database/2024-05-20/mi2log/diag_log_qc00_2024-05-20_15-39-09.mi2log /home/wmnlab/E/database/2024-05-20/Modem_Action_Test_v6_A/qc00/#02/raw/diag_log_qc00_2024-05-20_15-39-09.mi2log
    makedir: /home/wmnlab/E/database/2024-05-20/Modem_Action_Test_v6_A/qc00/#03/raw
    mv /home/wmnlab/E/database/2024-05-20/mi2log/diag_