### CRUD (資料操作)

#### Part A - 純文字檔 (欄位結構會變動)
以 `with` + `for` + `if` + `str` 操作 test.dat 文字檔：

In [6]:
# import library
from pathlib import Path # a build-in module for managing path 

test_file = Path('./dataset/test.dat')

with open(test_file, 'r') as f:
    lines = f.readlines()

# print it out
for line in lines:
    print(line.strip()) # strip -> 移除前後的空白及換行符號

# simple explanation
for i, line in enumerate(lines):
    if i == 0:
        print('yyyymmddhhmm sssss')
        print(line.strip())
    elif i < 2:
        print('sta  mm sssss pwei sssss swei')
        print(line.strip())

2024 4 1 0 2 20.54
FUSS  2 25.69 1.00  0.00 0.00
LATB  2 25.74 1.00  0.00 0.00
NACB  2 25.76 1.00  0.00 0.00
NACB  2  0.00 0.00 28.64 1.00
FUSS  2  0.00 0.00 28.67 1.00
SM09  2 28.73 1.00  0.00 0.00
WUSB  2 30.27 1.00  0.00 0.00
B071  2 37.28 1.00  0.00 0.00
ECB   2  0.00 0.00 56.77 1.00

yyyymmddhhmm sssss
2024 4 1 0 2 20.54
sta  mm sssss pwei sssss swei
FUSS  2 25.69 1.00  0.00 0.00


***A-1*** 提取事件時間 (for-loop + if + split)

In [12]:
from datetime import datetime # module for processing datetime 

for line in lines:
    strip_line = line.strip()
    if strip_line.startswith('2024'):
        buffers = strip_line.split()
        year = int(buffers[0])
        month = int(buffers[1])
        day = int(buffers[2])
        hour = int(buffers[3])
        minute = int(buffers[4])
        second = int(buffers[5][:2])
        microsecond = int(float(buffers[5][2:]) * 1000000)
        print(f"Event time: {datetime(year, month, day, hour, minute, second, microsecond)}")


Event time: 2024-04-01 00:02:20.540000


***A-2-1*** 提取與此事件相關的 stations (list 收集)

In [24]:
stations = [] # 初始化一個list，用來裝被切出來的station
for line in lines:
    strip_line = line.strip()
    if strip_line and strip_line[0].isalpha():
        station = strip_line[:4].strip()
        stations.append(station)

print(stations)

['FUSS', 'LATB', 'NACB', 'NACB', 'FUSS', 'SM09', 'WUSB', 'B071', 'ECB']


***A-2-2*** 提取與此事件相關的 stations (set 收集，裡面不重複)

In [26]:
unique_stations = set(stations)
print(unique_stations)

{'WUSB', 'ECB', 'SM09', 'FUSS', 'NACB', 'B071', 'LATB'}


***A-3*** 分開記錄到 P 與 S 波的測站 (多層 if)

In [27]:
p_stations = []
s_stations = []
for line in lines:
    strip_line = line.strip()
    if strip_line and strip_line[0].isalpha():
        station = strip_line[:4].strip()
        p_weight = strip_line[14:19]
        s_weight = strip_line[-4:]
        if float(p_weight) == 1.0:
            p_stations.append(station)
        elif float(s_weight) == 1.0:
            s_stations.append(station)

print(f"有記錄到P phase的站: {p_stations}")
print(f"有記錄到S phase的站: {s_stations}")

有記錄到P phase的站: ['FUSS', 'LATB', 'NACB', 'SM09', 'WUSB', 'B071']
有記錄到S phase的站: ['NACB', 'FUSS', 'ECB']


***A-4*** 結合日期與分秒補全時間 (變數傳遞)

In [32]:
from datetime import datetime # module for processing datetime 

for line in lines:
    strip_line = line.strip()

    # initialize the varaibles
    year = 1
    month = 1
    day = 1
    hour = 1
    phase_min = 1
    phase_second = 1
    phase_microsecond = 1

    if strip_line.startswith('2024'):
        buffers = strip_line.split()
        year = int(buffers[0])
        month = int(buffers[1])
        day = int(buffers[2])
        hour = int(buffers[3])
        minute = int(buffers[4])
        second = int(buffers[5][:2])
        microsecond = int(float(buffers[5][2:]) * 1000000)
        print(f"Event time: {datetime(year, month, day, hour, minute, second, microsecond)}")
    elif strip_line and strip_line[0].isalpha():
        wave_buffers = strip_line.split()
        station = wave_buffers[0]
        p_weight = wave_buffers[3]
        s_weight = wave_buffers[-1]
        if float(p_weight) == 1.0:
            phase_min = int(wave_buffers[1])
            phase_second = int(wave_buffers[2][:2])
            phase_microsecond = int(float(wave_buffers[2][2:]) * 1000000)
        elif float(s_weight) == 1.0:
            phase_min = int(wave_buffers[1])
            phase_second = int(wave_buffers[4][:2])
            phase_microsecond = int(float(wave_buffers[4][2:]) * 1000000)
        
        print(f"{station}'s arrival time: {datetime(year, month, day, hour, phase_min, phase_second, phase_microsecond)}")
        

Event time: 2024-04-01 00:02:20.540000
FUSS's arrival time: 0001-01-01 01:02:25.690000
LATB's arrival time: 0001-01-01 01:02:25.740000
NACB's arrival time: 0001-01-01 01:02:25.760000
NACB's arrival time: 0001-01-01 01:02:28.640000
FUSS's arrival time: 0001-01-01 01:02:28.670000
SM09's arrival time: 0001-01-01 01:02:28.730000
WUSB's arrival time: 0001-01-01 01:02:30.270000
B071's arrival time: 0001-01-01 01:02:37.280000
ECB's arrival time: 0001-01-01 01:02:56.770000


***A-5*** 用 dict 記錄事件的分秒與測站資訊

In [36]:
from datetime import datetime # module for processing datetime 

information_dict = {}

for line in lines:
    strip_line = line.strip()

    # initialize the varaibles
    year = 1
    month = 1
    day = 1
    hour = 1
    phase_min = 1
    phase_second = 1
    phase_microsecond = 1

    if strip_line.startswith('2024'):
        buffers = strip_line.split()
        year = int(buffers[0])
        month = int(buffers[1])
        day = int(buffers[2])
        hour = int(buffers[3])
        minute = int(buffers[4])
        second = int(buffers[5][:2])
        microsecond = int(float(buffers[5][2:]) * 1000000)
        
    elif strip_line and strip_line[0].isalpha():
        wave_buffers = strip_line.split()
        station = wave_buffers[0]
        p_weight = wave_buffers[3]
        s_weight = wave_buffers[-1]
        if float(p_weight) == 1.0:
            phase_min = int(wave_buffers[1])
            phase_second = int(wave_buffers[2][:2])
            phase_microsecond = int(float(wave_buffers[2][2:]) * 1000000)
        elif float(s_weight) == 1.0:
            phase_min = int(wave_buffers[1])
            phase_second = int(wave_buffers[4][:2])
            phase_microsecond = int(float(wave_buffers[4][2:]) * 1000000)
        
        information_dict[station] = datetime(year, month, day, hour, phase_min, phase_second, phase_microsecond)

print(information_dict)

{'FUSS': datetime.datetime(1, 1, 1, 1, 2, 28, 670000), 'LATB': datetime.datetime(1, 1, 1, 1, 2, 25, 740000), 'NACB': datetime.datetime(1, 1, 1, 1, 2, 28, 640000), 'SM09': datetime.datetime(1, 1, 1, 1, 2, 28, 730000), 'WUSB': datetime.datetime(1, 1, 1, 1, 2, 30, 270000), 'B071': datetime.datetime(1, 1, 1, 1, 2, 37, 280000), 'ECB': datetime.datetime(1, 1, 1, 1, 2, 56, 770000)}
