In [4]:
import os
import glob
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

### csvファイルの読み込み
 - filenameには、各自のファイルが保存されている場所のパスを指定してください

In [5]:
def load_csv(filename):
    """Read a header-less CSV file into a DataFrame.

    Parameters
    ----------
    filename : str or file-like
        Location of the CSV, passed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file. No row is consumed as a header, so the columns
        are labelled with integers starting at 0.
    """
    return pd.read_csv(filename, header=None)

In [6]:
# Load one sample station file and print it to inspect the raw layout.
# load_csv reads without a header row, so columns are labelled by position.
filename = '/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_014501_M_QH_E03.csv'
data = load_csv(filename)
print(data)

        0      1   2  3                          4       5       6   7
0      KH  14501  QH  M  1910-01-01T07:00:00+07:00  5988.0  m3\sec   O
1      KH  14501  QH  M  1910-01-02T07:00:00+07:00  5629.0  m3\sec   O
2      KH  14501  QH  M  1910-01-03T07:00:00+07:00  5413.0  m3\sec   O
3      KH  14501  QH  M  1910-01-04T07:00:00+07:00  5377.0  m3\sec   O
4      KH  14501  QH  M  1910-01-05T07:00:00+07:00  5222.0  m3\sec   O
...    ..    ...  .. ..                        ...     ...     ...  ..
34694  KH  14501  QH  M  2004-12-27T07:00:00+07:00  4299.0  m3\sec   O
34695  KH  14501  QH  M  2004-12-28T07:00:00+07:00  4299.0  m3\sec   O
34696  KH  14501  QH  M  2004-12-29T07:00:00+07:00  4299.0  m3\sec   O
34697  KH  14501  QH  M  2004-12-30T07:00:00+07:00  4262.0  m3\sec   O
34698  KH  14501  QH  M  2004-12-31T07:00:00+07:00  4155.0  m3\sec   O

[34699 rows x 8 columns]


### 保存用のnumpy.ndarrayを作成
1. datetimeオブジェクトで1800/1/1から2039/12/31までの全カレンダーを取得
2. .csvファイルの時間部分(index=4)の中の'年月日'部分を取り出す
    - split(str): strで文字列を分割
    - split後の前半([0])を取り出す
3. date_begin, date_endを計算
    - date_begin: .csvファイルの観測が存在する最初の年月日
    - date_end: .csvファイルの観測が存在する最後の年月日
    - strをdatetimeオブジェクトに変換し、1800/1/1からの日数に変換
4. 保存用のnumpy.ndarray配列を作成

In [7]:
def make_npy(dt):
    """Prepare the empty daily table and locate the observation window.

    Parameters
    ----------
    dt : pandas.DataFrame
        Loaded station file. Column 4 is read as a string of the form
        ``YYYY-MM-DDT...``; only the first and the last row are inspected.

    Returns
    -------
    tuple
        ``(t, date_begin, date_end, npy)`` where ``t`` is the calendar
        origin (1800-01-01), ``date_begin`` / ``date_end`` are the day
        offsets of the first / last row's date from that origin, and
        ``npy`` is a zero-filled object array with two columns and one row
        per day from 1800-01-01 through 2039-12-31.
    """
    origin = datetime(1800, 1, 1)
    final_day = datetime(2039, 12, 31)
    n_days = (final_day - origin).days + 1  # +1 so the final day gets a row

    def days_since_origin(stamp):
        # Keep only the date part in front of 'T', then count days from origin.
        date_part = stamp.split('T')[0]
        return (datetime.strptime(date_part, '%Y-%m-%d') - origin).days

    date_begin = days_since_origin(dt.iloc[0, 4])
    date_end = days_since_origin(dt.iloc[-1, 4])

    npy = np.zeros((n_days, 2), dtype=object)

    return origin, date_begin, date_end, npy

In [8]:
# Reload the sample file and show the calendar origin together with the
# day offsets of its first and last rows.
filename = '/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_014501_M_QH_E03.csv'
data = load_csv(filename)
t, date_begin, date_end, npy = make_npy(data)
print(f't: {t}, date_begin: {date_begin}, date_end:{date_end}')

t: 1800-01-01 00:00:00, date_begin: 40176, date_end:74874


### 空numpy.ndarrayにデータを格納
- loop(1): 観測期間より前（最初の観測日の前日まで）は-9999を格納
- loop(2): 観測期間内は、値が0以上なら観測値を、それ以外は-9999を格納
- loop(3): 観測期間より後は、numpy.ndarrayの最後まで-9999を格納

In [9]:
def insert_value(dt, t, date_begin, date_end, npy):
    """Fill the daily table with date labels and observed values.

    Every row ``i`` of ``npy`` receives the date ``t + i days`` (formatted
    ``YYYY-MM-DD``) in column 0. Column 1 receives the observation for that
    date when one exists inside the window ``date_begin..date_end``
    (both ends inclusive) and is >= 0; otherwise it receives -9999.

    Observations are matched to rows by their own timestamp rather than by
    their position in ``dt``, so a gap in the record leaves -9999 on the
    missing days instead of shifting later values onto wrong dates. The
    inclusive upper bound keeps the observation on ``date_end`` itself.

    Parameters
    ----------
    dt : pandas.DataFrame
        Station data; column 4 holds ``YYYY-MM-DDT...`` timestamp strings
        and column 5 the values.
    t : datetime.datetime
        Date that corresponds to row 0 of ``npy``.
    date_begin, date_end : int
        Row offsets (days since ``t``) of the first and last observation.
    npy : numpy.ndarray
        Two-column object array to fill; it is modified in place.

    Returns
    -------
    numpy.ndarray
        The same ``npy`` object, now filled.

    Raises
    ------
    ValueError
        If a timestamp's date part does not match ``%Y-%m-%d``.
    """
    missing = -9999

    # Row offset -> value, derived from each observation's own date.
    # If a day appears more than once, the last occurrence wins.
    observed = {}
    for stamp, value in zip(dt.iloc[:, 4], dt.iloc[:, 5]):
        obs_day = datetime.strptime(stamp.split('T')[0], '%Y-%m-%d')
        observed[(obs_day - t).days] = value

    day = t
    for i in range(len(npy)):
        npy[i, 0] = day.strftime("%Y-%m-%d")

        # Outside the observation window everything is missing; inside it,
        # days without a row, negative values and NaN are missing as well
        # (NaN >= 0 evaluates to False).
        value = observed.get(i) if date_begin <= i <= date_end else None
        if value is not None and value >= 0:
            npy[i, 1] = value
        else:
            npy[i, 1] = missing

        day += timedelta(days=1)

    return npy

In [10]:
# Build the full daily array for the sample file and print it.
filename = '/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_014501_M_QH_E03.csv'
data = load_csv(filename)
t, date_begin, date_end, npy = make_npy(data)
npy = insert_value(data, t, date_begin, date_end, npy)
print(npy)

[['1800-01-01' -9999]
 ['1800-01-02' -9999]
 ['1800-01-03' -9999]
 ...
 ['2039-12-29' -9999]
 ['2039-12-30' -9999]
 ['2039-12-31' -9999]]


### 保存用のディレクトリを作成し、データを保存する
- ディレクトリが存在しない場合は新規で作成する関数
- 保存ファイル名は、"国識別記号_観測値識別番号_DISCHARGE.csv"とする

In [11]:
def my_makedirs(path):
    """Create directory ``path``, including missing parents, if needed.

    Parameters
    ----------
    path : str
        Directory to create. Nothing happens if it already exists.

    Raises
    ------
    FileExistsError
        If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the race between a separate os.path.isdir() check
    # and the creation when another process creates the directory in between.
    os.makedirs(path, exist_ok=True)
        
def save_csv(data, save_dir, filename):
    """Write ``data`` to ``save_dir/filename`` as CSV without header or index.

    Parameters
    ----------
    data : array-like
        Anything ``pandas.DataFrame`` accepts; it is wrapped in a DataFrame
        before being written.
    save_dir : str
        Destination directory; created through ``my_makedirs`` when absent.
    filename : str
        Name of the CSV file inside ``save_dir``.
    """
    frame = pd.DataFrame(data)

    # Make sure the destination directory exists before writing into it.
    my_makedirs(save_dir)
    destination = f'{save_dir}/{filename}'
    frame.to_csv(destination, header=None, index=None)

In [12]:
# Convert a single station file end to end and save the daily table.
input_f = '/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_014501_M_QH_E03.csv'
save_dir = '/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/Day_data'
data = load_csv(input_f)
# Output name is built from the first row's column 0 and column 1 values.
output_f = f'{data.iloc[0,0]}_{data.iloc[0,1]}_DISCHARGE.csv'

print(f'save_dir: {save_dir}')
print(f'output_f: {output_f}')

t, date_begin, date_end, npy = make_npy(data)
npy = insert_value(data, t, date_begin, date_end, npy)
save_csv(npy, save_dir, output_f)

save_dir: /docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/Day_data
output_f: KH_14501_DISCHARGE.csv


### 全データに対して実行
- rootディレクトリを選択(適宜皆さんのデータがあるpathを選択してください）
- root下に'Day_data'ディレクトリを新たに作成してください(save_csvにsave_dirを渡せば自動生成されます)
- 入力データはファイル名に'QH'を含むもののみを扱います
     - QH: 流量データ
     - HH: 水位データ
- 保存名の最後には、流量データであることを示す"DISCHARGE"を付けます

In [13]:
def main(root='/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata'):
    """Convert every ``*_QH_*.csv`` file under ``root/MRC`` to a daily table.

    Each input file is loaded, expanded to the full daily calendar, and
    written to ``root/Day_data`` under a name built from the first row's
    column 0 and column 1 values plus ``_DISCHARGE.csv``. The path of each
    processed file is printed.

    Parameters
    ----------
    root : str, optional
        Data root containing the ``MRC`` input directory. Defaults to the
        path used throughout this notebook, so ``main()`` behaves as before.
    """
    save_dir = f'{root}/Day_data'
    input_f = 'MRC/*_QH_*.csv'

    # glob returns matches in arbitrary order; sorting makes the processing
    # (and printed) order reproducible across runs and machines.
    for x in sorted(glob.glob(f'{root}/{input_f}')):
        data = load_csv(x)
        t, date_begin, date_end, npy = make_npy(data)
        npy = insert_value(data, t, date_begin, date_end, npy)

        output_f = f'{data.iloc[0,0]}_{data.iloc[0,1]}_DISCHARGE.csv'
        save_csv(npy, save_dir, output_f)

        print(x)

In [14]:
# Run the batch conversion; each processed input path is printed.
main()

/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_014501_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_014901_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_019801_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_019802_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_019806_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_020101_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_033401_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_033402_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_120423_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_430101_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/KH_430102_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discha

/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_160402_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_290102_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_310102_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_310201_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_330103_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_370104_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_370122_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_370210_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_370805_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_371101_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discharge_data/MRCdata/MRC/TH_371203_M_QH_E03.csv
/docker/mnt/d/LM/Python_seminar/2022/discha