In [14]:
import sys
import os
import pandas as pd
import shutil
import re
import datetime

In [15]:
def sorted_alphanumeric(data):
    """Return the strings in ``data`` as a new list in natural sort order.

    Every string is split into alternating non-digit and digit runs. Digit
    runs are compared as integers and the remaining runs are compared
    case-insensitively, so ``'a2'`` is placed before ``'a10'``.
    """
    def natural_key(name):
        # The capturing group makes re.split keep the digit runs in its result.
        pieces = re.split('([0-9]+)', name)
        return [int(piece) if piece.isdigit() else piece.lower() for piece in pieces]

    return sorted(data, key=natural_key)

In [16]:
def day_counter(theDay, howManyDays):
    """Shift a ``YYYYMMDD`` date string by a number of days.

    Parameters
    ----------
    theDay : str
        Date written as ``YYYYMMDD``.
    howManyDays : int
        Offset in days; negative values move backwards in time.

    Returns
    -------
    str
        The shifted date, again formatted as ``YYYYMMDD``.
    """
    date_format = "%Y%m%d"
    shifted = datetime.datetime.strptime(theDay, date_format) + datetime.timedelta(days=howManyDays)
    return shifted.strftime(date_format)

In [17]:
def ERA5_PDF_training(theDay, filePath, Data_180_Path):
    """Copy the ``ERA5_T2m_EA_<date>_PDF.csv`` file for 90 days before ``theDay``.

    Parameters
    ----------
    theDay : str
        Day being assembled, written as ``YYYYMMDD``.
    filePath : str
        Directory that holds the ``ERA5_T2m_EA_<date>_PDF.csv`` files.
    Data_180_Path : str
        Prefix to which ``theDay`` is appended (plain string concatenation)
        to form the destination directory.
    """
    # The file keeps its name; only the directory changes.
    pdf_name = 'ERA5_T2m_EA_' + day_counter(theDay, -90) + '_PDF.csv'
    target_dir = Data_180_Path + theDay
    shutil.copy2(os.path.join(filePath, pdf_name), os.path.join(target_dir, pdf_name))

In [18]:
def CSFv2_PDF_training(theDay, filePath, Data_180_Path):
    """Copy ``CFSv2_T2m_EA_<theDay>_PDF.csv`` into the folder of ``theDay``.

    Parameters
    ----------
    theDay : str
        Day being assembled, written as ``YYYYMMDD``; it also selects the file.
    filePath : str
        Directory that holds the ``CFSv2_T2m_EA_<date>_PDF.csv`` files.
    Data_180_Path : str
        Prefix to which ``theDay`` is appended (plain string concatenation)
        to form the destination directory.
    """
    # The file keeps its name; only the directory changes.
    pdf_name = 'CFSv2_T2m_EA_' + theDay + '_PDF.csv'
    target_dir = Data_180_Path + theDay
    shutil.copy2(os.path.join(filePath, pdf_name), os.path.join(target_dir, pdf_name))

In [19]:
def ERA5_projection_training(theDay, filePath, Data_180_Path):
    """Write ``model_eraf.csv`` for ``theDay`` from the CSV files in ``filePath``.

    Files in ``filePath`` whose name has ``'USA'`` at characters 9-11 are
    visited in natural sort order. In each one, the row whose columns 0, 1
    and 2 equal the year, month and day of the date 89 days before ``theDay``
    is located, and column 3 of that row and the rows after it (at most 90
    rows in total) is collected. Each file contributes one row of the output,
    which is saved as ``model_eraf.csv`` inside ``Data_180_Path + theDay``.

    Parameters
    ----------
    theDay : str
        Day being assembled, written as ``YYYYMMDD``.
    filePath : str
        Directory that holds the header-less input CSV files.
    Data_180_Path : str
        Prefix to which ``theDay`` is appended (plain string concatenation)
        to form the output directory.

    Raises
    ------
    IndexError
        If one of the selected files has no row for the start date.
    """
    oneDayTrainingSet = Data_180_Path + theDay
    filesList = sorted_alphanumeric(os.listdir(filePath))
    # Region filter; an 'EA' variant would test file_[9:11] == 'EA' instead.
    modelFiles = [file_ for file_ in filesList if file_[9:12] == 'USA']

    beforeTheDay = day_counter(theDay, -89)
    year = int(beforeTheDay[:4])
    month = int(beforeTheDay[4:6])
    day = int(beforeTheDay[6:])

    projectionList = []
    for file_ in modelFiles:
        # A single read is enough: the mask finds the start row without
        # discarding the rest of the frame.
        df = pd.read_csv(os.path.join(filePath, file_), header=None)
        is_start = (df[0] == year) & (df[1] == month) & (df[2] == day)
        matches = df.index[is_start]
        if len(matches) == 0:
            raise IndexError('no row dated ' + beforeTheDay + ' in ' + file_)
        # header=None without index_col gives a 0..n-1 index, so the label of
        # the first match is also its position.
        beforeDayIndex = matches[0]
        projectionList.append(df.iloc[beforeDayIndex:beforeDayIndex + 90, 3].tolist())

    store = pd.DataFrame(data=projectionList)
    store.to_csv(os.path.join(oneDayTrainingSet, 'model_eraf.csv'), encoding='utf-8', index=True)

In [20]:
def CSFv2_projection_training(theDay, filePath, Data_180_Path):
    """Write ``model_csfv2.csv`` for ``theDay`` from the CSV files in ``filePath``.

    Files in ``filePath`` whose name has ``'USA'`` at characters 10-12 are
    visited in natural sort order. In each one, the row whose columns 0, 1
    and 2 equal the year, month and day of ``theDay`` is located and all of
    its values from column 3 onwards are collected. Each file contributes one
    row of the output, which is saved as ``model_csfv2.csv`` inside
    ``Data_180_Path + theDay``.

    Parameters
    ----------
    theDay : str
        Day being assembled, written as ``YYYYMMDD``.
    filePath : str
        Directory that holds the header-less input CSV files.
    Data_180_Path : str
        Prefix to which ``theDay`` is appended (plain string concatenation)
        to form the output directory.

    Raises
    ------
    IndexError
        If one of the selected files has no row for ``theDay``.
    """
    oneDayTrainingSet = Data_180_Path + theDay
    filesList = sorted_alphanumeric(os.listdir(filePath))
    modelFiles = [file_ for file_ in filesList if file_[10:13] == 'USA']

    year = int(theDay[:4])
    month = int(theDay[4:6])
    day = int(theDay[6:])

    projectionList = []
    for file_ in modelFiles:
        # A single read is enough: the mask finds the row without discarding
        # the rest of the frame.
        df = pd.read_csv(os.path.join(filePath, file_), header=None)
        is_the_day = (df[0] == year) & (df[1] == month) & (df[2] == day)
        matches = df.index[is_the_day]
        if len(matches) == 0:
            raise IndexError('no row dated ' + theDay + ' in ' + file_)
        # header=None without index_col gives a 0..n-1 index, so the label of
        # the first match is also its position.
        theDayIndex = matches[0]
        projectionList.append(df.iloc[theDayIndex, 3:].tolist())

    store = pd.DataFrame(data=projectionList)
    store.to_csv(os.path.join(oneDayTrainingSet, 'model_csfv2.csv'), encoding='utf-8', index=True)

In [21]:
def training_assignment(Data_180_Path, PDF_ERA5_Path, PDF_CFSv2_Path, ERA5_past_initial_path, CFSV2_past_initial_path, theDay, filePath):
    """Assemble the per-day folder for ``theDay`` by running the four build steps.

    Parameters
    ----------
    Data_180_Path : str
        Prefix to which ``theDay`` is appended to form the folder that the
        four steps write into.
    PDF_ERA5_Path, PDF_CFSv2_Path : str
        Source directories passed to ``ERA5_PDF_training`` and
        ``CSFv2_PDF_training``.
    ERA5_past_initial_path, CFSV2_past_initial_path : str
        Source directories passed to ``ERA5_projection_training`` and
        ``CSFv2_projection_training``.
    theDay : str
        Day being assembled, written as ``YYYYMMDD``.
    filePath : str
        Prefix to which ``theDay`` is appended to form a folder that is
        created as well; the callers in this notebook pass the same value as
        ``Data_180_Path``.
    """
    # Every step below writes into Data_180_Path + theDay, so that folder must
    # exist. filePath + theDay is still created for backward compatibility.
    # When both prefixes are equal the second call is a no-op.
    for dayDir in (filePath + theDay, Data_180_Path + theDay):
        os.makedirs(dayDir, exist_ok=True)

    ERA5_PDF_training(theDay, PDF_ERA5_Path, Data_180_Path)
    CSFv2_PDF_training(theDay, PDF_CFSv2_Path, Data_180_Path)
    ERA5_projection_training(theDay, ERA5_past_initial_path, Data_180_Path)
    CSFv2_projection_training(theDay, CFSV2_past_initial_path, Data_180_Path)

In [22]:
# Data locations.
# The trailing '/' on Data_180_Path matters: the day string is appended to it
# by plain string concatenation.
Data_180_Path = 'D:/weather/data_180_USA/Train/'
PDF_ERA5_Path = 'F:/PDF_ans/'
PDF_CFSv2_Path = 'F:/PDF_CFSv2/'
ERA5_past_initial_path = 'F:/projection/ERA5_past_initial/'
CFSV2_past_initial_path = 'F:/projection/CFSv2_forecast/'

# Create the output root together with any missing parent folders, so the
# cell also runs on a machine where the tree does not exist yet.
os.makedirs(Data_180_Path, exist_ok=True)

# First and last day of this split, both included, written as YYYYMMDD.
firstDay = '20110830'
lastDay = '20190630'

first = datetime.datetime.strptime(firstDay,"%Y%m%d")
last = datetime.datetime.strptime(lastDay,"%Y%m%d")
howManyTrainingDatas = (last - first).days + 1  # +1 because both end days count
print('There are',howManyTrainingDatas,'training datas.')

# Build one folder per day of the range.
for i in range(howManyTrainingDatas):
    theDay = day_counter(firstDay,i)
    training_assignment(Data_180_Path, PDF_ERA5_Path, PDF_CFSv2_Path, ERA5_past_initial_path, CFSV2_past_initial_path, theDay, Data_180_Path)
    print(theDay,'was done.')

There are 2862 training datas.
20110830 was done.
20110831 was done.
20110901 was done.
20110902 was done.
20110903 was done.
20110904 was done.
20110905 was done.
20110906 was done.
20110907 was done.
20110908 was done.
20110909 was done.
20110910 was done.
20110911 was done.
20110912 was done.
20110913 was done.
20110914 was done.
20110915 was done.
20110916 was done.
20110917 was done.
20110918 was done.
20110919 was done.
20110920 was done.
20110921 was done.
20110922 was done.
20110923 was done.
20110924 was done.
20110925 was done.
20110926 was done.
20110927 was done.
20110928 was done.
20110929 was done.
20110930 was done.
20111001 was done.
20111002 was done.
20111003 was done.
20111004 was done.
20111005 was done.
20111006 was done.
20111007 was done.
20111008 was done.
20111009 was done.
20111010 was done.
20111011 was done.
20111012 was done.
20111013 was done.
20111014 was done.
20111015 was done.
20111016 was done.
20111017 was done.
20111018 was done.
20111019 was done.


20121102 was done.
20121103 was done.
20121104 was done.
20121105 was done.
20121106 was done.
20121107 was done.
20121108 was done.
20121109 was done.
20121110 was done.
20121111 was done.
20121112 was done.
20121113 was done.
20121114 was done.
20121115 was done.
20121116 was done.
20121117 was done.
20121118 was done.
20121119 was done.
20121120 was done.
20121121 was done.
20121122 was done.
20121123 was done.
20121124 was done.
20121125 was done.
20121126 was done.
20121127 was done.
20121128 was done.
20121129 was done.
20121130 was done.
20121201 was done.
20121202 was done.
20121203 was done.
20121204 was done.
20121205 was done.
20121206 was done.
20121207 was done.
20121208 was done.
20121209 was done.
20121210 was done.
20121211 was done.
20121212 was done.
20121213 was done.
20121214 was done.
20121215 was done.
20121216 was done.
20121217 was done.
20121218 was done.
20121219 was done.
20121220 was done.
20121221 was done.
20121222 was done.
20121223 was done.
20121224 was

20140108 was done.
20140109 was done.
20140110 was done.
20140111 was done.
20140112 was done.
20140113 was done.
20140114 was done.
20140115 was done.
20140116 was done.
20140117 was done.
20140118 was done.
20140119 was done.
20140120 was done.
20140121 was done.
20140122 was done.
20140123 was done.
20140124 was done.
20140125 was done.
20140126 was done.
20140127 was done.
20140128 was done.
20140129 was done.
20140130 was done.
20140131 was done.
20140201 was done.
20140202 was done.
20140203 was done.
20140204 was done.
20140205 was done.
20140206 was done.
20140207 was done.
20140208 was done.
20140209 was done.
20140210 was done.
20140211 was done.
20140212 was done.
20140213 was done.
20140214 was done.
20140215 was done.
20140216 was done.
20140217 was done.
20140218 was done.
20140219 was done.
20140220 was done.
20140221 was done.
20140222 was done.
20140223 was done.
20140224 was done.
20140225 was done.
20140226 was done.
20140227 was done.
20140228 was done.
20140301 was

20150316 was done.
20150317 was done.
20150318 was done.
20150319 was done.
20150320 was done.
20150321 was done.
20150322 was done.
20150323 was done.
20150324 was done.
20150325 was done.
20150326 was done.
20150327 was done.
20150328 was done.
20150329 was done.
20150330 was done.
20150331 was done.
20150401 was done.
20150402 was done.
20150403 was done.
20150404 was done.
20150405 was done.
20150406 was done.
20150407 was done.
20150408 was done.
20150409 was done.
20150410 was done.
20150411 was done.
20150412 was done.
20150413 was done.
20150414 was done.
20150415 was done.
20150416 was done.
20150417 was done.
20150418 was done.
20150419 was done.
20150420 was done.
20150421 was done.
20150422 was done.
20150423 was done.
20150424 was done.
20150425 was done.
20150426 was done.
20150427 was done.
20150428 was done.
20150429 was done.
20150430 was done.
20150501 was done.
20150502 was done.
20150503 was done.
20150504 was done.
20150505 was done.
20150506 was done.
20150507 was

20160521 was done.
20160522 was done.
20160523 was done.
20160524 was done.
20160525 was done.
20160526 was done.
20160527 was done.
20160528 was done.
20160529 was done.
20160530 was done.
20160531 was done.
20160601 was done.
20160602 was done.
20160603 was done.
20160604 was done.
20160605 was done.
20160606 was done.
20160607 was done.
20160608 was done.
20160609 was done.
20160610 was done.
20160611 was done.
20160612 was done.
20160613 was done.
20160614 was done.
20160615 was done.
20160616 was done.
20160617 was done.
20160618 was done.
20160619 was done.
20160620 was done.
20160621 was done.
20160622 was done.
20160623 was done.
20160624 was done.
20160625 was done.
20160626 was done.
20160627 was done.
20160628 was done.
20160629 was done.
20160630 was done.
20160701 was done.
20160702 was done.
20160703 was done.
20160704 was done.
20160705 was done.
20160706 was done.
20160707 was done.
20160708 was done.
20160709 was done.
20160710 was done.
20160711 was done.
20160712 was

20170727 was done.
20170728 was done.
20170729 was done.
20170730 was done.
20170731 was done.
20170801 was done.
20170802 was done.
20170803 was done.
20170804 was done.
20170805 was done.
20170806 was done.
20170807 was done.
20170808 was done.
20170809 was done.
20170810 was done.
20170811 was done.
20170812 was done.
20170813 was done.
20170814 was done.
20170815 was done.
20170816 was done.
20170817 was done.
20170818 was done.
20170819 was done.
20170820 was done.
20170821 was done.
20170822 was done.
20170823 was done.
20170824 was done.
20170825 was done.
20170826 was done.
20170827 was done.
20170828 was done.
20170829 was done.
20170830 was done.
20170831 was done.
20170901 was done.
20170902 was done.
20170903 was done.
20170904 was done.
20170905 was done.
20170906 was done.
20170907 was done.
20170908 was done.
20170909 was done.
20170910 was done.
20170911 was done.
20170912 was done.
20170913 was done.
20170914 was done.
20170915 was done.
20170916 was done.
20170917 was

20181002 was done.
20181003 was done.
20181004 was done.
20181005 was done.
20181006 was done.
20181007 was done.
20181008 was done.
20181009 was done.
20181010 was done.
20181011 was done.
20181012 was done.
20181013 was done.
20181014 was done.
20181015 was done.
20181016 was done.
20181017 was done.
20181018 was done.
20181019 was done.
20181020 was done.
20181021 was done.
20181022 was done.
20181023 was done.
20181024 was done.
20181025 was done.
20181026 was done.
20181027 was done.
20181028 was done.
20181029 was done.
20181030 was done.
20181031 was done.
20181101 was done.
20181102 was done.
20181103 was done.
20181104 was done.
20181105 was done.
20181106 was done.
20181107 was done.
20181108 was done.
20181109 was done.
20181110 was done.
20181111 was done.
20181112 was done.
20181113 was done.
20181114 was done.
20181115 was done.
20181116 was done.
20181117 was done.
20181118 was done.
20181119 was done.
20181120 was done.
20181121 was done.
20181122 was done.
20181123 was

In [23]:
# Data locations.
# The trailing '/' on Data_180_Path matters: the day string is appended to it
# by plain string concatenation.
Data_180_Path = 'D:/weather/data_180_USA/Valid/'
PDF_ERA5_Path = 'F:/PDF_ans/'
PDF_CFSv2_Path = 'F:/PDF_CFSv2/'
ERA5_past_initial_path = 'F:/projection/ERA5_past_initial/'
CFSV2_past_initial_path = 'F:/projection/CFSv2_forecast/'

# Create the output root together with any missing parent folders, so the
# cell also runs on a machine where the tree does not exist yet.
os.makedirs(Data_180_Path, exist_ok=True)

# First and last day of this split, both included, written as YYYYMMDD.
firstDay = '20190701'
lastDay = '20190930'

first = datetime.datetime.strptime(firstDay,"%Y%m%d")
last = datetime.datetime.strptime(lastDay,"%Y%m%d")
howManyTrainingDatas = (last - first).days + 1  # +1 because both end days count
print('There are',howManyTrainingDatas,'training datas.')

# Build one folder per day of the range.
for i in range(howManyTrainingDatas):
    theDay = day_counter(firstDay,i)
    training_assignment(Data_180_Path, PDF_ERA5_Path, PDF_CFSv2_Path, ERA5_past_initial_path, CFSV2_past_initial_path, theDay, Data_180_Path)
    print(theDay,'was done.')

There are 92 training datas.
20190701 was done.
20190702 was done.
20190703 was done.
20190704 was done.
20190705 was done.
20190706 was done.
20190707 was done.
20190708 was done.
20190709 was done.
20190710 was done.
20190711 was done.
20190712 was done.
20190713 was done.
20190714 was done.
20190715 was done.
20190716 was done.
20190717 was done.
20190718 was done.
20190719 was done.
20190720 was done.
20190721 was done.
20190722 was done.
20190723 was done.
20190724 was done.
20190725 was done.
20190726 was done.
20190727 was done.
20190728 was done.
20190729 was done.
20190730 was done.
20190731 was done.
20190801 was done.
20190802 was done.
20190803 was done.
20190804 was done.
20190805 was done.
20190806 was done.
20190807 was done.
20190808 was done.
20190809 was done.
20190810 was done.
20190811 was done.
20190812 was done.
20190813 was done.
20190814 was done.
20190815 was done.
20190816 was done.
20190817 was done.
20190818 was done.
20190819 was done.
20190820 was done.
20

In [None]:
# Data locations.
# The trailing '/' on Data_180_Path matters: the day string is appended to it
# by plain string concatenation.
Data_180_Path = 'D:/weather/data_180_USA/Test/'
PDF_ERA5_Path = 'F:/PDF_ans/'
PDF_CFSv2_Path = 'F:/PDF_CFSv2/'
ERA5_past_initial_path = 'F:/projection/ERA5_past_initial/'
CFSV2_past_initial_path = 'F:/projection/CFSv2_forecast/'

# Create the output root together with any missing parent folders, so the
# cell also runs on a machine where the tree does not exist yet.
os.makedirs(Data_180_Path, exist_ok=True)

# First and last day of this split, both included, written as YYYYMMDD.
firstDay = '20191001'
lastDay = '20200531'

first = datetime.datetime.strptime(firstDay,"%Y%m%d")
last = datetime.datetime.strptime(lastDay,"%Y%m%d")
howManyTrainingDatas = (last - first).days + 1  # +1 because both end days count
print('There are',howManyTrainingDatas,'training datas.')

# Build one folder per day of the range.
for i in range(howManyTrainingDatas):
    theDay = day_counter(firstDay,i)
    training_assignment(Data_180_Path, PDF_ERA5_Path, PDF_CFSv2_Path, ERA5_past_initial_path, CFSV2_past_initial_path, theDay, Data_180_Path)
    print(theDay,'was done.')

There are 244 training datas.
20191001 was done.
20191002 was done.
20191003 was done.
20191004 was done.
20191005 was done.
20191006 was done.
20191007 was done.
20191008 was done.
20191009 was done.
20191010 was done.
20191011 was done.
20191012 was done.
20191013 was done.
20191014 was done.
20191015 was done.
20191016 was done.
20191017 was done.
20191018 was done.
20191019 was done.
20191020 was done.
20191021 was done.
20191022 was done.
20191023 was done.
20191024 was done.
20191025 was done.
20191026 was done.
20191027 was done.
20191028 was done.
20191029 was done.
20191030 was done.
20191031 was done.
20191101 was done.
20191102 was done.
20191103 was done.
20191104 was done.
20191105 was done.
20191106 was done.
20191107 was done.
20191108 was done.
20191109 was done.
20191110 was done.
20191111 was done.
20191112 was done.
20191113 was done.
20191114 was done.
20191115 was done.
20191116 was done.
20191117 was done.
20191118 was done.
20191119 was done.
20191120 was done.
2