In [25]:
import os
import numpy as np
import pandas as pd
import csv

In [26]:
# Fixed seed: np.random.seed() with no argument reseeds from OS entropy, so
# every run would draw a different random year permutation. Pinning it makes
# results that rely on the global NumPy random state repeatable.
SEED = 42
np.random.seed(SEED)

In [3]:
# Scratch array holding the integers 0..9 (not referenced by any cell visible here).
x = np.arange(0, 10)

In [4]:
def train_val_test_split(startYear=2001, endYear=2017, valPercent=0.2, testPercent=0.2, rng=None):
    """Randomly partition the years startYear..endYear (inclusive) into three groups.

    The validation and test group sizes are each rounded *up*
    (``ceil(percent * number_of_years)``); training receives whatever is left.

    Parameters
    ----------
    startYear, endYear : int
        First and last year of the range (both included).
    valPercent, testPercent : float
        Fraction of the years assigned to validation / test.
    rng : object with a ``permutation`` method, optional
        Source of randomness, e.g. ``np.random.RandomState(0)`` or
        ``np.random.default_rng(0)``. When omitted, the global ``np.random``
        state is used, as before.

    Returns
    -------
    (train_year, val_year, test_year) : tuple of numpy arrays
        Consecutive, non-overlapping slices of one random permutation of the years.

    Raises
    ------
    AssertionError
        If ``valPercent + testPercent >= 1.0``.
    ValueError
        If rounding up makes validation + test larger than the number of
        years available.
    """
    assert(valPercent + testPercent < 1.0)
    nYears = int(endYear - startYear + 1)
    nVal = int(np.ceil(valPercent * nYears))
    nTest = int(np.ceil(testPercent * nYears))
    nTrain = nYears - (nVal + nTest)
    # Both sizes are rounded up, so their sum can exceed nYears even though the
    # percentages sum to less than 1 (e.g. 3 years at 0.4/0.4 -> 2 + 2). A
    # negative nTrain would make perm[:nTrain] wrap around and overlap the
    # test slice, so fail loudly instead.
    if nTrain < 0:
        raise ValueError(
            "validation ({}) + test ({}) years exceed the {} year(s) available".format(
                nVal, nTest, nYears))

    permute = np.random.permutation if rng is None else rng.permutation
    perm = permute(np.arange(startYear, endYear + 1))
    train_year = perm[:nTrain]
    val_year = perm[nTrain:nTrain + nVal]
    # nTrain + nVal == nYears - nTest, so this is the remaining tail of perm.
    test_year = perm[nTrain + nVal:]
    return train_year, val_year, test_year

In [5]:
# Draw the random year split for 2001-2017 with 20% validation and 20% test.
train_year, val_year, test_year = train_val_test_split(
    startYear=2001, endYear=2017, valPercent=0.2, testPercent=0.2
)

In [6]:
# Show the three year groups, one per line: train, validation, test.
print(train_year, val_year, test_year, sep='\n')

[2016 2002 2007 2011 2001 2006 2008 2014 2010]
[2005 2004 2009 2015]
[2017 2012 2013 2003]


In [7]:
# Band name(s) of interest; presumably meant for the `bandsUse` arguments
# further down (not referenced by any cell visible here).
band_use = ["NIR"]

In [8]:
# Reservoir indices to process: just index 0 (Tonlesap only).
reservoirsUse = np.arange(0, 1)

In [85]:
def createFileData_1(dataDir, reservoirsUse, bandsUse, windowSize, yearRange, inputFile, labelFile):
    """Write sliding-window (input, label) file lists to two CSV files.

    Expects the layout ``dataDir/<reservoir>/<year>/<day>/<files>``, where the
    day folder names parse as integers (they are sorted numerically).

    For each reservoir and year, a window is filled with the matching files of
    the first ``windowSize`` day folders. Each remaining day folder then drops
    the oldest path from the window and appends that day's matching files.
    After every step one row is written to each CSV: all window paths except
    the last go to ``inputFile``, and the last path goes to ``labelFile``.

    A file "matches" when any entry of ``bandsUse`` is a substring of its name.

    NOTE(review): the window drops one path per step but gains one path per
    matching file, so it grows when a day has more than one match (e.g.
    several bands). Confirm the intended multi-band behaviour.

    Parameters
    ----------
    dataDir : str
        Root directory of the data tree.
    reservoirsUse : iterable
        Reservoir identifiers (converted with ``str`` to build the path).
    bandsUse : iterable of str
        Substrings used to select files inside each day folder.
    windowSize : int
        Number of day folders in the initial window.
    yearRange : iterable
        Years to process (converted with ``str`` to build the path).
    inputFile, labelFile : str
        Output CSV paths; both are overwritten.

    Returns
    -------
    list of str
        The last window that was built, or ``[]`` if nothing was iterated.

    Raises
    ------
    IndexError
        If a year contains fewer than ``windowSize`` day folders.
    """
    def bandFiles(dayDir):
        # Paths of the files in dayDir whose name contains a requested band,
        # grouped by band, then in os.listdir order.
        listFiles = os.listdir(dayDir)
        return [dayDir + '/' + file
                for band in bandsUse
                for file in listFiles
                if band in file]

    # Defined before the loops so the final return is valid even when
    # reservoirsUse or yearRange is empty.
    listFilesInWindow = []

    # `with` closes both files even if a directory is missing mid-run;
    # newline='' is what the csv module requires for files it writes to.
    with open(inputFile, "w", newline='') as inputF, \
            open(labelFile, "w", newline='') as labelF:
        writerInput = csv.writer(inputF)
        writerLabel = csv.writer(labelF)

        for reservoir in reservoirsUse:
            for year in yearRange:
                yearDir = dataDir + '/' + str(reservoir) + '/' + str(year)
                listFolders = sorted(os.listdir(yearDir), key=int)
                if windowSize > len(listFolders):
                    raise IndexError(
                        "windowSize ({}) exceeds the {} day folder(s) in {}".format(
                            windowSize, len(listFolders), yearDir))

                # Initial window: the first windowSize days of the year.
                listFilesInWindow = []
                for i in range(windowSize):
                    listFilesInWindow.extend(bandFiles(yearDir + '/' + listFolders[i]))
                writerInput.writerow(listFilesInWindow[:-1])
                writerLabel.writerow(listFilesInWindow[-1:])

                # Slide over the remaining days: drop the oldest path, add the new day.
                for i in range(windowSize, len(listFolders)):
                    listFilesInWindow = listFilesInWindow[1:]
                    listFilesInWindow.extend(bandFiles(yearDir + '/' + listFolders[i]))
                    writerInput.writerow(listFilesInWindow[:-1])
                    writerLabel.writerow(listFilesInWindow[-1:])

    return listFilesInWindow

In [21]:
def createFileData_2(dataDir, reservoirsUse, bandsUse, windowSize, yearRange, inputFile, targetFile):
    """Write one input row and one target row per (reservoir, year) to two CSVs.

    Expects the layout ``dataDir/<reservoir>/<year>/<day>/<files>``, where the
    day folder names parse as integers (they are sorted numerically).

    For each reservoir and year:

    * the ``inputFile`` row lists the matching files of every day folder
      except the last one;
    * the ``targetFile`` row lists the matching files of the day folders from
      index ``windowSize`` to the end.

    A file "matches" when any entry of ``bandsUse`` is a substring of its name.

    Parameters
    ----------
    dataDir : str
        Root directory of the data tree.
    reservoirsUse : iterable
        Reservoir identifiers (converted with ``str`` to build the path).
    bandsUse : iterable of str
        Substrings used to select files inside each day folder.
    windowSize : int
        Index of the first day folder that contributes to the target row.
    yearRange : iterable
        Years to process (converted with ``str`` to build the path).
    inputFile, targetFile : str
        Output CSV paths; both are overwritten.

    Returns
    -------
    list of str
        The input list of the last (reservoir, year) processed, or ``[]`` if
        nothing was iterated.
    """
    def bandFilesForDays(yearDir, listFolders, dayIndices):
        # Matching file paths for the given day indices, in day order; within
        # a day they are grouped by band, then in os.listdir order.
        paths = []
        for i in dayIndices:
            dayDir = yearDir + '/' + listFolders[i]
            listFiles = os.listdir(dayDir)
            paths.extend(dayDir + '/' + file
                         for band in bandsUse
                         for file in listFiles
                         if band in file)
        return paths

    # Defined before the loops so the final return is valid even when
    # reservoirsUse or yearRange is empty.
    listInput = []

    # `with` closes both files even if a directory is missing mid-run;
    # newline='' is what the csv module requires for files it writes to.
    with open(inputFile, "w", newline='') as inputF, \
            open(targetFile, "w", newline='') as targetF:
        writerInput = csv.writer(inputF)
        writerTarget = csv.writer(targetF)

        for reservoir in reservoirsUse:
            for year in yearRange:
                yearDir = dataDir + '/' + str(reservoir) + '/' + str(year)
                listFolders = sorted(os.listdir(yearDir), key=int)
                nDays = len(listFolders)

                # Inputs: every day of the year except the last one.
                listInput = bandFilesForDays(yearDir, listFolders, range(nDays - 1))
                writerInput.writerow(listInput)

                # Targets: the days from index windowSize onward.
                listTarget = bandFilesForDays(yearDir, listFolders, range(windowSize, nDays))
                writerTarget.writerow(listTarget)

    return listInput

In [23]:
# Write the training CSVs for reservoir 0 / NIR and print the returned input list.
print(
    createFileData_2(
        dataDir='MOD13Q1',
        reservoirsUse=[0],
        bandsUse=['NIR'],
        windowSize=7,
        yearRange=train_year,
        inputFile='train_data.csv',
        targetFile='train_target.csv',
    )
)

['MOD13Q1/0/2010/2010001/MOD13Q1.A2010001.h28v07.006.2015198101035_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010017/MOD13Q1.A2010017.h28v07.006.2015198193932_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010033/MOD13Q1.A2010033.h28v07.006.2015199115526_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010049/MOD13Q1.A2010049.h28v07.006.2015200022321_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010065/MOD13Q1.A2010065.h28v07.006.2015206075302_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010081/MOD13Q1.A2010081.h28v07.006.2015206075615_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010097/MOD13Q1.A2010097.h28v07.006.2015206224734_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010113/MOD13Q1.A2010113.h28v07.006.2015207160846_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010129/MOD13Q1.A2010129.h28v07.006.2015207185944_250m_16_days_NIR_reflectance.tif', 'MOD13Q1/0/2010/2010145/MOD13Q1.A2010145.h28v07.006.2015208145544_250m_16_days_NIR_reflect

In [27]:
def createFileData(dataDir, reservoirsUse, bandsUse, windowSize, 
                   startYear=2001, endYear=2017, valPercent=0.2, testPercent=0.2):
    """Split the years into train/val/test and write the CSV file lists for each.

    Calls ``train_val_test_split`` for the year groups, prints them (train,
    val, test, one per line), and then calls ``createFileData_2`` once per
    group. The output files are written to the current working directory:
    ``train_data.csv``/``train_target.csv``, ``val_data.csv``/``val_target.csv``
    and ``test_data.csv``/``test_target.csv``.

    Parameters
    ----------
    dataDir, reservoirsUse, bandsUse, windowSize
        Forwarded unchanged to ``createFileData_2``.
    startYear, endYear, valPercent, testPercent
        Forwarded unchanged to ``train_val_test_split``.
    """
    # train_val_test_split returns (train, val, test) in that order. Unpacking
    # it in any other order silently swaps which years land in the val_* and
    # test_* files.
    train_year, val_year, test_year = train_val_test_split(startYear, endYear, valPercent, testPercent)
    print(train_year)
    print(val_year)
    print(test_year)

    splits = (
        (train_year, 'train_data.csv', 'train_target.csv'),
        (val_year, 'val_data.csv', 'val_target.csv'),
        (test_year, 'test_data.csv', 'test_target.csv'),
    )
    for years, dataFile, targetFile in splits:
        createFileData_2(dataDir, reservoirsUse, bandsUse, windowSize,
                         years, dataFile, targetFile)
    
# Build all three CSV pairs for reservoir 0 using the NIR band and windowSize 7.
createFileData(
    dataDir='MOD13Q1',
    reservoirsUse=[0],
    bandsUse=['NIR'],
    windowSize=7,
)

[2009 2007 2013 2011 2012 2017 2004 2016 2001]
[2006 2005 2002 2003]
[2008 2014 2010 2015]
