In [30]:
import os
import numpy as np
import pandas as pd
import csv

In [31]:
np.random.seed(0)

In [32]:
x = np.arange(10)

In [33]:
def train_val_test_split(startYear=2001, endYear=2017, valPercent=0.2, testPercent=0.2):
    assert(valPercent + testPercent < 1.0)
    nYears = np.int32(endYear - startYear + 1)
    nVal = np.ceil(valPercent*nYears).astype(np.int32)
    nTest = np.ceil(testPercent*nYears).astype(np.int32)
    nTrain = nYears - (nVal + nTest).astype(np.int32)
    perm = np.random.permutation(np.arange(startYear, endYear+1))
    train_year = perm[:nTrain]
    val_year = perm[nTrain:nTrain + nVal]
    test_year = perm[nYears - nTest:]
    return train_year, val_year, test_year

In [34]:
global global_train_year, global_val_year, global_test_year
global_train_year, global_val_year, global_test_year = train_val_test_split(startYear=2001, 
                                                       endYear=2017, 
                                                       valPercent=0.2, 
                                                       testPercent=0.2)

In [35]:
print(global_train_year)
print(global_val_year)
print(global_test_year)

[2002 2007 2009 2010 2014 2005 2003 2015 2011]
[2008 2017 2012 2004]
[2001 2006 2016 2013]


In [36]:
band_use = ['NIR']

In [37]:
reservoirsUse = np.arange(1) # Tonlesap only

In [41]:
def createFileData_1(dataDir, reservoirsUse, bandsUse, timeSteps, yearRange, inputFile, targetFile):
    inputF = open(inputFile, "w")
    targetF = open(targetFile, "w")
    writerInput = csv.writer(inputF)
    writertarget = csv.writer(targetF)
    
    timeSteps += 1
    for reservoir in reservoirsUse:
        for year in yearRange:
            listFilesInWindow = []
            yearDir = dataDir + '/' + str(reservoir) + '/' + str(year)
            listFolders = os.listdir(yearDir)
            listFolders = sorted(listFolders, key=lambda x: int(x))
            
            for i in np.arange(timeSteps):
                folder = listFolders[i]
                dayDir = yearDir + '/' + folder
                listFiles = os.listdir(dayDir)
                for band in bandsUse:
                    for file in listFiles:
                        if band in file:
                            listFilesInWindow.append(dayDir + '/' + file)
            writerInput.writerow(listFilesInWindow[:-1])
            writertarget.writerow(listFilesInWindow[-1:])
            
            for i in np.arange(timeSteps, len(listFolders)):
                folder = listFolders[i]
                listFilesInWindow = listFilesInWindow[1:]
                dayDir = yearDir + '/' + folder
                listFiles = os.listdir(dayDir)
                for band in bandsUse:
                    for file in listFiles:
                        if band in file:
                            listFilesInWindow.append(dayDir + '/' + file)
                writerInput.writerow(listFilesInWindow[:-1])
                writertarget.writerow(listFilesInWindow[-1:])

    inputF.close()
    targetF.close()
    
    return listFilesInWindow

In [42]:
def createFileData(dataDir, reservoirsUse, bandsUse, timeSteps, 
                   startYear=2001, endYear=2017, valPercent=0.2, testPercent=0.2):
    train_year, test_year, val_year = global_train_year, global_val_year, global_test_year
    print(train_year)
    print(val_year)
    print(test_year)
    
    if not os.path.isdir('data_file/{}'.format(timeSteps)):
        os.mkdir('data_file/{}'.format(timeSteps))
    
    # train
    createFileData_1(dataDir, reservoirsUse, bandsUse, timeSteps, train_year, 
                     'data_file/{}/train_data.csv'.format(timeSteps),
                     'data_file/{}/train_target.csv'.format(timeSteps))
    # val
    createFileData_1(dataDir, reservoirsUse, bandsUse, timeSteps, val_year, 
                     'data_file/{}/val_data.csv'.format(timeSteps),
                     'data_file/{}/val_target.csv'.format(timeSteps))
    # test
    createFileData_1(dataDir, reservoirsUse, bandsUse, timeSteps, test_year, 
                     'data_file/{}/test_data.csv'.format(timeSteps),
                     'data_file/{}/test_target.csv'.format(timeSteps))

In [43]:
createFileData(dataDir='MOD13Q1', reservoirsUse=[0], bandsUse=['NIR'], timeSteps=7)

[2002 2007 2009 2010 2014 2005 2003 2015 2011]
[2001 2006 2016 2013]
[2008 2017 2012 2004]


In [44]:
createFileData(dataDir='MOD13Q1', reservoirsUse=[0], bandsUse=['NIR'], timeSteps=10)

[2002 2007 2009 2010 2014 2005 2003 2015 2011]
[2001 2006 2016 2013]
[2008 2017 2012 2004]


In [45]:
createFileData(dataDir='MOD13Q1', reservoirsUse=[0], bandsUse=['NIR'], timeSteps=12)

[2002 2007 2009 2010 2014 2005 2003 2015 2011]
[2001 2006 2016 2013]
[2008 2017 2012 2004]


In [46]:
createFileData(dataDir='MOD13Q1', reservoirsUse=[0], bandsUse=['NIR'], timeSteps=15)

[2002 2007 2009 2010 2014 2005 2003 2015 2011]
[2001 2006 2016 2013]
[2008 2017 2012 2004]
