In [None]:
import pandas as pd
import numpy as np
import os
import shutil
import glob

In [None]:

### @brief method that merges two .csv files horizontally based on the identifiers
#   of a given column 
#   @param[in] icsv1 the first imported .csv file to be merged horizontally
#   @param[in] icsv2 the second imported .csv file to be merged horizontally
#   @param[in] ocsv the output csv file
#  @param[in] samplingSizeFile. This is also found in the same location as the 
#  .ipynb files of the PlotToSat. It contains a number that shows how many plot 
#  data were interpreted per each exported .csv file. Please note that a .csv 
#  file may have fewer lines due to missing or masked out data
#def mergeHorizontally(icsv1, icsv2, ocsv,samplingSizeFile):
#    df1 = pd.read_csv(icsv1)
#    df2 = pd.read_csv(icsv2)

#    mergedDF = pd.merge(df1, df2,on="indexField",how='outer')

#    mergedDF.to_csv(ocsv, index=False)

    
## @brief method that merges all the exported data from PlotToSat into 
#  a single .csv file
#  @notes it assumes that all processes finished smoothly
#  @param[in] nameOfCSVFolderDir the directory that contains all the exported
#  .csv files. You need to download and extract this folder from Google Drive
#  @param[in] fieldDataWithIdentifiers after each run, this is found at the 
#  same directory as PlotToSat .ipynb files. It is the field data with an extra
#  column named "indexField". This column saves some identifiers used to merge
#  the field data with the exported EO spectral temporal signatures
#  @param[in] samplingSizeFile. This is also found in the same location as the 
#  .ipynb files of the PlotToSat. It contains a number that shows how many plot 
#  data were interpreted per each exported .csv file. Please note that a .csv 
#  file may have fewer lines due to missing or masked out data
def mergeAll(nameOfCSVFolderDir,fieldDataWithIdentifiers,samplingSizeFile):
    """Merge the exported per-chunk .csv files with the field data.

    Files in nameOfCSVFolderDir are grouped by the last 11 characters of
    their path (S1_mean.csv, S2_mean.csv, S1_stdD.csv, S2_stdD.csv). The
    field data are sliced into consecutive chunks of ``samplingSize`` rows
    and the n-th chunk is outer-joined on the "indexField" column with the
    n-th file of each group. Mean files go into one table and standard
    deviation files into another. The chunks are then concatenated into
    ``MergedCsvs/<name>_mean.csv`` and ``MergedCsvs/<name>_stdD.csv``.

    Side effects: the ``TmpDir`` and ``MergedCsvs`` sub-directories of
    nameOfCSVFolderDir are deleted if present and recreated; ``TmpDir`` is
    removed again once merging finishes or fails.

    Args:
        nameOfCSVFolderDir: directory holding the exported .csv files.
        fieldDataWithIdentifiers: .csv file with the field data; it must
            contain an "indexField" column.
        samplingSizeFile: text file whose first line is the number of
            field-data rows per exported file. If it cannot be opened the
            default of 400 is used.

    Raises:
        ValueError: the first line of samplingSizeFile is not an integer,
            or the sampling size is not positive.
        Exception: the first file of a group does not start at index
            0000000000, the Sentinel-1 and Sentinel-2 groups differ in
            size, or no mean file was found at all.
    """
    defaultSamplingSize = 400

    # make tmp directories
    TmpDir = os.path.join(nameOfCSVFolderDir, "TmpDir")
    ResDir = os.path.join(nameOfCSVFolderDir, "MergedCsvs")
    _recreateDir(TmpDir, "TmpDir Exist")
    _recreateDir(ResDir, "MergedCsvs Exist")

    # Read sampling rate. The handle is scoped to the with-block so it is
    # closed on every path and never referenced later on.
    try:
        with open(samplingSizeFile, "r") as sizeFile:
            firstLine = sizeFile.readline()
    except OSError:
        print("ERROR:", samplingSizeFile," does not exist. Setting sampling size to default = 400")
        samplingSize = defaultSamplingSize
    else:
        print (firstLine , "************************")
        samplingSize = int(firstLine)
    if samplingSize <= 0:
        # a non-positive step would never advance through the field data
        raise ValueError("ERROR: sampling size must be a positive integer")

    # Group the exported files by the last 11 characters of their path.
    exportsBySuffix = {
        "S1_mean.csv": [],
        "S2_mean.csv": [],
        "S1_stdD.csv": [],
        "S2_stdD.csv": [],
    }
    for csvPath in glob.glob(os.path.join(nameOfCSVFolderDir, "*.csv")):
        suffix = csvPath[-11:]
        if suffix in exportsBySuffix:
            exportsBySuffix[suffix].append(csvPath)
        elif suffix == "tifiers.csv": #in case field data are added in the nameOfCSVFolderDir
            print(csvPath , " is suspected to be the field data file")
        else:
            print("WARNING: ", csvPath, " is ignored since it is not recognised as an output of the system")

    ListS1Mean = sorted(exportsBySuffix["S1_mean.csv"])
    ListS1StdD = sorted(exportsBySuffix["S1_stdD.csv"])
    ListS2Mean = sorted(exportsBySuffix["S2_mean.csv"])
    ListS2StdD = sorted(exportsBySuffix["S2_stdD.csv"])

    # The first file of every non-empty group must carry the start index
    # 0000000000, located 33 to 23 characters before the end of its path.
    for exportList in (ListS1Mean, ListS1StdD, ListS2Mean, ListS2StdD):
        if exportList and exportList[0][-33:-23] != "0000000000":
            raise Exception("ERROR: csv not sorted correctly or files are missing")

    if ListS1Mean and ListS2Mean and len(ListS2Mean) != len(ListS1Mean):
        raise Exception ("ERROR: if both Sentinel 1 and Sentinel 2 data were loaded then equal number of csv files should have existed - maybe GEE processing have not finished yet")
    if ListS1StdD and ListS2StdD and len(ListS2StdD) != len(ListS1StdD):
        raise Exception ("ERROR: if both Sentinel 1 and Sentinel 2 data were loaded then equal number of csv files should have existed - maybe GEE processing have not finished yet")

    if ListS1Mean:
        firstItem = ListS1Mean[0]
    elif ListS2Mean:
        firstItem = ListS2Mean[0]
    else:
        raise Exception ("ERROR: no data found. Both Sentinel-1 and Sentinel-2 lists are empty")

    dfFieldData = pd.read_csv(fieldDataWithIdentifiers)
    lenOfFieldData = len(dfFieldData.index)
    # Drop the trailing 34 characters (both indices plus the type suffix)
    # and the directory part to obtain the common output name.
    fileNames = os.path.split(firstItem[:-34])[1]

    tmpMeanFiles = []
    tmpStdDFiles = []
    try:
        chunkStarts = range(0, lenOfFieldData, samplingSize)
        for count, currentMin in enumerate(chunkStarts):
            currentMax = currentMin + samplingSize
            strCurrentMin = str(currentMin  ).rjust(10,'0')
            strCurrentMax = str(currentMax-1).rjust(10,'0')
            FilenamesWithSampling = fileNames+"_"+strCurrentMin+"_"+strCurrentMax
            subsetFieldDF = dfFieldData.iloc[currentMin:currentMax]

            dfMean = subsetFieldDF
            for exportList in (ListS1Mean, ListS2Mean):
                if count < len(exportList):
                    dfMean = _mergeSignatureFile(dfMean, exportList[count])

            # Standard deviations accumulate on their own table so that the
            # S2 merge keeps the S1 columns and no mean columns leak in.
            dfStdD = subsetFieldDF
            for exportList in (ListS1StdD, ListS2StdD):
                if count < len(exportList):
                    dfStdD = _mergeSignatureFile(dfStdD, exportList[count])

            tmpMean = os.path.join(TmpDir, FilenamesWithSampling+"_Mean.csv")
            tmpStdD = os.path.join(TmpDir, FilenamesWithSampling+"_StdD.csv")
            dfMean.to_csv(tmpMean, index=False)
            dfStdD.to_csv(tmpStdD, index=False)
            tmpMeanFiles.append(tmpMean)
            tmpStdDFiles.append(tmpStdD)

        # The chunk files were written in ascending index order, which is
        # the order in which they are concatenated.
        _concatenateCsvFiles(tmpMeanFiles, os.path.join(ResDir, fileNames+"_mean.csv"))
        _concatenateCsvFiles(tmpStdDFiles, os.path.join(ResDir, fileNames+"_stdD.csv"))
    finally:
        # best-effort cleanup so a failed run leaves no temporary files
        shutil.rmtree(TmpDir, ignore_errors=True)

    print("Results are stored in ", ResDir)


def _recreateDir(dirPath, existsMessage):
    """Delete dirPath if it exists (printing existsMessage) and create it empty."""
    if os.path.isdir(dirPath):
        print (existsMessage)
        shutil.rmtree(dirPath)
    os.mkdir(dirPath)


def _mergeSignatureFile(baseDF, csvFile):
    """Outer-join csvFile, with alphabetically sorted columns, onto baseDF by "indexField"."""
    signatureDF = pd.read_csv(csvFile)
    signatureDF = signatureDF.reindex(sorted(signatureDF.columns), axis=1)
    return pd.merge(baseDF, signatureDF, on="indexField", how='outer')


def _concatenateCsvFiles(csvFiles, outPath):
    """Write csvFiles into outPath, keeping only the header line of the first file."""
    # The inputs were written by DataFrame.to_csv, whose default encoding is
    # UTF-8, so both sides are opened with that encoding.
    with open(outPath, "w", encoding="utf-8") as outFile:
        for fileNo, csvFile in enumerate(csvFiles):
            with open(csvFile, encoding="utf-8") as inFile:
                header = inFile.readline()
                if fileNo == 0:
                    outFile.write(header)
                # the remaining lines are data rows
                outFile.writelines(inFile)
