# In [None]:
import pandas as pd
import numpy as np
import os
import shutil
import glob
from collections import defaultdict
import csv


# In [None]:
## @brief method that rewrites a .csv file in place, keeping only the rows
#  that carry some content
#  @notes a row is discarded when it is blank, or when nothing is left of it
#  once the surrounding whitespace, the commas and the spaces are taken away
#  @param[in] file_path the path of the .csv file to be cleaned (read and
#  written as utf-8)
def remove_empty_lines(file_path):

    def carries_content(row):
        # blank rows have nothing left after trimming
        trimmed = row.strip()
        if not trimmed:
            return False
        # rows made only of separators (commas / spaces) count as empty too
        return trimmed.replace(',', '').replace(' ', '') != ''

    # load the whole file first, since it is overwritten afterwards
    with open(file_path, 'r', encoding='utf-8') as source:
        kept_rows = [row for row in source if carries_content(row)]

    # overwrite the original file with the surviving rows only
    with open(file_path, 'w', encoding='utf-8') as target:
        target.writelines(kept_rows)
        # cut the file at the current position so no stale bytes remain
        target.truncate()

    
## @brief helper that loads one group of exported .csv files and stacks
#  them into a single dataframe
#  @notes every value is read as text, "indexField" is converted to int and
#  the ".geo" and "system:index" columns are dropped. A file that cannot be
#  parsed, or that lacks one of these columns, is reported and skipped
#  @param[in] csvPaths the paths of the .csv files of the group, in the order
#  they should be stacked
#  @returns the stacked dataframe, or None if no file could be loaded
def _loadExportedGroup(csvPaths):
    frames = []
    for csvPath in csvPaths:
        try:
            tmpDF = pd.read_csv(csvPath, dtype=str, keep_default_na=False)
            tmpDF["indexField"] = tmpDF["indexField"].astype(int)
            tmpDF = tmpDF.drop(columns=[".geo", "system:index"])
        except (ValueError, KeyError):
            print("WARNING: File ", csvPath, " failed to be added to the merged file!")
            continue
        frames.append(tmpDF)
    if not frames:
        return None
    # a single concat instead of growing the dataframe file by file
    return pd.concat(frames, ignore_index=True, axis=0)


## @brief helper that strips the surrounding whitespace from the values of
#  the last column of a dataframe (in place)
#  @notes a last column that does not hold text (e.g. the integer
#  "indexField") is left untouched
#  @param[in,out] frame the dataframe to be modified
def _stripLastColumn(frame):
    try:
        stripped = frame.iloc[:, -1].str.strip()
    except AttributeError:
        # the .str accessor is only available for text columns
        return
    frame.iloc[:, -1] = stripped


## @brief method that merges all the exported data from PlotToSat with the
#  field data and writes two .csv files: one with the mean values
#  (<name>_mean.csv) and one with the standard deviations (<name>_stdD.csv)
#  @notes it assumes that all processes finished smoothly
#  @notes the results are written into the folder "MergedCsvs" inside
#  nameOfCSVFolderDir. If that folder already exists, it is deleted first
#  @param[in] nameOfCSVFolderDir the directory that contains all the exported
#  .csv files. You need to download and extract this folder from Google Drive.
#  Only files ending with S1_mean.csv, S2_mean.csv, S1_stdD.csv or S2_stdD.csv
#  are merged. Please note that an exported .csv file may have fewer lines
#  than expected due to missing or masked out data
#  @param[in] fieldDataWithIdentifiers after each run, this is found at the
#  same directory as PlotToSat .ipynb files. It is the field data with an extra
#  column named "indexField". This column saves some identifiers used to merge
#  the field data with the exported EO spectral temporal signatures
#  @throws Exception if neither Sentinel-1 nor Sentinel-2 mean files are found
def mergeAll(nameOfCSVFolderDir,fieldDataWithIdentifiers):

    # (re)create the directory of the results
    ResDir = os.path.join(nameOfCSVFolderDir, "MergedCsvs")
    if os.path.isdir(ResDir):
        print ("MergedCsvs Exist. Deleting all of its content!")
        shutil.rmtree(ResDir)
    os.mkdir(ResDir)

    # group the exported files according to the ending of their names
    exported = {"S1_mean.csv": [], "S2_mean.csv": [],
                "S1_stdD.csv": [], "S2_stdD.csv": []}
    for file1 in glob.glob(nameOfCSVFolderDir+"/*.csv"):
        for suffix, group in exported.items():
            if file1.endswith(suffix):
                group.append(file1)
                break
        else:
            if file1.endswith("tifiers.csv"):  # in case field data are added in the nameOfCSVFolderDir
                print(file1 , " is suspected to be the field data file")
            else:
                print("WARNING: ", file1, " is ignored since it is not recognised as an output of the system")

    # sorted so that the files are stacked in a deterministic order and the
    # first item of each list is stable
    for group in exported.values():
        group.sort()
    ListS1Mean = exported["S1_mean.csv"]
    ListS2Mean = exported["S2_mean.csv"]
    ListS1StdD = exported["S1_stdD.csv"]
    ListS2StdD = exported["S2_stdD.csv"]

    print ("ListS1Mean",len(ListS1Mean),"    ListS2Mean ", len(ListS2Mean), "     ListS1StdD ", len(ListS1StdD), "     ListS2StdD ", len(ListS2StdD))

    # each group becomes a single dataframe (None when nothing was loaded)
    ListS1MeanDF = _loadExportedGroup(ListS1Mean)
    ListS1StdDDF = _loadExportedGroup(ListS1StdD)
    ListS2MeanDF = _loadExportedGroup(ListS2Mean)
    ListS2StdDDF = _loadExportedGroup(ListS2StdD)

    if ListS1Mean:
        firstItem = ListS1Mean[0]
    elif ListS2Mean:
        firstItem = ListS2Mean[0]
    else:
        raise Exception ("ERROR: no data found. Both Sentinel-1 and Sentinel-2 lists are empty")
    # NOTE(review): the last 34 characters of the path are dropped to get the
    # base name of the results; presumably this is the fixed-length ending
    # that PlotToSat appends to every exported file - to be confirmed
    fileNames = os.path.split(firstItem[0:len(firstItem)-34])[1]

    dfFieldData = pd.read_csv(fieldDataWithIdentifiers, dtype=str, keep_default_na=False)
    dfFieldData["indexField"] = dfFieldData["indexField"].astype(int)

    # independent copies, so that editing one result never alters the other
    MeanDFAll = dfFieldData.copy()
    StdDFAll = dfFieldData.copy()

    # left merges keep every row of the field data, even the ones without
    # exported values; Sentinel-1 columns come before the Sentinel-2 ones
    for exportedDF in (ListS1MeanDF, ListS2MeanDF):
        if exportedDF is not None:
            MeanDFAll = pd.merge(MeanDFAll, exportedDF, how='left', on=['indexField'])
    for exportedDF in (ListS1StdDDF, ListS2StdDDF):
        if exportedDF is not None:
            StdDFAll = pd.merge(StdDFAll, exportedDF, how='left', on=['indexField'])

    _stripLastColumn(MeanDFAll)
    _stripLastColumn(StdDFAll)

    # the paths are given directly to pandas, so that the files are written
    # as utf-8 and are closed (flushed) before they are read again below
    meanPath = os.path.join(ResDir, fileNames+"_mean.csv")
    stdDPath = os.path.join(ResDir, fileNames+"_stdD.csv")
    MeanDFAll.to_csv(meanPath, encoding='utf-8', index=False)
    StdDFAll.to_csv (stdDPath, encoding='utf-8', index=False)

    remove_empty_lines(meanPath)
    remove_empty_lines(stdDPath)

    print("Results are stored in ", ResDir)
