In [76]:
import os
import fnmatch
import re
import glob
import shutil
import datetime

def findfiles(which, where='.'):
    '''List the entries of `where` whose names match the shell pattern `which`.

       The comparison ignores case, so "*jpg" matches both "a.jpg" and "B.JPG".
       Only the immediate contents of `where` are examined (no recursion), and
       names are returned in whatever order os.listdir yields them.'''
    # Adapted from https://gist.github.com/techtonik/5694830
    # fnmatch.translate turns the shell pattern into a regular expression.
    pattern = fnmatch.translate(which)
    matcher = re.compile(pattern, re.IGNORECASE)

    matches = []
    for entry in os.listdir(where):
        if matcher.match(entry) is not None:
            matches.append(entry)
    return matches

def barcode2List(b):
    '''
    Takes a single barcode and returns its digits as a list of groups.

    Only barcodes whose first two characters are "LS" (LSU) or "NO" are
    recognised. All non-digit characters are dropped before splitting:
    LSU00010203 -> ['00','01','02','03']   (pairs of digits)
    NO0010203 -> ['0','01','02','03']      (one leading digit, then pairs)

    Returns None, after printing a warning, when the barcode has any other
    prefix or contains no digits at all (e.g. "NOTES"), so such names never
    reach the folder-building step.
    '''
    prefix = b[0:2]
    # Grab only the numbers of the barcode
    digits = re.sub(r'[^0-9]', '', b)

    # A recognised prefix with nothing numeric behind it is not a barcode;
    # without this guard digits[0] below raises IndexError for "NO..." names.
    if prefix not in ("LS", "NO") or not digits:
        print("Non LSU or NO barcode detected. These files will NOT be moved or listed")
        return None

    if prefix == "LS":
        # Split string every 2 characters
        return re.findall('.{1,2}', digits)

    # NO barcodes: the first digit stands alone, the rest is split in pairs
    return [digits[0]] + re.findall('.{1,2}', digits[1:])
    

def bcList2folders(bcList,root_path):
    '''
    Walks down from root_path one level per entry of the barcode list,
    creating each folder that does not exist yet.

    Every entry is converted with int() first, so leading zeros are dropped
    from the folder name ('00' -> '0', '08' -> '8').
    Returns the path of the deepest folder (root_path itself if the list
    is empty).
    '''
    path = root_path
    for digits in bcList:
        # Descend one level; the folder name has no leading zeros
        path = os.path.join(path, str(int(digits)))
        if not os.path.isdir(path):
            os.makedirs(path)
    return path
    
def moveFiles(bcL,root_path):
    '''
    Files every incoming image belonging to one barcode.

    bcL is the barcode as a list of digit groups; root_path is the root
    folder of the collection. The nested destination folder is built with
    bcList2folders, then every file in the module-level incomingFolder whose
    name contains the joined digit string anywhere is copied into it.

    Returns a tuple: (digit string, destination folder, list of source paths).

    NOTE: files are currently copied, not moved, so the originals stay in
    incomingFolder.
    NOTE(review): the glob matches the digits as a substring, so a short
    barcode can also pick up files of a longer barcode containing the same
    digits - confirm this is acceptable.
    '''
    # Build (or reuse) the nested folders and get the final path
    destination = bcList2folders(bcL, root_path)

    # Rebuild the digit string and find every file that contains it
    digits = ''.join(bcL)
    pattern = os.path.join(incomingFolder, '*' + digits + '*')
    matched = glob.glob(pattern)

    for source in matched:
        # Swap in shutil.move(source, destination) to relocate instead of copy
        shutil.copy(source, destination)

    return digits, destination, matched
        
        
# Paths used by this run: incoming images, the folder for the run report,
# and the root folder of each collection (LSU and NO).
incomingFolder = "/Users/ChatNoir/Projects/HerbariumRA/data_storage_fake/cfla/incoming"
outFileFolder = "/Users/ChatNoir/Projects/HerbariumRA/data_storage_fake/cfla/incoming_records2018"
lsuFolder="/Users/ChatNoir/Projects/HerbariumRA/data_storage_fake/nfsshare/lsu/"
noFolder="/Users/ChatNoir/Projects/HerbariumRA/data_storage_fake/nfsshare/no/vasc_plants/"

# Alternative (non-fake) paths, kept for switching over:
#incomingFolder = "/data_storage/cfla/incoming"
#outFileFolder = "/data_storage/cfla/incoming_records2018"
#lsuFolder="/data_storage/nfsshare/lsu/"
#noFolder="/data_storage/nfsshare/no/vasc_plants/"

# Files present in the incoming folder before anything is filed
fileBefore = next(os.walk(incomingFolder))[2]

# List of all jpg files (findfiles matches case-insensitively)
incomingFileList = findfiles("*jpg", incomingFolder)

# Set of unique barcodes: everything before the first "_" or "." of each name
uniqueBarCodes = {re.split(r"_|\.", f)[0] for f in incomingFileList}

# Barcode strings into barcode lists, all digits preserved.
# barcode2List yields nothing usable for unrecognised barcodes.
barcodeLists = [barcode2List(ub) for ub in uniqueBarCodes]

# Make/traverse folders for each barcode and file its images
folderPathList = []
movedFileList = []
movedBarcodeList = []
for bcL in barcodeLists:
    # Unrecognised barcode (None) or one without digits: already reported
    if not bcL:
        continue

    # Sort based on first set of digits into lsu and no barcodes
    if bcL[0] == "00":
        root_path = lsuFolder
    elif bcL[0] == "0":
        root_path = noFolder
    else:
        print("Barcode does not start with 0. There is probably an error. Has not been filed or listed.")
        # Nothing was filed, so there is nothing to record for this barcode
        continue

    barCode, folderPath, filedPaths = moveFiles(bcL, root_path)

    # Keep lists of: files filed into folders, paths to folders, barcodes
    movedFileList.extend(os.path.basename(p) for p in filedPaths)
    folderPathList.append(folderPath)
    movedBarcodeList.append(barCode)

# Files present in the incoming folder after filing
fileAfter = next(os.walk(incomingFolder))[2]

# Make file name based on date (YYYY_MM_DD_movedimages.out)
outFileName = datetime.date.today().strftime("%Y_%m_%d") + "_movedimages.out"
outFilePath = os.path.join(outFileFolder, outFileName)
print(outFilePath)

# Make sure the records folder exists before writing the report into it
os.makedirs(outFileFolder, exist_ok=True)

# The with-block guarantees the report is flushed and closed
with open(outFilePath, 'w') as outFile:
    outFile.write(str(datetime.datetime.now()) + "\n")
    outFile.write("Barcodes moved, files moved\n")
    outFile.write(str(len(movedBarcodeList)) + ", " + str(len(movedFileList)) + "\n")
    outFile.write("File left in incoming: " + str(len(fileAfter)) + "\n")
    if fileAfter:
        outFile.write("FILES LEFT IN INCOMING. PLEASE MOVE THESE MANUALLY\n")
        for leftover in fileAfter:
            outFile.write("%s\n" % leftover)
    outFile.write("Folders Created:\n")
    for p in folderPathList:
        outFile.write("%s\n" % p)


/Users/ChatNoir/Projects/HerbariumRA/data_storage_fake/cfla/incoming_records2018/2018_08_24_movedimages2.out


<function close>

# Notes

## File naming/folders patterns

/data_storage/nfsshare/lsu/
/data_storage/nfsshare/no/vasc_plants/

LSU = 8 digits
NO = 7 digits

LSU00010101_1.jpg  
LSU/00/01/01/01  
lsu/0/1/1/1  

LSU00447014  
LSU/00/44/70/14  
lsu/0/44/70/14  

NO0088855  
NO/0/08/88/55  
no/vasc_plants/0/8/88/55  

## Pseudocode Plans

work by barcode  
when folders are made, move ALL files that match barcode. 
write function that moves files based on barcode once you are in lsu or no folder. 

## Output

date-time
number of final home paths created - # barcodes
number of files moved - total files
number of files NOT moved
list of files moved
list of home paths created
make incoming_records folder and put this output in there


## Cron job

output error messages from python into an error file with same date as records output by python
recordsFolder = "/Users/ChatNoir/Projects/HerbariumRA/data_storage_fake/cfla/incoming_records2018"
figure out how to send these files somewhere useful for Jennie?? 


## Hardcoded notes

Path to incoming file  
jpg as file extension, can be JPG or jpg  
Only works on files that start with LSU or NO, case sensitive.  
One filter for non-LSU/NO barcodes; after this filter, files are routed by the first group of digits: 00 (LSU) or 0 (NO). 