Skip to content

Commit

Permalink
fix: [import_dir] remove special characters
Browse files Browse the repository at this point in the history
  • Loading branch information
Terrtia committed Feb 25, 2020
1 parent 72fe8a2 commit bdf2fce
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions bin/import_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
import time, datetime
import magic
import re

'''
'
Expand Down Expand Up @@ -50,6 +51,9 @@ def is_hierachy_valid(path):
correctDate = False
return correctDate

def sanitize_str(str_var, invalid_char_regex):
    """Return ``str_var`` with unwanted characters neutralised.

    Every match of ``invalid_char_regex`` is replaced by a dash first;
    afterwards each remaining space character is turned into an underscore.

    :param str_var: text to clean (e.g. a file or directory name)
    :param invalid_char_regex: regular expression matching the characters
        that must be replaced by ``-``
    :return: the cleaned string
    """
    pattern = re.compile(invalid_char_regex)
    dashed = pattern.sub("-", str_var)
    # Splitting on a single space and re-joining keeps runs of spaces as
    # runs of underscores, exactly like a plain character replacement.
    return "_".join(dashed.split(" "))

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Take files from a directory and push them into a 0MQ feed.')
Expand All @@ -67,6 +71,9 @@ def is_hierachy_valid(path):
socket.bind("tcp://*:{}".format(args.port))
time.sleep(1) #Important, avoid loosing the 1 message

invalid_char = r'[\\/*?&%=:"<>|#\\\']'
invalid_char_dir = r'[\\*?&%=:"<>|#\\\']'

for dirname, dirnames, filenames in os.walk(args.directory):
for filename in filenames:
complete_path = os.path.join(dirname, filename)
Expand All @@ -85,17 +92,20 @@ def is_hierachy_valid(path):
if not is_hierachy_valid(complete_path):
now = datetime.datetime.now()
paste_name = complete_path.split('/')[-1]
paste_name = sanitize_str(paste_name, invalid_char)
directory = complete_path.split('/')[-2]
directory = sanitize_str(directory, invalid_char_dir)
wanted_path = os.path.join(directory, now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"), paste_name)
wanted_path = os.path.relpath(wanted_path)
else:
#take wanted path of the file
wanted_path = os.path.realpath(complete_path)
wanted_path = os.path.relpath(complete_path)
wanted_path = wanted_path.split('/')
wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):])
wanted_path = sanitize_str(wanted_path, invalid_char_dir)

# remove whitespace
wanted_path = wanted_path.replace(' ', '')
feeder_name = args.name.replace(' ', '')
# sanitize feeder_name
feeder_name = os.path.relpath(sanitize_str(args.name, invalid_char))

path_to_send = 'import_dir/' + feeder_name + '>>' + wanted_path
s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
Expand Down

0 comments on commit bdf2fce

Please sign in to comment.