In [None]:
import json
from json import JSONEncoder
import re
import time

In [None]:
class LogMetrics:
    """Container for the values extracted from one log file.

    Every attribute starts out empty; the code that parses the log is
    expected to fill them in.

    Attributes:
        log_file_name: Name of the log file.
        directory: Source and destination directory paths.
        metrics: Metrics rows, one entry per row.
        error: Error flag, initially False.
        error_message: Error text, initially empty.
    """

    def __init__(self):
        # Attribute order is kept stable because it determines the key order
        # of the instance __dict__.
        self.log_file_name: str = ''
        self.directory: dict = {}
        self.metrics: list = []
        self.error: bool = False
        self.error_message: str = ''

In [None]:
# http://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
class MyEncoder(JSONEncoder):
    """JSON encoder that serializes objects through their attribute dict.

    ``json`` only calls :meth:`default` for values it cannot encode natively,
    so built-in types (dict, list, str, int, ...) are unaffected.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Args:
            o: The object ``json`` could not serialize on its own.

        Returns:
            The instance ``__dict__`` of ``o``.

        Raises:
            TypeError: If ``o`` has no ``__dict__`` (for example a ``set`` or
                a ``__slots__``-only object); raised by the base
                implementation.
        """
        try:
            return o.__dict__
        except AttributeError:
            # Defer to the base class so unsupported values fail with the
            # TypeError that json callers expect, not a bare AttributeError.
            return super().default(o)

In [None]:
# Patterns are compiled once here instead of being handed to re.search as
# strings for every line of every log.

# Extract source and destination directory
_PATTERN_DIRECTORY_NAME = re.compile(
    r'(?i)^\s+(?P<type>Source|Dest)\s+:\s+(?P<dir>.+)')

# Locate errors: a "YYYY/MM/DD HH:MM:SS" timestamp followed by the word "error"
_PATTERN_ERROR = re.compile(
    r'(?i)^(?P<ts>\d{4}(/\d{2}){2}\s+(\d{2}:){2}\d{2})\s+error(?P<error>.+)')

# Extract metrics columns from the Dirs / Files / Bytes rows
_PATTERN_METRICS = re.compile(
    r'(?i)^\s+(?P<type>dirs|files|bytes)\s+:\s+'
    r'(?P<total>\d{1,})\s+(?P<copied>\d{1,})\s+'
    r'(?P<skipped>\d{1,})\s+(?P<mismatch>\d{1,})\s+'
    r'(?P<failed>\d{1,})\s+(?P<extras>\d{1,})')


def _metrics_row(match):
    """Turn a metrics-row match into a dict: 'type' stays a string, counts become ints."""
    return {key: value if key == 'type' else int(value)
            for key, value in match.groupdict().items()}


def _parse_robocopy_lines(lines, log_metrics):
    """Scan log lines and record errors, directories and metrics rows on log_metrics."""
    for line in lines:
        if _PATTERN_ERROR.search(line):
            # The whole line (including its line ending) is stored; a later
            # error line overwrites an earlier one.
            log_metrics.error = True
            log_metrics.error_message = line

        match = _PATTERN_DIRECTORY_NAME.search(line)
        if match:
            # Keyed by the label exactly as it appears in the log.
            log_metrics.directory[match.group('type')] = match.group('dir')

        match = _PATTERN_METRICS.search(line)
        if match:
            log_metrics.metrics.append(_metrics_row(match))


def process_robocopy_log(file_list):
    """Summarize each robocopy log in ``file_list`` as a JSON file.

    For every log the function collects:

    * the source and destination directory lines,
    * whether a timestamped error line occurred (only the last such line is
      kept as the message),
    * the Dirs / Files / Bytes rows of the metrics table, with every count
      converted to ``int``.

    The result is written to ``<file_name>.json`` (UTF-8, one-space indent).

    Args:
        file_list: Iterable of log file paths. Each file is read as UTF-8.

    Returns:
        None. The output is the JSON file written for each input path.

    Raises:
        OSError: If a log cannot be opened or its JSON file cannot be written.
        UnicodeDecodeError: If a log is not valid UTF-8.
    """
    for file_name in file_list:
        log_metrics = LogMetrics()
        log_metrics.log_file_name = file_name

        with open(file_name, 'r', encoding='utf-8') as rdr:
            _parse_robocopy_lines(rdr, log_metrics)

        # The input handle is closed before the output is opened.
        with open(file_name + '.json', 'w', encoding='utf-8') as wr:
            # indent=1 yields the same one-space indent the previous
            # indent=True produced, without relying on bool-as-int.
            json.dump(log_metrics, wr, ensure_ascii=False, cls=MyEncoder, indent=1)

In [None]:
# Robocopy logs to process; process_robocopy_log receives this list as-is.
file_list = [
        r"..\Data\RobocopyLog\rocopylog_invalid_source.txt",
        r"..\Data\RobocopyLog\rocopylog.txt"]

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() is wall-clock time and can jump if the system clock
# is adjusted mid-run.
start_time = time.perf_counter()

process_robocopy_log(file_list)

end_time = time.perf_counter()

print('Elapsed seconds: {0:.2f}s'.format(end_time-start_time))