### Ensure you are using a Python 2.7 kernel (select Kernel -> Change Kernel -> Python 2.x)

In [None]:
import json
from json import JSONEncoder
import re
import time
import datetime

In [None]:
class SensorData(object):
    """Container for the sensor readings stored for one day.

    Inherits from ``object`` so the class is new-style on Python 2 as well
    as Python 3.
    """

    def __init__(self):
        # Day for which data is stored; empty until a caller fills it in.
        self.date_time = ''
        # Dictionary mapping a parameter name to its list of values.
        # Created per instance, so records never share the same dict.
        self.sensor_value = {}

In [None]:
class MyEncoder(JSONEncoder):
    """JSON encoder that serialises plain objects through their attributes.

    Approach taken from:
    http://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
    """

    def default(self, o):
        """Return the attribute dictionary of ``o`` for serialisation.

        Args:
            o: The object to convert into something JSON can encode.

        Returns:
            ``o.__dict__``, which the encoder then writes as a JSON object.

        Raises:
            TypeError: If ``o`` has no ``__dict__``; raised by the base
                class implementation.
        """
        try:
            return o.__dict__
        except AttributeError:
            # No attribute dictionary to fall back on: defer to the base
            # class so callers get the TypeError that json documents for
            # unserialisable values instead of an AttributeError.
            return JSONEncoder.default(self, o)

In [None]:
def parse_sensor_data_test(file_name):
    """Parse a fixed-width sensor file, write it out as JSON and time each phase.

    Every line of the input is treated as one day of data.  A line is
    expected to start with an 8-digit ``YYYYMMDD`` header, followed by
    repeated readings made of a 4-character temperature field and a
    3-character humidity field.  A field consisting of ``N`` characters
    (``NNNN`` / ``NNN``) is stored as ``None``; every other field is stored
    as an ``int``.

    One ``SensorData`` record is appended per input line.  A line without a
    header still produces a record, with an empty date and empty value
    lists.  The records are serialised with ``MyEncoder`` and written to
    ``file_name + '.json'``.  The cumulative elapsed time is printed after
    parsing, JSON conversion, UTF-8 encoding and writing.

    Both files are opened in binary mode with explicit UTF-8 decoding and
    encoding, so the function runs on Python 2.7 and Python 3 and the output
    always uses ``\\n`` newlines.

    Args:
        file_name: Path of the UTF-8 encoded sensor data file.

    Returns:
        None.  Results are written to disk and timings are printed.

    Raises:
        IOError / OSError: If either file cannot be opened.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    start_time = time.time()

    # Plain raw literals (not ``ur''``) so the patterns are valid syntax on
    # Python 3 too.  Both patterns are compiled once, outside the line loop.
    header_regex = re.compile(r'(?m)^(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
    # NOTE: the group names become the keys of the emitted JSON, so the
    # 'humdity' spelling is kept to leave the output format unchanged.
    sensor_data_regex = re.compile(r'(?m)(?P<temperature>\d{4}|NNNN)(?P<humdity>\d{3}|NNN)')
    param_names = list(sensor_data_regex.groupindex)

    all_data = []
    with open(file_name, 'rb') as rdr:
        for raw_line in rdr:
            line = raw_line.decode('utf-8')

            # One row contains a day worth of data
            sensor_data = SensorData()
            for parameter in param_names:
                sensor_data.sensor_value[parameter] = []

            header = header_regex.search(line)
            if header:
                sensor_data.date_time = '-'.join(
                    [header.group('year'), header.group('month'), header.group('day')])

                # Readings start immediately after the date header.
                for reading in sensor_data_regex.finditer(line, header.end()):
                    for parameter in param_names:
                        try:
                            parsed = int(reading.group(parameter))
                        except ValueError:
                            # NNN or NNNN => store none
                            parsed = None
                        sensor_data.sensor_value[parameter].append(parsed)

            all_data.append(sensor_data)

    # The input file is closed at this point; only the timing of the
    # remaining phases is reported below.
    print (u'Elapsed Time For Parsing: {0}'.format(time.time()-start_time))

    # indent=1 produces the same one-space indentation as the former indent=True.
    json_out = json.dumps(all_data, ensure_ascii=False, cls=MyEncoder, indent=1)
    print (u'Including JSON Conversion: {0}'.format(time.time()-start_time))

    json_out = json_out.encode('utf-8')
    print (u'Including Encoding Time: {0}'.format(time.time()-start_time))

    # Binary mode: the already-encoded bytes are written unchanged.
    with open(file_name+'.json', 'wb') as wr:
        wr.write(json_out)

    print (u'Including Write Time: {0}'.format(time.time()-start_time))

In [None]:
import os

# Build the input path from its components so it resolves on any OS; on
# Windows this yields the same "..\Data\SensorData\sensordata_365.txt" string.
file_name = os.path.join('..', 'Data', 'SensorData', 'sensordata_365.txt')
parse_sensor_data_test(file_name)