In [1]:
import json
import re
from pathlib import Path
import traceback




def read_file(filename):
    """Return the full contents of ``filename`` as a single string.

    The file is opened in text mode for reading and is closed before this
    function returns, even if reading raises.

    Parameters
    ----------
    filename : path-like
        Path of the file to read; passed straight to ``open``.

    Returns
    -------
    str
        Everything ``read()`` returns for the opened file.
    """
    # The context manager guarantees the handle is released; the previous
    # version left closing to garbage collection.
    with open(filename, 'r') as handle:
        return handle.read()

def parse_older_format(data_str)->list:
    """Decode the JSON payload of every older-format log line in ``data_str``.

    A matching line looks like
    ``YYYY-MM-DD HH:MM:SS,mmm <anything> b'<json>'``. Only the text between
    ``b'`` and the closing ``'`` is used; the timestamp and the free-form
    middle field are matched but discarded.

    Returns a list with one ``json.loads`` result per matching line, in the
    order the lines appear. ``json.loads`` errors propagate to the caller.
    """
    line_selector = r"([0-9]{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1]) (2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9]),([0-9]{3}) (.*) b'(.*)'"
    # findall yields one tuple of captured groups per line; the payload is
    # the last group.
    return [json.loads(groups[-1]) for groups in re.findall(line_selector, data_str)]

def parse_newer_format(data_str)->list:
    """Decode the JSON payload of every newer-format log line in ``data_str``.

    A matching line looks like
    ``[Day, DD Mon YYYY HH:MM:SS] INFO b'<json>'``. Only the text between
    ``b'`` and the closing ``'`` is used; the timestamp fields are matched
    but discarded.

    The day and month names accept both the three-letter abbreviations
    (``Tue``, ``Jun``, ``Jul``) and the longer spellings (``Tues``,
    ``June``, ``July``). The earlier pattern accepted only the longer
    spellings, so lines stamped with the three-letter forms were skipped.

    Returns a list with one ``json.loads`` result per matching line, in the
    order the lines appear. ``json.loads`` errors propagate to the caller.
    """
    line_selector = r"\[(Sun|Mon|Tues?|Wed|Thu|Fri|Sat), (\d{2}) (Jan|Feb|Mar|Apr|May|June?|July?|Aug|Sep|Oct|Nov|Dec) (\d{4}) (\d{2}):(\d{2}):(\d{2})\] INFO b'(.*)'"
    # Each findall result is a tuple of captured groups; the payload is the
    # last one.
    return [json.loads(groups[-1]) for groups in re.findall(line_selector, data_str)]


def parse_ttn_log_format(data_str)->list:
    """Decode the JSON body of every matching TtnListener line in ``data_str``.

    A line matches when it starts with a ``YYYY-MM-DD HH:MM:SS,mmm``
    timestamp followed by ``INFO  TtnListener:<n> - Message arrived on topic
    'icss_lora_tracker/devices/icspace19/up': `` and a ``{...}`` body. The
    body is parsed with ``json.loads``; the timestamp is discarded.

    Returns the parsed bodies in the order the lines appear. ``json.loads``
    errors propagate to the caller.
    """
    pattern = re.compile(
        r"([0-9]{4})-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1]) (2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9]),([0-9]{3}) INFO  TtnListener:\d+ - Message arrived on topic 'icss_lora_tracker/devices/icspace19/up': ({.*})"
    )
    messages = []
    for match in pattern.finditer(data_str):
        # Group 8 is the trailing {...} body; groups 1-7 are the timestamp.
        messages.append(json.loads(match.group(8)))
    return messages


def special_replace(raw_str,the_string):
    """Wrap every ``<the_string>:`` in ``raw_str`` in double quotes.

    Each occurrence of ``the_string`` immediately followed by a colon
    becomes ``"the_string":``. Occurrences not followed by a colon are
    left alone. Returns the new string.
    """
    bare_key = "{key}:".format(key=the_string)
    quoted_key = '"{key}":'.format(key=the_string)
    return raw_str.replace(bare_key, quoted_key)

def process_str(string):
    """Rewrite a ``MSG(key=value, ...)`` style record into JSON-like text.

    The rewrite is a fixed sequence of plain substring replacements applied
    to the whole text, in this order:

    1. drop ``MSG``, turn ``(``/``)`` into ``{``/``}``, ``=`` into ``:`` and
       single quotes into double quotes;
    2. wrap each known field name in double quotes;
    3. quote the remaining time/timestamp keys and lower-case ``True``;
    4. quote ``port:`` and ``gateways:`` via ``special_replace``.

    Because these are substring replacements, they also apply to matching
    text inside values (for example any ``=`` in a value becomes ``:``).
    The order matters: later steps rely on the quoting done by earlier ones.

    Returns the rewritten string; it is not validated as JSON here.
    """
    # Step 1: structural characters.
    structural_rewrites = (
        ("MSG", ""),
        ("(", "{"),
        (")", "}"),
        ("=", ":"),
        ("'", '"'),
    )
    text = string
    for old, new in structural_rewrites:
        text = text.replace(old, new)

    # Step 2: field names that get quoted wherever they occur.
    field_names = (
        "app_id", "dev_id", "hardware_serial", "counter", "payload_raw",
        "payload_fields", "metadata", "frequency", "modulation", "data_rate",
        "airtime", "coding_rate", "gtw_id", "gtw_trusted", "channel", "rssi",
        "snr", "rf_chain", "latitude", "longitude", "location_source",
        "analog_in_3", "barometric_pressure_0", "digital_out_4", "gps_2",
        "temperature_1", "altitude", "is_retry", "antenna",
    )
    for name in field_names:
        text = text.replace(name, '"' + name + '"')

    # Step 3: keys that are only quoted in key position, plus the boolean.
    keyed_rewrites = (
        ("{time", '{"time"'),
        ("True", 'true'),
        ("time:", '"time":'),
        ("fine_timestamp:", '"fine_timestamp":'),
        ("fine_timestamp_encrypted:", '"fine_timestamp_encrypted":'),
        ("timestamp:", '"timestamp":'),
    )
    for old, new in keyed_rewrites:
        text = text.replace(old, new)

    # Step 4: keys handled through the shared helper.
    for key in ("port", "gateways"):
        text = special_replace(text, key)

    return text

def parse_jackson_log_format(data_str)->list:
    """Parse every ``MSG(app_id='icss_lora_tracker', dev_id='icspace...`` record.

    Each matching line (from ``MSG(`` to the end of that line) is converted
    to JSON-like text with ``process_str`` and decoded with ``json.loads``.

    Records are skipped when:

    * the converted text contains ``relative_humidity_2`` or ``analog_in_1``;
    * decoding fails. The traceback and the offending text are printed and
      the record is left out of the result.

    Previously a decoding failure fell through to the append. That raised
    ``NameError`` on the first record, or appended the previous record a
    second time on later ones.

    Returns the list of decoded records in input order.
    """
    line_selector = r"(MSG\(app_id='icss_lora_tracker', dev_id='icspace.*)"
    data_list = []
    for raw_record in re.findall(line_selector, data_str):
        data = process_str(raw_record)
        if "relative_humidity_2" in data or "analog_in_1" in data:
            continue

        try:
            parsed = json.loads(data)
        except Exception:
            # Best effort: report the bad record and move on without
            # appending anything for it.
            print(traceback.format_exc())
            print(data)
            continue

        data_list.append(parsed)
    return data_list





In [5]:
# Read mqtt_log_data-1.txt and decode the JSON payload of each
# newer-format log line into data_new.
filename = "mqtt_log_data-1.txt"
str_data= read_file(filename)
data_new = parse_newer_format(str_data)

In [6]:
# Read ICSPACE20-21.out and decode the JSON payload of each
# older-format log line into data_old.
filename = "ICSPACE20-21.out"
str_data= read_file(filename)
data_old = parse_older_format(str_data)

In [12]:
# Read ttnhabbridge.log and decode the JSON body of each TtnListener
# "Message arrived" line for icspace19 into icspace19_data.
filename = "ttnhabbridge.log"
str_data= read_file(filename)
icspace19_data = parse_ttn_log_format(str_data)

In [3]:
# Read ttn-logs/raw_log.txt and parse its MSG(...) records into
# jackson_processed.
filename = "ttn-logs/raw_log.txt"
str_data = read_file(filename)
jackson_processed = parse_jackson_log_format(str_data)

# NOTE(review): flight_data_collection is only assigned in a cell further
# down the notebook, so this line needs that cell to have run first.
flight_data_collection

Collection(Database(MongoClient(host=['cluster0-shard-00-02.edygp.mongodb.net:27017', 'cluster0-shard-00-01.edygp.mongodb.net:27017', 'cluster0-shard-00-00.edygp.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin', replicaset='atlas-110oyy-shard-0', ssl=True, retrywrites=True, w='majority'), 'flight_data'), 'all_flights')

In [1]:
import getpass
import os
import urllib.parse

from pymongo import MongoClient

# init mongo connection
# Credentials are taken from the environment so the password is never stored
# in the notebook source. MONGO_USERNAME falls back to the account name used
# previously; if MONGO_PASSWORD is unset the password is prompted for.
username = urllib.parse.quote_plus(os.environ.get("MONGO_USERNAME", "dbUser"))
password = urllib.parse.quote_plus(
    os.environ.get("MONGO_PASSWORD") or getpass.getpass("MongoDB password: ")
)
url = "mongodb+srv://{}:{}@cluster0.edygp.mongodb.net/test?retryWrites=true&w=majority".format(username, password)
client = MongoClient(url)

# Handle to the all_flights collection of the flight_data database.
flight_data_collection = client["flight_data"]["all_flights"]

In [6]:
# Bulk-insert the parsed records in jackson_processed into the raw_logs
# collection of the flight_data database.
# NOTE(review): re-running this cell submits the same records again;
# confirm how repeated runs should be handled.
client["flight_data"]["raw_logs"].insert_many(jackson_processed)

<pymongo.results.InsertManyResult at 0x1e78b538f48>

In [2]:
# NOTE(review): displaying `url` writes the full connection string, password
# included, into the saved cell output; clear this output before sharing.
url

'mongodb+srv://dbUser:PwBhv72bEOq4NGlI@cluster0.edygp.mongodb.net/test?retryWrites=true&w=majority'

In [None]:
# Upsert `doc`: update every document matching it, or insert it when nothing
# matches. The update must use an update operator ($set), and upsert is a
# Python keyword argument; the earlier `{upsert:true}` form is Mongo shell
# syntax and raises NameError in Python.
# NOTE(review): `doc` is not assigned in the cells shown here; it has to be
# defined before this cell runs.
flight_data_collection.update_many(doc, {"$set": doc}, upsert=True)