## This notebook downloads and generates data for neural network ingestion

In [1]:
%matplotlib inline
import numpy as np # linear algebra
from pathlib import Path
import pandas as pd
from tqdm.notebook import tqdm,trange

In [2]:
def gnss_log_to_dataframes(path):
    """Parse a GnssLogger text file into one DataFrame per tracked section.

    Lines starting with ``#`` are headers; a header whose first field is a
    tracked section name supplies the column names for that section. Every
    other line is a data row whose first field names its section.

    Parameters
    ----------
    path : str or path-like
        Location of the ``*_GnssLog.txt`` file.

    Returns
    -------
    dict[str, pandas.DataFrame]
        One frame per tracked section ('Raw', 'UncalAccel', 'UncalGyro',
        'UncalMag', 'Status'). All columns except 'CodeType' are converted
        with ``pd.to_numeric``.
    """
    print(f'Loading {path}', flush=True)
    gnss_section_names = {'Raw','UncalAccel', 'UncalGyro', 'UncalMag', 'Status'} # 'Fix', 'OrientationDeg' -->empty

    datas = {k: [] for k in gnss_section_names}
    gnss_map = {k: [] for k in gnss_section_names}
    # Stream the file instead of materialising every line up front.
    with open(path) as f_open:
        for dataline in f_open:
            is_header = dataline.startswith('#')
            fields = dataline.strip('#').strip().split(',')
            section = fields[0]
            # Skip notes, version numbers, blank lines and any section that is
            # not tracked (e.g. 'Fix' rows), instead of raising KeyError.
            if section not in gnss_section_names:
                continue
            if is_header:
                gnss_map[section] = fields[1:]
            else:
                datas[section].append(fields[1:])

    results = dict()
    for k, v in datas.items():
        results[k] = pd.DataFrame(v, columns=gnss_map[k])
    # pandas doesn't properly infer types from these lists by default
    for df in results.values():
        for col in df.columns:
            if col == 'CodeType':
                continue
            df[col] = pd.to_numeric(df[col])

    return results

In [3]:
def gen_rawdf(log_file):
    """Build one time-sorted frame combining GNSS epochs and sensor readings.

    The 'Raw' section is reduced to ``utcTimeMillis`` plus a derived
    ``millisSinceGpsEpoch`` (``(TimeNanos - FullBiasNanos) / 1e6`` rounded to
    an integer), keeping the first row for each ``utcTimeMillis``. The
    accelerometer, gyroscope and magnetometer sections are then outer-merged
    on ``utcTimeMillis`` after dropping their ``elapsedRealtimeNanos`` column.

    Parameters
    ----------
    log_file : str
        Path passed to ``gnss_log_to_dataframes``.

    Returns
    -------
    pandas.DataFrame
        Merged frame sorted by ``utcTimeMillis`` with a fresh integer index.
    """
    sections = gnss_log_to_dataframes(log_file)

    # GNSS epochs: one row per utcTimeMillis with the GPS-epoch timestamp.
    raw = sections['Raw']
    gps_millis = ((raw['TimeNanos'] - raw['FullBiasNanos']) / 1000000.0).round().astype('Int64')
    merged = pd.DataFrame({
        'utcTimeMillis': raw['utcTimeMillis'],
        'millisSinceGpsEpoch': gps_millis,
    }).drop_duplicates(subset='utcTimeMillis', ignore_index=True)

    # Sensor sections are appended in accel -> gyro -> mag order so the
    # resulting column layout is unchanged.
    for section_name in ('UncalAccel', 'UncalGyro', 'UncalMag'):
        sensor = sections[section_name].drop(labels='elapsedRealtimeNanos', axis=1)
        merged = merged.merge(sensor, how='outer', on='utcTimeMillis')

    return merged.sort_values(by=['utcTimeMillis'], ignore_index=True)

In [4]:
def next_avail(l,index):
    """Return the position of the first non-NaN entry of ``l`` after ``index``.

    The scan starts at ``index + 1`` and moves forward one element at a time;
    indexing past the end of ``l`` raises ``IndexError``.
    """
    candidate = index + 1
    while True:
        if not np.isnan(l[candidate]):
            return candidate
        candidate += 1

def last_avail(l,index):
    """Return the position of the closest non-NaN entry of ``l`` before ``index``.

    Parameters
    ----------
    l : sequence of float
        1-D values to scan.
    index : int
        Position to search backwards from (exclusive). A negative value is
        interpreted relative to the end of ``l``, like normal indexing.

    Returns
    -------
    int
        Non-negative position of the nearest earlier non-NaN entry.

    Raises
    ------
    IndexError
        If every entry before ``index`` is NaN. The scan stops at position 0
        rather than letting a negative position wrap around to the end of
        ``l`` and return an entry located *after* ``index``.
    """
    if index < 0:
        index += len(l)
    pos = index - 1
    while pos >= 0:
        if not np.isnan(l[pos]):
            return pos
        pos -= 1
    raise IndexError('no non-NaN value before index %d' % index)

def populate_numpy(l):
    """Fill NaN rows of ``l`` in place using time-weighted interpolation.

    Column 0 of ``l`` is used as the timestamp and column 1 as the NaN
    indicator for the whole row; columns 1 onwards are the values filled.

    * A NaN first row is copied from the first later row that has data.
    * A NaN last row is copied from the closest earlier row that has data.
    * Every other NaN row is linearly interpolated, weighted by timestamp,
      between the row just before it (already filled) and the next row that
      has data.

    Parameters
    ----------
    l : numpy.ndarray
        2-D float array, modified in place. An array with no rows is left
        untouched.

    Returns
    -------
    None
    """
    k=l.shape[0]
    if k==0: # nothing to fill
        return

    if(np.isnan(l[0,1])): # 0 pos population
        pos=next_avail(l[:,1],0)
        l[0,1:]=l[pos,1:]

    if(np.isnan(l[k-1,1])): #last pos population
        pos=last_avail(l[:,1],k-1)
        l[k-1,1:]=l[pos,1:]

    # First and last rows now hold data, so every remaining NaN row has a
    # filled predecessor and a valid successor.
    for i in trange(k):
        if not np.isnan(l[i,1]):
            continue
        pos=next_avail(l[:,1],i)
        # weighted time averaged nan assignment
        r1=l[i,0]-l[i-1,0]
        r2=l[pos,0]-l[i,0]
        span=r1+r2
        if span==0:
            # Bracketing rows share one timestamp: interpolation would be 0/0
            # and the resulting NaN would propagate to later rows.
            l[i,1:]=l[i-1,1:]
        else:
            l[i,1:]=((l[i-1,1:]*r2)+(l[pos,1:]*r1))/span

In [5]:
def correct_gdim(l):
    """Swap columns of ``l`` in place so the gravity axis ends up last.

    The decision is made on the first row whose column 1 is not NaN, looking
    at the last three columns (x, y, z):

    * z > 9.5: array is returned unchanged;
    * y > 9.5: y and z columns are swapped;
    * x > 9.5: x and z columns are swapped;
    * otherwise: y and z columns are swapped.

    Returns the same array object that was passed in.
    """
    row = next_avail(l[:,1],0) if np.isnan(l[0,1]) else 0

    # z already carries g: nothing to do
    if l[row,-1] > 9.5:
        return l

    # y is preferred over x, and is also the fallback when no axis exceeds
    # the threshold
    if l[row,-2] > 9.5 or not (l[row,-3] > 9.5):
        partner = -2
    else:
        partner = -3

    l[:,[partner,-1]] = l[:,[-1,partner]]
    return l

def populator(df):
    """Return a copy of ``df`` with NaN sensor readings filled in.

    For each sensor (accelerometer, gyroscope, magnetometer) the timestamp and
    the three axis columns are extracted to a numpy array, gaps are filled by
    ``populate_numpy`` and the columns are written back with ``DataFrame.assign``.
    The accelerometer array additionally goes through ``correct_gdim`` before
    filling.
    """
    # (axis columns, whether to run correct_gdim first)
    sensor_groups = (
        (('UncalAccelXMps2', 'UncalAccelYMps2', 'UncalAccelZMps2'), True),
        (('UncalGyroXRadPerSec', 'UncalGyroYRadPerSec', 'UncalGyroZRadPerSec'), False),
        (('UncalMagXMicroT', 'UncalMagYMicroT', 'UncalMagZMicroT'), False),
    )

    for axis_columns, reorder_gravity in sensor_groups:
        values = df[['utcTimeMillis', *axis_columns]].to_numpy()
        if reorder_gravity:
            values = correct_gdim(values)
        populate_numpy(values)
        # column 0 is the timestamp; axes start at column 1
        filled = {name: values[:, col] for col, name in enumerate(axis_columns, start=1)}
        df = df.assign(**filled)

    return df

In [6]:
def gen_main_df(log_file):
    """Load a GNSS log, fill sensor gaps and add the per-row time step.

    Parameters
    ----------
    log_file : str
        Path of the ``*_GnssLog.txt`` file, forwarded to ``gen_rawdf``.

    Returns
    -------
    pandas.DataFrame
        The populated frame with a float64 ``delta_t`` column inserted at
        position 11. ``delta_t[i]`` is ``utcTimeMillis[i+1] - utcTimeMillis[i]``;
        the last row repeats the previous row's value (0 for a single-row frame).
    """
    df=populator(gen_rawdf(log_file))
    timestamps=df['utcTimeMillis'].to_numpy()
    n_rows=timestamps.shape[0]
    # Vectorised forward difference instead of a Python loop over every row;
    # a 1-D array is inserted so pandas never has to squeeze an (n, 1) array.
    delta_t=np.zeros(n_rows,dtype=np.float64)
    if n_rows>1:
        delta_t[:-1]=np.diff(timestamps)
        delta_t[-1]=delta_t[-2] # no successor: reuse the previous step
    df.insert(11,'delta_t',delta_t)
    return df

In [7]:
def gen_dataset(df,gt_df):
    """Slice the merged frame into per-epoch samples paired with ground truth.

    Sample ``i`` covers the rows of ``df`` from the ``i``-th row that has a
    ``millisSinceGpsEpoch`` value up to and including the ``(i+1)``-th such
    row, and is paired with row ``i+1`` of ``gt_df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Columns are read by position: 2:5 -> acc, 5:8 -> gyr, 8:11 -> mag,
        11 -> delta_t, 12 -> lat, 13 -> lon.
        NOTE(review): this layout is assumed from the caller; confirm if the
        upstream column order changes.
    gt_df : pandas.DataFrame
        Ground truth; columns 3:5 (by position) are read as latitude/longitude.

    Returns
    -------
    numpy.ndarray
        Object array of ``len(gt_df) - 2`` dicts with keys ``'readings'``
        (``[acc, gyr, mag, delta_t, lat, lon]``) and ``'ground_truth'``
        (``[gt_lat, gt_lon]``). The first dict also carries ``'q0'``, a (2, 3)
        array holding the acc and mag values of the row just before the first
        epoch row. Empty when ``gt_df`` has fewer than three rows.

    Raises
    ------
    IndexError
        If ``df`` has fewer epoch rows than samples + 1.
    """
    # Positional row numbers (not index labels) because they feed ``iloc``.
    epoch_rows=np.flatnonzero(df['millisSinceGpsEpoch'].notna().to_numpy())
    # len() rather than index.stop: correct for any index, not only a
    # 0-based RangeIndex.
    n_samples=max(len(gt_df)-2,0)
    dataset=np.empty(n_samples,dtype=object)
    for i in trange(n_samples):
        data=df.iloc[epoch_rows[i]:epoch_rows[i+1]+1].to_numpy()
        acc=data[:,2:5].astype(np.float64)
        gyr=data[:,5:8].astype(np.float64)
        mag=data[:,8:11].astype(np.float64)
        delta_t=data[:,11].astype(np.float64)
        lat=np.float64(data[0,12])
        lon=np.float64(data[0,13])
        gt=gt_df.iloc[i+1,3:5].to_numpy()
        gt_lat=np.float64(gt[0])
        gt_lon=np.float64(gt[1])
        dataset[i]={'readings':[acc,gyr,mag,delta_t,lat,lon],'ground_truth':[gt_lat,gt_lon]}
    if n_samples:
        # When the very first row is already an epoch row there is no earlier
        # row; clamp to row 0 instead of letting -1 wrap to the last row.
        q0_row=max(epoch_rows[0]-1,0)
        dataset[0]['q0']=np.array([df.iloc[q0_row,2:5].to_numpy().astype(np.float64),df.iloc[q0_row,8:11].to_numpy().astype(np.float64)])
    return dataset

In [18]:
# Baseline locations for every phone; only the merge key, the coordinates and
# the phone identifier are kept.
baseline_df=pd.read_csv('data/baseline_locations_train.csv')
phones=baseline_df.phone.unique()
baseline_df=baseline_df.astype({'millisSinceGpsEpoch':'Int64'})
baseline_df.drop(labels=['collectionName','phoneName','heightAboveWgs84EllipsoidM'],axis=1,inplace=True)

# np.save does not create missing directories.
Path('decimeter/train').mkdir(parents=True,exist_ok=True)

for i in phones:
    # '<collection>_<phone>' -> 'data/train/<collection>/<phone>'
    folder="data/train/"+i.replace('_','/')
    log_file=folder+'/'+folder.split('/')[-1]+'_GnssLog.txt'
    gt_file=folder+'/ground_truth.csv'
    main_df=gen_main_df(log_file) # all raw accelerometer, gyro and mag data plus delta_t
    # Merge only this phone's baseline rows: merging the full table on the
    # timestamp alone would duplicate rows (and attach another phone's
    # coordinates) whenever two phones share a millisSinceGpsEpoch value.
    phone_baseline=baseline_df[baseline_df['phone']==i]
    main_df=main_df.merge(phone_baseline,how='left',on='millisSinceGpsEpoch') #merge baseline
    gt_df=pd.read_csv(gt_file)
    dataset=gen_dataset(main_df,gt_df)
    # Object array of dicts: reload with np.load(..., allow_pickle=True).
    np.save('decimeter/train/'+i+'.npy',dataset)