Go to Google Drive link below

Find the folder "indoor-location-competition" -> Right Click the folder -> Click Organize->"Add shortcut to Drive" -> A window pops->go to All location and select->MyDrive

https://drive.google.com/drive/folders/14ZqnJDlpgc5O8Dcet6HYHZT1_8uJIwel?usp=drive_link


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cp -var /content/drive/MyDrive/indoor-location-competition/ /content

'/content/drive/MyDrive/indoor-location-competition/' -> '/content/indoor-location-competition'
'/content/drive/MyDrive/indoor-location-competition/indoor-location-navigation.zip' -> '/content/indoor-location-competition/indoor-location-navigation.zip'
'/content/drive/MyDrive/indoor-location-competition/test.zip' -> '/content/indoor-location-competition/test.zip'
'/content/drive/MyDrive/indoor-location-competition/test_site_dict.pkl' -> '/content/indoor-location-competition/test_site_dict.pkl'
'/content/drive/MyDrive/indoor-location-competition/iln-x-y-training-and-inference-part-no-pp.ipynb' -> '/content/indoor-location-competition/iln-x-y-training-and-inference-part-no-pp.ipynb'
'/content/drive/MyDrive/indoor-location-competition/ILN_631dat_df_wifi_all.pkl' -> '/content/indoor-location-competition/ILN_631dat_df_wifi_all.pkl'
'/content/drive/MyDrive/indoor-location-competition/test_ts_lag.csv' -> '/content/indoor-location-competition/test_ts_lag.csv'
'/content/drive/MyDrive/indoor-loc

In [3]:
cd /content/indoor-location-competition

/content/indoor-location-competition


In [4]:
import sys
sys.path.append('../input/iln-dataset/ILN_TOOLS')

import gc, glob, time, pickle, math
import numpy as np
import pandas as pd
from tqdm import tqdm

from visualize_f import visualize_trajectory
import compute_f as F
from scipy.interpolate import interp1d

In [5]:
file_header = 'ILN_631dat'
input_dir = 'unzip_dir'
floor_map = {"B2":-2, "B1":-1,
             "F1":0, "F2":1, "F3":2, "F4":3, "F5":4,
             "F6":5, "F7":6, "F8":7, "F9":8,
             "1F":0, "2F":1, "3F":2, "4F":3, "5F":4,
             "6F":5, "7F":6, "8F":7, "9F":8}
site_list = ['5d2709a003f801723c3251bf','5a0546857ecc773753327266',
             '5c3c44b80379370013e0fd2b','5d2709b303f801723c327472',
             '5d2709bb03f801723c32852c','5d2709c303f801723c3299ee',
             '5d2709d403f801723c32bd39','5d2709e003f801723c32d896',
             '5d27075f03f801723c2e360f','5d27096c03f801723c31e5e0',
             '5d27097f03f801723c320d97','5d27099f03f801723c32511d',
             '5da138b74db8ce0c98bd4774','5da958dd46f8266d0737457b',
             '5da1382d4db8ce0c98bbe92e','5da1383b4db8ce0c98bc11ab',
             '5da1389e4db8ce0c98bd0547','5da138274db8ce0c98bbd3d2',
             '5da138314db8ce0c98bbf3a0','5da138364db8ce0c98bc00f1',
             '5da138754db8ce0c98bca82f','5da138764db8ce0c98bcaa46',
             '5dbc1d84c1eb61796cf7c010','5dc8cea7659e181adb076a3f']

In [6]:
'''
Modify the host's code "read_data_file" function in "io_f.py"
for dealing with the malformed data etc.
'''

from dataclasses import dataclass

@dataclass
class ReadData:
    acce: np.ndarray
    acce_uncali: np.ndarray
    gyro: np.ndarray
    gyro_uncali: np.ndarray
    magn: np.ndarray
    magn_uncali: np.ndarray
    ahrs: np.ndarray
    wifi: np.ndarray
    ibeacon: np.ndarray
    waypoint: np.ndarray

def split_list_as_req(line_data):
    redo = False
    data_BU = []
    header_list = [i for i, itm in enumerate(line_data) if 'TYPE_' in itm]
    if len(header_list) > 1:
        data_BU = [line_data[header_list[1]-1][-13:]] + line_data[header_list[1]:]
        line_data[header_list[1]-1] = line_data[header_list[1]-1][:-13]
        line_data = line_data[:header_list[1]]
        redo = True
    return redo, line_data, data_BU

def read_data_file(data_filename):
    acce = []
    acce_uncali = []
    gyro = []
    gyro_uncali = []
    magn = []
    magn_uncali = []
    ahrs = []
    wifi = []
    ibeacon = []
    waypoint = []

    with open(data_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    i = 0
    redo = False
    while i < len(lines):
        if not redo:
            line_data = lines[i]
            line_data = line_data.strip()
            if not line_data or line_data[0] == '#':
                i += 1
                continue
            line_data = line_data.split('\t')
        else:
            line_data = data_BU
            redo = False

        redo, line_data, data_BU = split_list_as_req(line_data)

        if line_data[1] == 'TYPE_ACCELEROMETER':
            try:
                acce.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])
            except ValueError:
                print(data_filename)
                print(line_data)

        elif line_data[1] == 'TYPE_ACCELEROMETER_UNCALIBRATED':
            acce_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])

        elif line_data[1] == 'TYPE_GYROSCOPE':
            gyro.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])

        elif line_data[1] == 'TYPE_GYROSCOPE_UNCALIBRATED':
            gyro_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])

        elif line_data[1] == 'TYPE_MAGNETIC_FIELD':
            magn.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])

        elif line_data[1] == 'TYPE_MAGNETIC_FIELD_UNCALIBRATED':
            magn_uncali.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])

        elif line_data[1] == 'TYPE_ROTATION_VECTOR':
            ahrs.append([int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])])

        elif line_data[1] == 'TYPE_WIFI':
            sys_ts = line_data[0]
            ssid = line_data[2]
            bssid = line_data[3]
            rssi = line_data[4]
            lastseen_ts = line_data[6]
            frequency = line_data[5]
            wifi_data = [sys_ts, ssid, bssid, rssi, lastseen_ts, frequency]
            wifi.append(wifi_data)

        elif line_data[1] == 'TYPE_BEACON':
            ts = line_data[0]
            uuid = line_data[2]
            major = line_data[3]
            minor = line_data[4]
            rssi = line_data[6]
            txpow = line_data[5]
            distance = line_data[7]
            mac = line_data[8]
            if len(line_data)>9:
                ts_copy = line_data[9]
            else:
                ts_copy = ts
            ibeacon_data = [ts, '_'.join([uuid, major, minor]), rssi,
                            txpow, distance, mac, ts_copy]
            ibeacon.append(ibeacon_data)

        elif line_data[1] == 'TYPE_WAYPOINT':
            waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])

        if not redo:
            i += 1

    acce = np.array(acce)
    acce_uncali = np.array(acce_uncali)
    gyro = np.array(gyro)
    gyro_uncali = np.array(gyro_uncali)
    magn = np.array(magn)
    magn_uncali = np.array(magn_uncali)
    ahrs = np.array(ahrs)
    wifi = np.array(wifi)
    ibeacon = np.array(ibeacon)
    waypoint = np.array(waypoint)

    return ReadData(acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali, ahrs, wifi, ibeacon, waypoint)

"test_site_dict.pkl" in the below cell is the dictionary consists of wifi bssid and the ibeacon MAC address in the test data, and created with [this notebook](https://www.kaggle.com/horsek/ilnpre1-create-testsitedict).  

"df_TestTimeLag" in the below cell is a time lag table for each path in the test data, and created with [this notebook](https://www.kaggle.com/horsek/iln-preprocess-time-lag-table-of-test-data).

In [7]:
with open("test_site_dict.pkl", "rb") as f:
    test_site_dict = pickle.load(f)
df_TestTimeLag = pd.read_csv('test_ts_lag.csv',index_col=0)

In [8]:
'''
Create sequence of time & relative position data for each path
Outputs are as follows:
  PathSeq_t: timestamp
  PathSeq_l: length position (cumulative summation (cumsum) of walking distance)
             at each timestamp
  PathSeq_rx, PathSeq_ry: relative position
'''
def Path_Sequence(PathData):
    step_timestamps, step_indexs, step_acce_max_mins = \
        F.compute_steps(PathData.acce)
    headings = F.compute_headings(PathData.ahrs)
    stride_lengths = F.compute_stride_length(step_acce_max_mins)
    step_headings = F.compute_step_heading(step_timestamps, headings)
    PathSeq_t = stride_lengths[:,0]
    PathSeq_t = np.insert(PathSeq_t, 0,
                          PathData.acce[0,0]).astype('int64')
    PathSeq_l = stride_lengths[:,1].cumsum()
    PathSeq_l = np.append(np.array(0),PathSeq_l)

    rel_pos = F.compute_rel_positions(stride_lengths, step_headings)
    PathSeq_rx = np.append(np.array(0),rel_pos[:,1]).cumsum()
    PathSeq_ry = np.append(np.array(0),rel_pos[:,2]).cumsum()
    return PathSeq_t, PathSeq_l, PathSeq_rx, PathSeq_ry

In [9]:
'''
Create pd.DataFrame of wifi signal data for each path
Basic format is the same as that created with Kouki's great notebook.
(https://www.kaggle.com/kokitanisaka/create-unified-wifi-features-example)
- rows: timestamp (or corresponding Length position)
- columns: signal id and strength, which are sorted by strength at the same timestamp
'''
def dfSignalSequence(df_sign, ref_time, id_list,
                     id_, values, tslabel, n_in_seq):

    ''' Drop the data out of "tslabel" range '''
    mask = df_sign[id_].apply(lambda x: x in id_list)
    mask &= (df_sign[tslabel] >= ref_time[0])
    mask &= (df_sign[tslabel] <= ref_time[-1])
    df_sign = df_sign[mask]
    if df_sign.shape[0]==0:
        return

    ''' Drop the data before the previous timestamp '''
    df_tmp = pd.DataFrame(df_sign[tslabel].unique(),columns=[tslabel])
    df_tmp['last_ts'] = np.append(ref_time[0],
                                  df_tmp[tslabel].values[:-1])
    df_sign = df_sign.merge(df_tmp, how='left', on=tslabel)
    del df_tmp; gc.collect()
    df_sign = df_sign[df_sign['lastseen_ts']>=df_sign['last_ts']]
    df_sign.drop('last_ts', axis=1, inplace=True)
    if df_sign.shape[0]==0:
        return

    ''' create pivot table '''
    df_sign.set_index(tslabel, inplace=True)
    df_pivot = df_sign.pivot_table(index=df_sign.index,
                                   columns=id_, values=values,
                                   aggfunc=max)
    feat = []
    for i in range(df_pivot.shape[0]):
        tmp = df_pivot.iloc[i,:].sort_values(ascending=False)[:n_in_seq]
        if tmp.shape[0]!=n_in_seq:
            n_col = tmp.shape[0]
            add_col = list(set(id_list)-set(tmp.index))[:n_in_seq-n_col]
            tmp = pd.concat([tmp, pd.Series(np.nan, index=add_col)])
            assert tmp.shape[0]==n_in_seq
        tmp = tmp.reset_index().values.T.reshape(-1)
        feat.append(tmp)
    feat = np.stack(feat,axis=1).T
    df_out = pd.DataFrame(feat,
                          columns=[f'id_{i}' for i in range(n_in_seq)]
                                 +[f'strength_{i}' for i in range(n_in_seq)])
    df_out[tslabel] = df_pivot.index
    df_out.set_index(tslabel, drop=True, inplace=True)
    return df_out

In [10]:
''' Retrieve the Site ID from txt file '''
def SiteID(txt):
    p1 = txt[1].find('SiteID:')+7
    p2 = txt[1].find('\tSiteName:')
    assert p1!=-1+7 and p2!=-1, 'SiteID not found'
    return txt[1][p1:p2]

In [11]:
def headings_from_magn(PathData):
    mag_df = pd.DataFrame(PathData.magn)
    mag_df.columns = ["timestamp","x","y","z"]
    acce_df = pd.DataFrame(PathData.acce)
    acce_df.columns = ["timestamp","ax","ay","az"]
    mag_df = pd.merge(mag_df,acce_df,on="timestamp").dropna()

    m_trans = 0
    time_di_list = []
    for i in mag_df.iterrows():

        ''' https://www.kaggle.com/museas/with-magn-cost-minimization '''
        gx,gy,gz = i[1][1],i[1][2],i[1][3]
        ax,ay,az = i[1][4],i[1][5],i[1][6]
        roll = math.atan2(ay,az)
        pitch = math.atan2(-1*ax , (ay * math.sin(roll) + az * math.cos(roll)))
        q = m_trans - math.atan2(
            (gz*math.sin(roll)-gy*math.cos(roll)),(gx*math.cos(pitch) + gy*math.sin(roll)*math.sin(pitch) + gz*math.sin(pitch)*math.cos(roll))
        ) -np.pi/2
        q = (q+np.pi)%(2*np.pi)-np.pi

        ''' The following is a different calculation for verification '''
#         mag, acce = i[1][1:4].values, i[1][4:7].values
#         axsz = acce/np.linalg.norm(acce)
#         axsy = mag/np.linalg.norm(mag)
#         axsy -= np.sum(axsy*axsz) * axsz
#         axsy = axsy/np.linalg.norm(axsy)
#         axsx = np.cross(axsy, axsz)
#         q1 = np.arctan2(axsx[1],axsy[1])
#         q1 = (q1+np.pi)%(2*np.pi)-np.pi

        time_di_list.append((i[1][0],q))
    return np.array(time_di_list)

def add_IMU(PathData, df_inpt, time_delta=0):

    headmagn = headings_from_magn(PathData)
    df_IMU = pd.DataFrame(PathData.gyro,columns=['IMU_ts','gyro_x','gyro_y','gyro_z'])
    df_IMU['IMU_ts'] = (df_IMU['IMU_ts']+time_delta).astype('int64')
#     assert (df_IMU['IMU_ts'].values==headmagn[:,0]).all()
#     assert (df_IMU['IMU_ts'].values==PathData.acce[:,0]).all()
#     assert (df_IMU['IMU_ts'].values==PathData.ahrs[:,0]).all()
    df_IMU['head_magn_x'] = np.sin(headmagn[:,1])
    df_IMU['head_magn_y'] = np.cos(headmagn[:,1])
    df_IMU[['acce_x','acce_y','acce_z']] = PathData.acce[:,1:]
    df_IMU[['ahrs_x','ahrs_y','ahrs_z']] = PathData.ahrs[:,1:]
    df_IMU[['magn_x','magn_y','magn_z']] = PathData.magn[:,1:]

    ''' set window label '''
    grp_ts = df_inpt.index.name
    df_IMU = pd.concat([pd.DataFrame([df_inpt.index]*2,index=['IMU_ts',grp_ts]).T,
                        df_IMU], axis=0)
    df_IMU = df_IMU.sort_values('IMU_ts')
    df_IMU[grp_ts].fillna(method='bfill',inplace=True)
    df_IMU.dropna(inplace=True)
    df_IMU[grp_ts] = df_IMU[grp_ts].astype('int64')
    df_IMU.drop('IMU_ts', axis=1, inplace=True)

    ''' grouping by timestamp of wifi data '''
    gdf_IMU = pd.concat([df_IMU.groupby(grp_ts).mean().add_suffix('_mean'),
                         df_IMU.groupby(grp_ts).std().add_suffix('_std'),
                         df_IMU.groupby(grp_ts).max().add_suffix('_max'),
                         df_IMU.groupby(grp_ts).min().add_suffix('_min'),
                         df_IMU.groupby(grp_ts).skew().add_suffix('_skew')],axis=1)

    return df_inpt.merge(gdf_IMU, how='left', left_index=True, right_index=True)

In [12]:
tmp_list = []

In [14]:
mkdir unzip_dir

In [None]:
# prompt: unzip loacation file to unzip_dir

!unzip -d unzip_dir indoor-location-navigation.zip


In [16]:
''' train data processing '''
for i_site, site in enumerate(site_list):

    print(f'========== {i_site+1}/{len(site_list)} ==========')
    print(f'site: {site}')
    test_wifi = sorted(test_site_dict[site]['wifi'])
    print(f'num of wifi BSSID in test set: {len(test_wifi)}')

    AllPathData = []
    floor_dirs = sorted(glob.glob(f'{input_dir}/train/{site}/*'))
    floors = [dir_.split('/')[-1] for dir_ in floor_dirs]

    print(f'floors: {floors}')

    print('\n=== Read Path Data ===')
    time.sleep(1)
    for i, floor_dir_ in enumerate(floor_dirs):
        path_files = sorted(glob.glob(f'{floor_dir_}/*'))
        dic_temp = {}
        for path_file_ in tqdm(path_files, desc=floors[i]):
            path_name = path_file_.split('/')[-1].replace('.txt','')
            dic_temp[path_name] = read_data_file(path_file_)
        AllPathData.append(dic_temp)
    del dic_temp; gc.collect()

    print('\n=== Create features ===')
    time.sleep(1)
    for i_floor, FloorPathData in enumerate(AllPathData):
        for PathName, PathData in tqdm(FloorPathData.items(),
                                       desc=floors[i_floor]):
            ''' Path Sequence '''
            PathSeq_t, PathSeq_l, PathSeq_rx, PathSeq_ry = Path_Sequence(PathData)

            ''' wifi DataFrame '''
            if PathData.wifi.shape[0]==0:
                continue
            df_wifi = pd.DataFrame(PathData.wifi,
                                   columns=['sys_ts','ssid','bssid','rssi',
                                            'lastseen_ts','frequency'])
            df_wifi['sys_ts']=df_wifi['sys_ts'].astype(np.int64)
            df_wifi['lastseen_ts']=df_wifi['lastseen_ts'].astype(np.int64)
            df_wifi['rssi']=df_wifi['rssi'].astype(np.int64)

            ''' Signal Sequence '''
            df_feat = dfSignalSequence(df_wifi, PathSeq_t, test_wifi,
                                       'bssid','rssi','sys_ts',100)
            if df_feat is None:
                continue

            ''' Compute length position & relative position by interpolation '''
            itp_l = interp1d(PathSeq_t, PathSeq_l,
                             fill_value=(PathSeq_l[0],PathSeq_l[-1]),
                             bounds_error=False)
            df_feat['len_pos'] = itp_l(df_feat.index)
            itp_rx = interp1d(PathSeq_t, PathSeq_rx,
                              fill_value=(PathSeq_rx[0],PathSeq_rx[-1]),
                              bounds_error=False)
            itp_ry = interp1d(PathSeq_t, PathSeq_ry,
                              fill_value=(PathSeq_ry[0],PathSeq_ry[-1]),
                              bounds_error=False)
            df_feat['rel_x'] = itp_rx(df_feat.index)
            df_feat['rel_y'] = itp_ry(df_feat.index)

            ''' IMU data '''
            df_feat = add_IMU(PathData, df_feat)

            ''' x, y, floor, labels '''
#             idx = np.abs(PathData.waypoint[:,0:1]-
#                          df_feat.index.values.reshape(1,-1)).argmin(axis=0)
#             df_feat[['x','y']]=PathData.waypoint[idx,1:]

            GT_t, GT_x, GT_y = [PathData.waypoint[:,i] for i in range(3)]
            GT_l = itp_l(GT_t)
            itp_x = interp1d(GT_l, GT_x, fill_value=(GT_x[0],GT_x[-1]),
                             bounds_error=False)
            itp_y = interp1d(GT_l, GT_y, fill_value=(GT_y[0],GT_y[-1]),
                             bounds_error=False)
            df_feat['x'] = itp_x(df_feat['len_pos'])
            df_feat['y'] = itp_y(df_feat['len_pos'])
            df_feat['floor'] = floor_map[floors[i_floor]]
            df_feat['path'] = PathName
            df_feat['site'] = site
            df_feat['train/test'] = 'train'

            tmp_list.append(df_feat)

site: 5d2709a003f801723c3251bf
num of wifi BSSID in test set: 885
floors: ['1F', '2F', '3F', '4F']

=== Read Path Data ===


1F: 100%|██████████| 123/123 [00:09<00:00, 13.19it/s]
2F: 100%|██████████| 102/102 [00:08<00:00, 12.07it/s]
3F: 100%|██████████| 25/25 [00:02<00:00, 11.78it/s]
4F: 100%|██████████| 60/60 [00:05<00:00, 11.70it/s]



=== Create features ===


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1, inplace=True)
1F: 100%|██████████| 123/123 [00:55<00:00,  2.22it/s]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1, inplace=True)
2F: 100%|██████████| 102/102 [00:51<00:00,  1.98it/s]
3F: 100%|██████████| 25/25 [00:13<00:00,  1.91it/s]
4F: 100%|██████████| 60/60 [00:31<00:00,  1.92it/s]


site: 5a0546857ecc773753327266
num of wifi BSSID in test set: 2435
floors: ['B1', 'F1', 'F2', 'F3', 'F4']

=== Read Path Data ===


B1: 100%|██████████| 109/109 [00:13<00:00,  7.97it/s]
F1: 100%|██████████| 131/131 [00:17<00:00,  7.48it/s]
F2: 100%|██████████| 110/110 [00:23<00:00,  4.67it/s]
F3: 100%|██████████| 78/78 [00:10<00:00,  7.10it/s]
F4: 100%|██████████| 86/86 [00:19<00:00,  4.34it/s]



=== Create features ===


B1:   6%|▋         | 7/109 [00:06<01:37,  1.05it/s]


KeyboardInterrupt: ignored

In [None]:
''' test data processing '''
print('\n=== Read Path Data ===')
test_files = sorted(glob.glob(f'{input_dir}/test/*.txt'))
TestPathData,TestSiteName = {},{}
time.sleep(1)
for path_file_ in tqdm(test_files):
    path_name = path_file_.split('/')[-1].replace('.txt','')
    TestPathData[path_name] = read_data_file(path_file_)

    with open(path_file_, 'r', encoding="utf-8") as f:
        txt = f.readlines()
    TestSiteName[path_name] = SiteID(txt)

print('\n=== Create features ===')
time.sleep(1)
for PathName, PathData in tqdm(TestPathData.items()):
    site = TestSiteName[PathName]
    test_wifi = sorted(test_site_dict[site]['wifi'])
    time_lag = df_TestTimeLag.loc[PathName,'time_lag']

    ''' Path Sequence '''
    PathSeq_t, PathSeq_l, PathSeq_rx, PathSeq_ry = Path_Sequence(PathData)
    PathSeq_t += time_lag

    ''' wifi DataFrame '''
    if PathData.wifi.shape[0]==0:
        continue
    df_wifi = pd.DataFrame(PathData.wifi,
                           columns=['sys_ts','ssid','bssid','rssi',
                                    'lastseen_ts','frequency'])
    df_wifi['sys_ts']=df_wifi['sys_ts'].astype(np.int64)
    df_wifi['sys_ts'] += time_lag
    df_wifi['lastseen_ts']=df_wifi['lastseen_ts'].astype(np.int64)
    df_wifi['rssi']=df_wifi['rssi'].astype(np.int64)

    ''' Signal Sequence '''
    df_feat = dfSignalSequence(df_wifi, PathSeq_t, test_wifi,
                               'bssid','rssi','sys_ts',100)
    if df_feat is None:
        continue

    ''' Compute length position & relative position by interpolation '''
    itp_l = interp1d(PathSeq_t, PathSeq_l,
                     fill_value=(PathSeq_l[0],PathSeq_l[-1]),
                     bounds_error=False)
    df_feat['len_pos'] = itp_l(df_feat.index)
    itp_rx = interp1d(PathSeq_t, PathSeq_rx,
                      fill_value=(PathSeq_rx[0],PathSeq_rx[-1]),
                      bounds_error=False)
    itp_ry = interp1d(PathSeq_t, PathSeq_ry,
                      fill_value=(PathSeq_ry[0],PathSeq_ry[-1]),
                      bounds_error=False)
    df_feat['rel_x'] = itp_rx(df_feat.index)
    df_feat['rel_y'] = itp_ry(df_feat.index)

    ''' IMU data '''
    df_feat = add_IMU(PathData, df_feat, time_lag)

    ''' x, y, floor, labels '''
    df_feat[['x','y']] = np.nan
    df_feat['floor'] = np.nan
    df_feat['path'] = PathName
    df_feat['site'] = site
    df_feat['train/test'] = 'test'

    tmp_list.append(df_feat)


=== Read Path Data ===


100%|██████████| 626/626 [04:49<00:00,  2.16it/s]



=== Create features ===


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sign.drop('last_ts', axis=1

In [None]:
df_wifi_all = pd.concat(tmp_list)
df_wifi_all.reset_index(inplace=True)

#null check
df_wifi_all[[f'id_{i}' for i in range(100)]].isnull().any().any()

False

In [None]:
with open(f'{file_header}_df_wifi_all.pkl','wb') as f:
    pickle.dump(df_wifi_all, f)