# NB-IoT Localization - WKNN

## Data loading

In [1]:
import os

import numpy as np
import pandas as pd

from scripts.data_loader import load_matlab_file_as_df

# --- Configuration -------------------------------------------------------
# Name of the radio parameter that is later handed to create_point_matrix.
rf_param = 'NSINR'

# Location of the measurement-campaign file on disk.
BASE_DIR = "data/"
FULL_DATA_SET = "Campaign_data_NBIoT_1_2_3_4_5_6_interpolated_smoothed.mat"
filename = os.path.join(BASE_DIR, FULL_DATA_SET)

# --- Load ------------------------------------------------------------------
# Read the selected MATLAB data set into a pandas DataFrame, keeping only the
# coordinates and the per-point measurement matrix.
df = load_matlab_file_as_df(
    filename=filename,
    dataset='dataSet_smooth',  # alternatives: dataSet, dataSet_interp
    usecols=['lat', 'lng', 'measurements_matrix'],
)

# --- Display settings ------------------------------------------------------
# Show every column, never truncate cell contents, and use a 1000-character
# wide layout when printing DataFrames.
for _option, _value in (
    ('display.max_columns', None),
    ('display.max_colwidth', None),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

# --- Reference / test split ------------------------------------------------
# Each point independently becomes a test point with this probability.
TP_probability = 0.3

# Fixed seed so the split is the same on every run.
np.random.seed(42)
is_test_point = np.random.rand(len(df)) <= TP_probability

# PointType encoding: 1 = reference point, 2 = test point.
df['PointType'] = np.where(is_test_point, 2, 1)
point_type = df['PointType']
df_rp = df[point_type == 1]
df_tp = df[point_type == 2]

## Data processing

In [2]:

from scripts.weighted_coverage import create_point_matrix
import numpy as np

# Collect the NPCI values of every measurement matrix, separately for the
# reference points and for the test points.
npcis_rp = np.concatenate([meas['NPCI'].values for meas in df_rp['measurements_matrix']])
npcis_tp = np.concatenate([meas['NPCI'].values for meas in df_tp['measurements_matrix']])

# Merge both sets and keep each NPCI once (np.unique also sorts them), so the
# same NPCI list is handed to create_point_matrix for both point sets.
all_npcis = np.concatenate((npcis_rp, npcis_tp))
unique_npcis = np.unique(all_npcis)

# Matrices for the reference points.
# NOTE(review): presumably m_* holds the rf_param value per (point, NPCI) and
# idx_* flags which NPCIs each point observed -- confirm against
# scripts.weighted_coverage.create_point_matrix.
m_rfp, idx_rfp = create_point_matrix(df_rp, unique_npcis, rf_param)

# Matrices for the test points, built over the same NPCI list.
m_tp, idx_tp = create_point_matrix(df_tp, unique_npcis, rf_param)

## Compute weights

In [3]:
from scipy.spatial.distance import cdist

# Calculate Euclidean distances in measurement space between every test point
# (rows of D) and every reference point (columns of D).
D = cdist(m_tp, m_rfp, metric='euclidean')

# Count, for every (test point, reference point) pair, how many NPCIs both of
# them observed. Any non-zero indicator counts as "observed", exactly like
# np.logical_and does; the float matrix product yields exact integer counts
# and replaces the former per-element Python loops.
seen_tp = (np.asarray(idx_tp) != 0).astype(float)
seen_rfp = (np.asarray(idx_rfp) != 0).astype(float)
common_npcis = seen_tp @ seen_rfp.T

# Normalize each distance by its number of common NPCIs. Pairs without any
# common NPCI cannot be compared, so they get an infinite distance (which
# later turns into a zero weight).
D = np.divide(D, common_npcis, out=np.full_like(D, np.inf), where=common_npcis > 0)

# Set distances to dummy reference points (no NPCI flagged at all) to a very
# large value. Their common-NPCI count is already zero; this keeps it explicit.
dummy_rfps = np.all(idx_rfp == 0, axis=1)
D[:, dummy_rfps] = np.inf

# Replace zero distances with a small value to avoid singularities in 1 / D.
# The replacement is derived from the smallest *finite* positive distance:
# taking the minimum over all non-zero entries would return inf when every
# other distance is infinite (giving exact matches a zero weight) and would
# raise on an empty selection.
ZERO_DISTANCE_DIVISOR = 20
exact_match = D == 0
if exact_match.any():
    positive_finite = D[np.isfinite(D) & (D > 0)]
    if positive_finite.size:
        zero_replacement = positive_finite.min() / ZERO_DISTANCE_DIVISOR
    else:
        # Only zeros and infinities present: any positive value gives the
        # exact matches equal, non-zero weights.
        zero_replacement = 1.0
    D[exact_match] = zero_replacement

# Sort each row once and reuse the permutation for the sorted distances, so
# D_sort and idx_sort are guaranteed to describe the same ordering.
idx_sort = np.argsort(D, axis=1)
D_sort = np.take_along_axis(D, idx_sort, axis=1)

# Inverse-distance weights; infinite distances become weight 0.
W = 1.0 / D_sort

## WKNN

In [4]:
from scripts.haversine import haversine_distance

num_tps = df_tp.shape[0]
k_max = 40
k_values = range(1, k_max + 1)
TP_est_location = [None] * len(k_values)  # one (num_tps, 2) array per k
k_avg_error = {}                          # k -> mean Haversine error

# Extract real positions of test points, one (lat, lng) row per point.
real_lat = df_tp['lat'].values
real_long = df_tp['lng'].values
real_position = np.vstack((real_lat, real_long)).T

# Reference-point coordinates as plain arrays, extracted once instead of
# slicing the whole DataFrame twice per k. idx_sort is used as positional
# indices into df_rp (as with .iloc), so it can index these arrays directly.
# NOTE(review): assumes create_point_matrix keeps df_rp's row order -- confirm.
rfp_lat = df_rp['lat'].values
rfp_long = df_rp['lng'].values

# Loop over each k value
for i, this_k in enumerate(k_values):
    # Select the k-nearest reference points and their weights. If fewer than
    # this_k reference points exist, the slices simply contain all of them.
    RFP_selected_idx = idx_sort[:, :this_k]
    W_k = W[:, :this_k]
    W_k_total = np.sum(W_k, axis=1)

    # Coordinates of the selected reference points, shape (num_tps, k).
    lat_k_RFP_matrix = rfp_lat[RFP_selected_idx]
    long_k_RFP_matrix = rfp_long[RFP_selected_idx]

    # Compute weighted sums of coordinates
    sum_lat = np.sum(lat_k_RFP_matrix * W_k, axis=1)
    sum_long = np.sum(long_k_RFP_matrix * W_k, axis=1)

    # Compute estimated coordinates of test points (weighted centroid).
    # NOTE: a test point whose selected neighbours all have zero weight gives
    # 0 / 0 = NaN here, and that NaN propagates into the mean error below.
    lat_k_TP = sum_lat / W_k_total
    long_k_TP = sum_long / W_k_total

    # Compute errors using Haversine formula
    km_pow = haversine_distance(real_position[:, 0], real_position[:, 1], lat_k_TP, long_k_TP)
    average_error_pow = np.mean(km_pow)

    k_avg_error[this_k] = average_error_pow

    # Store estimated locations as (lat, lng) rows.
    TP_est_location_k = np.column_stack((lat_k_TP, long_k_TP))
    TP_est_location[i] = TP_est_location_k


In [5]:


# Report the mean localization error stored in k_avg_error for k = 3.
# NOTE(review): the unit is whatever haversine_distance returns -- presumably
# kilometres; confirm against scripts.haversine.
print(f'Average error with k = 3 {k_avg_error[3]}')

Average error with k = 3 0.014426894600619732
