# 5G Localization

## Read MATLAB files into dataframes

In [2]:
import os

import pandas as pd

from scripts.data_loader import load_matlab_file_as_df

# Location of the measurement campaign file, relative to the working directory
BASE_DIR = "data/"
FULL_DATA_SET = "Campaign_data_NBIoT_1_2_3_4_5_6_interpolated_smoothed.mat"
filename = os.path.join(BASE_DIR, FULL_DATA_SET)

# Read one dataset variant from the file into a pandas dataframe, keeping only
# the position columns and the nested measurements column.
# Selectable variants: dataSet, dataSet_interp or dataSet_smooth.
df = load_matlab_file_as_df(
    filename=filename,
    dataset='dataSet_smooth',
    usecols=['lat', 'lng', 'measurements_matrix'],
)

# Dataframe printing: show every column, never truncate cell contents,
# and allow printed lines up to 1000 characters wide
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 1000)

## Prepare data

In [3]:
# Flatten the nested measurements_matrix into one row per measurement
# One record per measurement: the position of the parent row followed by the
# measurement's own fields. A measurement field named 'lat' or 'lng' would
# override the parent's value, exactly as dict.update would.
flattened_data = [
    {'lat': position['lat'], 'lng': position['lng'], **measurement.to_dict()}
    for _, position in df.iterrows()
    for _, measurement in position['measurements_matrix'].iterrows()
]

# Build the flat dataframe and discard every row that contains a missing value
# (the surviving rows keep their original index labels)
flattened_df = pd.DataFrame(flattened_data).dropna()

print(flattened_df)

             lat        lng   NPCI  eNodeBID    RSSI   NSINR   NRSRP   NRSRQ      ToA  operatorID  campaignID
0      41.824214  12.465250    0.0  316061.0 -57.780   5.150 -66.190  -8.400  5530.90        88.0         1.0
1      41.824214  12.465250   10.0  300043.0 -66.265  22.125 -71.030  -4.750  3530.24         1.0         1.0
2      41.824214  12.465250   52.0  372017.0 -58.600   9.350 -64.980  -6.360  1221.26        88.0         1.0
3      41.824214  12.465250   61.0  316716.0 -58.920  -9.400 -78.770 -20.020  4335.62        88.0         1.0
4      41.824214  12.465250  112.0   69046.0 -63.265  -0.840 -75.580 -12.500  2707.04        10.0         1.0
...          ...        ...    ...       ...     ...     ...     ...     ...      ...         ...         ...
40095  41.870661  12.463569   41.0  318928.0 -61.100 -18.430 -88.290 -27.130  2359.91        88.0         6.0
40096  41.870661  12.463569   78.0   67589.0 -70.890   0.445 -83.365 -12.350  1034.02        10.0         6.0
40097  41.

## Train the model

In [14]:
from scripts.haversine import haversine_distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
import numpy as np

print(f'Training KNN regressor with {flattened_df.shape[0]} samples')

# Inputs: the radio measurements plus the cell / operator / campaign identifiers.
# NOTE(review): these columns are passed to KNN unscaled; large-valued columns
# such as eNodeBID will likely dominate the neighbour distance -- consider
# standardizing the features and confirm the identifier columns belong here.
X = flattened_df[['RSSI', 'NPCI', 'eNodeBID', 'NSINR', 'NRSRP', 'NRSRQ', 'ToA', 'operatorID', 'campaignID']]

# Outputs: the position as (lat, lng)
Y = flattened_df[['lat', 'lng']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the split is per row, so rows that share a position may end up
# on both sides of the split -- verify this is the intended evaluation protocol.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit a 3-nearest-neighbours regressor on the training rows
knn = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)

# Predicted (lat, lng) for every held-out row
y_pred = knn.predict(X_test)

# Haversine distance between each true position and its prediction
distances = np.array([
    haversine_distance(true_lat, true_lng, pred_lat, pred_lng)
    for (true_lat, true_lng), (pred_lat, pred_lng) in zip(y_test.to_numpy(), y_pred)
])

# Mean of the squared Haversine distances over the test set
mse_haversine = np.mean(np.square(distances))
print(f'Mean Squared Error (Haversine Distance): {mse_haversine}')

Training KNN regressor with 39966 samples
Mean Squared Error (Haversine Distance): 0.058893924560045115
