# 5G Localization

## Read MATLAB files into DataFrames

In [3]:
import os

import pandas as pd

from scripts.data_loader import load_matlab_file_as_df
from scripts.weighted_coverage import get_miss_ref_value

# Location of the .mat file holding the campaign data
BASE_DIR = "data/"
FULL_DATA_SET = "Campaign_data_NBIoT_1_2_3_4_5_6_interpolated_smoothed.mat"
filename = os.path.join(BASE_DIR, FULL_DATA_SET)

# Load two of the datasets stored in the file as pandas dataframes (valid names
# are 'dataSet', 'dataSet_interp' and 'dataSet_smooth'): 'dataSet_smooth' is
# read first, then 'dataSet'. Both calls request the same three columns.
df_smooth, df = (
    load_matlab_file_as_df(
        filename=filename,
        dataset=dataset_name,
        usecols=['lat', 'lng', 'measurements_matrix'],
    )
    for dataset_name in ('dataSet_smooth', 'dataSet')
)

# Display settings for more readable dataframe printing
pd.set_option('display.max_columns', None)  # never hide columns
pd.set_option('display.max_colwidth', None)  # never shorten cell contents
pd.set_option('display.width', 1000)  # allow output lines up to 1000 characters


In [5]:
# Preview only the first rows; printing the whole column produces an
# unreadably large cell output.
print(df_smooth['measurements_matrix'].head())

0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

## Prepare data

In [None]:
 # Flatten the nested measurements_matrix
# One record per measurement row: the parent row's coordinates come first,
# followed by every field of the measurement (a measurement field named 'lat'
# or 'lng' would replace the coordinate, exactly as dict.update does).
flattened_data = [
    {'lat': lat, 'lng': lng, **measurement.to_dict()}
    for lat, lng, matrix in zip(df['lat'], df['lng'], df['measurements_matrix'])
    for _, measurement in matrix.iterrows()
]

# Assemble all records into a single table
flattened_df = pd.DataFrame(flattened_data)

# Discard every record that has a missing value in any column
flattened_df.dropna(inplace=True)

print(flattened_df)

## Train the model

In [None]:
from scripts.haversine import haversine_distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
import numpy as np

print(f'Training KNN regressor with {len(flattened_df)} samples')

# Inputs of the model: RSSI together with the other selected columns
X = flattened_df[[
    'RSSI', 'NPCI', 'eNodeBID', 'NSINR', 'NRSRP', 'NRSRQ', 'ToA', 'operatorID', 'campaignID',
]]

# Outputs of the model: latitude and longitude
Y = flattened_df[['lat', 'lng']]

# Keep 20% of the samples aside for testing; the fixed random_state makes the
# split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit a KNN regressor that uses 3 neighbours
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X_train, y_train)

# Predicted (lat, lng) for every held-out sample
y_pred = knn.predict(X_test)

# Haversine result for each (true position, predicted position) pair
distances = np.array([
    haversine_distance(true_lat, true_lng, pred_lat, pred_lng)
    for (true_lat, true_lng), (pred_lat, pred_lng) in zip(y_test.to_numpy(), y_pred)
])

# First component of every haversine_distance result
# (presumably kilometres, going by the variable name - TODO confirm)
distances_km = distances[:, 0]

# Mean of the squared distances
mse_haversine = np.mean(distances_km ** 2)

print(f'Mean Squared Error (Haversine Distance): {mse_haversine}')

## Weighted coverage strategy


In [None]:


# Main script
# Assuming flattened_df is your DataFrame containing the data

# Probability with which a sample is flagged point_type == True (and so ends up in tps)
tp_probability = 0.3

# Draw the split from a seeded generator so that it is identical after every
# "Restart & Run All" (same seed as the train/test split of the KNN model).
rng = np.random.default_rng(42)
flattened_df['point_type'] = rng.random(flattened_df.shape[0]) <= tp_probability

# point_type False -> rps, point_type True -> tps
# (presumably reference points and test points - TODO confirm naming)
rps = flattened_df[~flattened_df['point_type']].reset_index(drop=True)
tps = flattened_df[flattened_df['point_type']].reset_index(drop=True)

operators = [1, 10]
rf_param = 'NSINR'
# Only referenced by the commented-out distance computation below
miss_ref_value = get_miss_ref_value(rf_param)

# Concatenate, drop duplicates, and reset index
unique_npcis = (pd.concat([rps[['NPCI', 'operatorID']], tps[['NPCI', 'operatorID']]])
                .drop_duplicates()
                .reset_index(drop=True)
                .astype({'NPCI': 'int', 'operatorID': 'int'}))

# Filter the unique NPCIs to only include the selected operators
unique_npcis = unique_npcis[unique_npcis['operatorID'].isin(operators)]

# Print the value counts of NPCIs and the resulting unique_npcis DataFrame
print(unique_npcis['NPCI'].value_counts())
print(unique_npcis)

# NOTE(review): the following cells read `d` and `tps_located`, which are only
# assigned inside the commented-out code below. On a fresh kernel those cells
# raise NameError until this computation is restored (it additionally needs
# `create_point_matrix` and `cdist`, neither of which is imported here).
# m_rfp = create_point_matrix(rps, unique_npcis, rf_param)
# m_tp = create_point_matrix(tps, unique_npcis, rf_param)
#
# # Calculate distances
# d = cdist(m_tp, m_rfp, 'euclidean')
#
# # Adjust distances for common NPCIs
# idx1 = (m_tp != miss_ref_value).astype(int)
# idx2 = (m_rfp != miss_ref_value).astype(int)
# tps_located = np.zeros(tps.shape[0], dtype=bool)
#
# for i in range(tps.shape[0]):
#     s = np.sum(np.logical_and(idx1[i, :], idx2), axis=1)
#     tps_located[i] = np.any(s > 0)
#     d[i, s == 0] = np.inf
#     if np.any(s != 0):
#         d[i, s != 0] /= s[s != 0]
#
# # Handle zero distances
# d[d == 0] = np.min(d[d != 0]) / 20
#
# print(d[0])

In [None]:
# Order every row of d from the smallest to the largest distance, keeping both
# the ordering indices and the ordered values
idx_sort = np.argsort(d, axis=1)
d_sort = np.take_along_axis(d, idx_sort, axis=1)

# Inverse-distance weights, in the same sorted order
w = 1.0 / d_sort

print(d_sort)


In [None]:
# Coordinates of RPs, one (lat, lng) row per RP
rfp_coordinates = rps[['lat', 'lng']].to_numpy()

# Estimate TP locations
k_max = 5  # Example value, adjust as needed
tp_est_location = []  # one (len(tps), 2) array per value of k

for this_k in range(1, k_max + 1):
    # Indices and weights of the first this_k sorted RPs, for located TPs only
    nearest_idx = idx_sort[tps_located, :this_k]
    nearest_w = w[tps_located, :this_k]
    weight_total = np.sum(nearest_w, axis=1)

    # Weighted average of the selected RP latitudes and longitudes
    lat_estimate = np.sum(rfp_coordinates[nearest_idx, 0] * nearest_w, axis=1) / weight_total
    lng_estimate = np.sum(rfp_coordinates[nearest_idx, 1] * nearest_w, axis=1) / weight_total

    # Rows of TPs that are not flagged in tps_located stay NaN
    estimate_k = np.full((len(tps), 2), np.nan)
    estimate_k[tps_located, 0] = lat_estimate
    estimate_k[tps_located, 1] = lng_estimate

    tp_est_location.append(estimate_k)


In [None]:
# True (lat, lng) of every TP
original_positions = tps[['lat', 'lng']].to_numpy()

# Estimated (lat, lng) of every TP, NaN where no estimate exists. The values
# are taken from the last entry of tp_est_location, i.e. the one computed with
# k = k_max, and only for the TPs flagged in tps_located.
estimated_positions = np.full_like(original_positions, np.nan)
estimated_positions[tps_located] = tp_est_location[-1][tps_located]

# Haversine result for each (true position, estimated position) pair
distances = np.array([
    haversine_distance(true_lat, true_lng, est_lat, est_lng)
    for (true_lat, true_lng), (est_lat, est_lng) in zip(original_positions, estimated_positions)
])

# First component of every haversine_distance result
# (presumably kilometres, going by the variable name - TODO confirm)
distances_km = distances[:, 0]

# Ignore the TPs whose distance is NaN
has_distance = ~np.isnan(distances_km)
distances_km = distances_km[has_distance]

# Mean of the squared distances
mse_haversine = np.mean(distances_km ** 2.0)

print(f'Mean Squared Error (Haversine Distance): {mse_haversine}')
