# 5G Localization

## Read matlab files to dataframes

In [6]:
import os
import pandas as pd
from scripts.data_loader import load_matlab_file_as_df

# source file
BASE_DIR = "data/"
FULL_DATA_SET = "Campaign_data_NBIoT_1_2_3_4_5_6_interpolated_smoothed.mat"
filename = os.path.join(BASE_DIR, FULL_DATA_SET)

# load the dataset as pandas dataframe
df = load_matlab_file_as_df(
    filename=filename, 
    dataset='dataSet_smooth', # dataSet, dataSet_interp or dataSet_smooth
    usecols=['lat','lng','measurements_matrix']
)

# better printing of dataframes
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_colwidth', None)  # No limit on column width
pd.set_option('display.width', 1000)  # Set the display width to 1000 characters


print(df.head(1))



         lat       lng                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    measurements_matrix
0  41.824214  12.46525     NPCI  eNodeBID    RSSI    NSINR    NRSRP    NRSRQ      ToA  operatorID  campaignID
0     0    316061 -57.780   5.1500 -66.1900  -8.4000  5530.90          88           1
1    1

## Prepare data

In [17]:
 # Flatten the nested measurements_matrix
flattened_data = []
for idx, row in df.iterrows():
    lat = row['lat']
    lng = row['lng']
    measurements_matrix = row['measurements_matrix']
    for _, measurement in measurements_matrix.iterrows():
        flattened_row = {'lat': lat, 'lng': lng}
        flattened_row.update(measurement.to_dict())
        flattened_data.append(flattened_row)

# Create a new DataFrame from the flattened data
flattened_df = pd.DataFrame(flattened_data)

# Drop invalid rows
flattened_df.dropna(inplace=True)

## Train the model

In [48]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error


print(f'Training KNN regressor with {flattened_df.shape[0]} samples')
X = flattened_df[['lat', 'lng', 'NPCI', 'eNodeBID', 'NSINR', 'NRSRP', 'NRSRQ', 'ToA', 'operatorID', 'campaignID']]
Y = flattened_df['RSSI']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Initialize and train the KNN regressor
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict on the test set
y_pred = knn.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Training KNN regressor with 39966 samples
Mean Squared Error: 1.393609890808636
