#### Import Modules

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler


#### Import Data

In [9]:
# Load the LOS file and add a label column
los_df = pd.read_excel('LOS_DATA.xlsx')
los_df['Label'] = 1  # LOS = 1

# Load the NLOS file and add a label column
nlos_df = pd.read_excel('NLOS_DATA.xlsx')
nlos_df['Label'] = 0  # NLOS = 0


In [10]:
los_df.head()

Unnamed: 0,Year,Month,Date,Hour,Min,Sec,PRN,Elevation,Azimuth,SNR,Label
0,,,,,,,,,,,1
1,2023.0,6.0,2.0,0.0,34.0,56.0,GPS/ 8,14.09,177.32,40.22,1
2,2023.0,6.0,2.0,0.0,34.0,58.0,GPS/ 8,14.1,177.31,40.51,1
3,2023.0,6.0,2.0,0.0,35.0,0.0,GPS/ 8,14.1,177.3,40.84,1
4,2023.0,6.0,2.0,0.0,35.0,2.0,GPS/ 8,14.11,177.29,41.04,1


In [11]:
nlos_df.head()

Unnamed: 0,Year,Month,Date,Hour,Min,Sec,PRN,Elevation,Azimuth,SNR,Label
0,2023,6,1,23,52,0,GPS/ 3,40.53,325.34,48.42,0
1,2023,6,1,23,52,2,GPS/ 3,40.54,325.34,48.34,0
2,2023,6,1,23,52,4,GPS/ 3,40.56,325.33,48.49,0
3,2023,6,1,23,52,6,GPS/ 3,40.57,325.32,48.78,0
4,2023,6,1,23,52,8,GPS/ 3,40.58,325.32,48.84,0


#### Preprocess Data

In [12]:
df = pd.concat([los_df, nlos_df], ignore_index=True) #Combine both files

In [15]:
df.head()

Unnamed: 0,Year,Month,Date,Hour,Min,Sec,PRN,Elevation,Azimuth,SNR,Label
0,,,,,,,,,,,1
1,2023.0,6.0,2.0,0.0,34.0,56.0,GPS/ 8,14.09,177.32,40.22,1
2,2023.0,6.0,2.0,0.0,34.0,58.0,GPS/ 8,14.1,177.31,40.51,1
3,2023.0,6.0,2.0,0.0,35.0,0.0,GPS/ 8,14.1,177.3,40.84,1
4,2023.0,6.0,2.0,0.0,35.0,2.0,GPS/ 8,14.11,177.29,41.04,1


In [16]:
df.tail()

Unnamed: 0,Year,Month,Date,Hour,Min,Sec,PRN,Elevation,Azimuth,SNR,Label
26484,2023.0,6.0,2.0,23.0,50.0,7.0,GPS/ 3,41.49,324.93,49.42,0
26485,2023.0,6.0,2.0,23.0,50.0,7.0,GPS/ 3,41.49,324.93,49.42,0
26486,2023.0,6.0,2.0,23.0,50.0,7.0,GPS/ 3,41.49,324.93,49.42,0
26487,2023.0,6.0,2.0,23.0,50.0,7.0,GPS/ 3,41.49,324.93,49.42,0
26488,2023.0,6.0,2.0,23.0,50.0,7.0,GPS/ 3,41.49,324.93,49.42,0


In [17]:
df = df[['SNR', 'Elevation', 'Azimuth', 'Label']] # Select Required Columns

In [23]:
missing_rows = df[df.isnull().any(axis=1)]
print(missing_rows)
# The missing column is from the first line of LOS_Data file

   SNR  Elevation  Azimuth  Label
0  NaN        NaN      NaN      1


In [25]:
df = df.dropna()

In [26]:
X = df[['SNR', 'Elevation', 'Azimuth']]
y = df['Label']

In [27]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [30]:
from sklearn.model_selection import train_test_split

X = df[['SNR', 'Elevation', 'Azimuth']]
y = df['Label']  # 1 = LOS, 0 = NLOS

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


#### Logistic Regression

In [36]:
from sklearn.linear_model import LogisticRegression

logreg_model = LogisticRegression(C=1.0, penalty='l2', solver='liblinear', random_state=42)
logreg_model.fit(X_train, y_train)


#### Support Vector Machine (SVM)

In [37]:
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_model.fit(X_train, y_train)


#### Random Forest

In [38]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(
    n_estimators=100, max_depth=None, random_state=42
)
rf_model.fit(X_train, y_train)


#### XGBoost

In [39]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
xgb_model.fit(X_train, y_train)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


#### Testing Models

In [40]:
from sklearn.metrics import accuracy_score

# Predict on test set
logreg_preds = logreg_model.predict(X_test)
svm_preds = svm_model.predict(X_test)
rf_preds = rf_model.predict(X_test)
xgb_preds = xgb_model.predict(X_test)

# Print accuracy
print("Logistic Regression Accuracy:", accuracy_score(y_test, logreg_preds))
print("SVM Accuracy:", accuracy_score(y_test, svm_preds))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_preds))
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_preds))


Logistic Regression Accuracy: 0.721593053982635
SVM Accuracy: 0.9867874669686674
Random Forest Accuracy: 1.0
XGBoost Accuracy: 1.0


#### Comparing Models with K-Fold Vaildation

In [41]:
from sklearn.model_selection import cross_val_score

# Use k=5 for 5-fold cross-validation
k = 5

print("Logistic Regression CV Accuracy:", cross_val_score(logreg_model, X, y, cv=k).mean())
print("SVM CV Accuracy:", cross_val_score(svm_model, X, y, cv=k).mean())
print("Random Forest CV Accuracy:", cross_val_score(rf_model, X, y, cv=k).mean())
print("XGBoost CV Accuracy:", cross_val_score(xgb_model, X, y, cv=k).mean())


Logistic Regression CV Accuracy: 0.6586147575431237
SVM CV Accuracy: 0.5747371336995455
Random Forest CV Accuracy: 0.7586082366187604


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost CV Accuracy: 0.7055942119277613
