In [2]:
import pandas as pd  
import numpy as np  
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split  
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn.metrics import accuracy_score
import xgboost as xgb

In [3]:
# 2. Importing the dataset
#Download the dataset as a CSV file and store in the local directory. 
#To read data from CSV file, the simplest way is to use read_csv method of the pandas library. 
wifidata = pd.read_csv("wifi_data_train.csv")

In [4]:
# 3. Exploratory Data Analysis
#check the dimensions of the data and see first few records
print("Dimensions of the data:")
print(wifidata.shape)
print("\nFirst few records:")
print(wifidata.head())

Dimensions of the data:
(1860, 10)

First few records:
   UoM_Wireless1  UoM_Wireless6  UoM_Wireless11  eduroam1  eduroam6  \
0           -100            -72             -71      -100       -71   
1           -100            -54             -90      -100       -53   
2           -100            -83             -90      -100       -84   
3           -100           -100            -100      -100      -100   
4           -100            -75             -83      -100       -67   

   eduroam11  Jungle Book10  PROLINK_H5004NK_8766E11  UNIC-wifi11  id  
0        -75           -100                     -100         -100   5  
1       -100            -84                      -63         -100   1  
2        -88            -90                     -100         -100   2  
3       -100           -100                     -100         -100   3  
4       -100           -100                     -100         -100   7  


In [5]:
# 4. Data Preprocessing
# To divide the data into attributes and labels
X = wifidata.drop('id', axis=1)  #contains attributes
y = wifidata['id'] # contains coresponding labels

#divide data into training and test sets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)  

In [14]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.fit_transform(y_test)

In [8]:
# Create the XGBoost classifier
xgb_classifier = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0)  # Adjust hyperparameters as needed

# Train the classifier on the training data
xgb_classifier.fit(X_train, y_train_encoded)

In [10]:
# 6. Making Predictions
X_test
y_pred = xgb_classifier.predict(X_test)


In [15]:
# 7. Evaluating the Algorithm
#Confusion matrix, precision, recall, and F1 measures are the most commonly used metrics for classification tasks.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_encoded,y_pred))
print("\nClassification Report:")
print(classification_report(y_test_encoded,y_pred)) 


Confusion Matrix:
[[52  0  0  0  0  0  0  0]
 [ 0 44  0  0  1  0  1  0]
 [ 0  0 47  0  0  0  0  0]
 [ 0  0  0 43  0  0  0  3]
 [ 0  1  1  0 32  0  3  0]
 [ 0  0  2  0  0 52  1  0]
 [ 0  6  0  0  2  1 52  0]
 [ 0  0  0  6  0  0  0 22]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        52
           1       0.86      0.96      0.91        46
           2       0.94      1.00      0.97        47
           3       0.88      0.93      0.91        46
           4       0.91      0.86      0.89        37
           5       0.98      0.95      0.96        55
           6       0.91      0.85      0.88        61
           7       0.88      0.79      0.83        28

    accuracy                           0.92       372
   macro avg       0.92      0.92      0.92       372
weighted avg       0.93      0.92      0.92       372



In [16]:
accuracy = accuracy_score(y_test_encoded,y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))

Accuracy: 92.47%


In [18]:
# 8. Predict the locations of Unknown Data

# Read the unknown data from "unknown_data.csv". 
# Please make sure to have that file within the local directory or add the correct path to the file
unknowndata= pd.read_csv("wifi_data_test1.csv")
unknowndata = unknowndata.drop(columns=['id'])
unknown_pred = xgb_classifier.predict(unknowndata)
print("\nUnknown Data Labels")
print(unknown_pred)


Unknown Data Labels
[6 7 1 2 5 0 3 4 4 7 1 5 2 0 4 6 3 5 1 0 7 2 4 6 3 5 1 0 4 2 7 6 2 1 3 0 4
 2 1 3 5 0 1 4 3 6 2 5 3 0 1 4 6 3 2 5 0 1 4 1 2 2 3 5 0 1 4 6 5 2 0 3 1 4
 6 5 0 2 4 1 6 3 5 0 2 4 1 6 3 0 5 4 2 1 5 5 0 3 4 2 1 1 5 0 4 1 1 3 2 5 0
 4 1 1 2 3 5 0 6 1 2 5 4 3 0 4 6 1 2 5 0 4 3 6 6 5 2 0 4 6 1 3 5 0 2 6 4 5
 5 3 0 4 2 6 6 2 0 3 6 6 2 5 1 0 4 6 2 3 1 5 0 4 6 2 5 1 3 0 4 6 2 5 1 3 0
 4 6 5 2 1 0 3 6 4 5 1 2 0 4 6 3 5 1 2 0 4 5 3 1 2 0 4 5 1 4 0 2 3 5 6 0 2
 3 5 4 1 6 4 0 2 3 5 1 6 4 0 2 5 1 3 6 4 0 2 5 6 1 4 0 3 5 2 1 4 0 7 3 5 2
 1 0 4 7 2 3 2 4 1 6 6 6 0 5 7 6 4 2 1 0 5 6 6 3 7 0 1 2 3 5 4 6 0 1 2 7 5
 3 4 6 0]


In [33]:
feature_values_for_single_data_point = [-100,-71,-75,-100,-100,-100,-100,-100,-100]

In [34]:
single_data_point_encoded = label_encoder.transform([1])#single_data_point_class_label])
single_data_point_features = [feature_values_for_single_data_point]  # Replace with your actual feature values
predicted_class = xgb_classifier.predict(single_data_point_features)
print(predicted_class+1)

[5]
