**Activity 2- Training a Support Vector Machine (SVM) model for classifying the location based on the Received Signal Strength (RSS) from different Wi-Fi access points **

In [2]:
# 1. Importing libraries
import pandas as pd  
import numpy as np  
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split  
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [3]:
# 2. Importing the dataset
#Download the dataset as a CSV file and store in the local directory. 
#To read data from CSV file, the simplest way is to use read_csv method of the pandas library. 
wifidata = pd.read_csv("wifi_data_train.csv")

In [4]:
# 3. Exploratory Data Analysis
#check the dimensions of the data and see first few records
print("Dimensions of the data:")
print(wifidata.shape)
print("\nFirst few records:")
print(wifidata.head())


Dimensions of the data:
(1860, 10)

First few records:
   UoM_Wireless1  UoM_Wireless6  UoM_Wireless11  eduroam1  eduroam6  \
0           -100            -72             -71      -100       -71   
1           -100            -54             -90      -100       -53   
2           -100            -83             -90      -100       -84   
3           -100           -100            -100      -100      -100   
4           -100            -75             -83      -100       -67   

   eduroam11  Jungle Book10  PROLINK_H5004NK_8766E11  UNIC-wifi11  id  
0        -75           -100                     -100         -100   5  
1       -100            -84                      -63         -100   1  
2        -88            -90                     -100         -100   2  
3       -100           -100                     -100         -100   3  
4       -100           -100                     -100         -100   7  


In [5]:
# 4. Data Preprocessing
# To divide the data into attributes and labels
X = wifidata.drop('id', axis=1)  #contains attributes
y = wifidata['id'] # contains coresponding labels

#divide data into training and test sets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)  

In [6]:
# 5. Training the Algorithm
reg = LogisticRegression().fit(X, y)
#svclassifier = SVC(kernel='linear')  
#svclassifier.fit(X_train, y_train) # train the algorithm on the training data


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
# 6. Making Predictions
X_test
y_pred = reg.predict(X_test)


In [8]:
# 7. Evaluating the Algorithm
#Confusion matrix, precision, recall, and F1 measures are the most commonly used metrics for classification tasks.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test,y_pred))
print("\nClassification Report:")
print(classification_report(y_test,y_pred)) 



Confusion Matrix:
[[65  0  0  0  0  0  0  0]
 [ 0 39  0  0  1  0  1  0]
 [ 0  0 55  0  0  0  0  0]
 [ 0  0  1 27  0  1  0  5]
 [ 1  0  1  0 32  0  3  0]
 [ 0  0  2  0  0 57  0  0]
 [ 0  0  0  0  1  0 53  0]
 [ 0  0  0  1  0  2  0 24]]

Classification Report:
              precision    recall  f1-score   support

           1       0.98      1.00      0.99        65
           2       1.00      0.95      0.97        41
           3       0.93      1.00      0.96        55
           4       0.96      0.79      0.87        34
           5       0.94      0.86      0.90        37
           6       0.95      0.97      0.96        59
           7       0.93      0.98      0.95        54
           8       0.83      0.89      0.86        27

    accuracy                           0.95       372
   macro avg       0.94      0.93      0.93       372
weighted avg       0.95      0.95      0.95       372



In [11]:
accuracy = accuracy_score(y_test,y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))

Accuracy: 94.62%


In [10]:
# 8. Predict the locations of Unknown Data

# Read the unknown data from "unknown_data.csv". 
# Please make sure to have that file within the local directory or add the correct path to the file
unknowndata= pd.read_csv("wifi_data_test1.csv")
unknowndata = unknowndata.drop(columns=['id'])
unknown_pred = reg.predict(unknowndata)
print("\nUnknown Data Labels")
print(unknown_pred)



Unknown Data Labels
[7 4 2 3 6 1 4 5 7 6 2 6 3 1 5 7 4 6 2 1 8 3 7 7 4 6 2 1 5 3 4 7 3 2 4 1 5
 3 7 8 6 1 2 5 4 7 3 6 4 1 2 5 7 4 3 6 1 2 5 6 3 3 4 6 1 2 5 2 6 3 1 4 2 5
 7 6 1 3 5 2 7 4 6 1 3 5 2 7 4 1 6 5 3 2 3 6 1 4 5 3 2 2 6 1 5 7 2 4 3 6 1
 2 7 2 3 4 6 1 7 2 3 6 5 4 1 5 7 2 3 6 1 5 4 7 2 6 3 1 5 7 2 4 6 1 3 7 5 4
 6 4 1 5 3 7 7 3 1 4 7 7 3 6 2 1 5 7 3 4 2 6 1 7 7 3 6 3 4 1 5 2 3 6 2 4 1
 5 7 6 3 2 1 8 7 5 6 2 3 1 5 7 4 6 2 3 1 5 6 4 2 3 1 5 6 2 5 1 3 4 6 7 1 3
 4 6 5 2 7 5 1 3 4 6 2 7 5 1 3 6 2 4 7 5 1 3 4 7 2 5 1 4 6 3 2 5 1 4 4 3 3
 2 1 5 4 3 4 3 5 2 7 7 7 1 6 4 7 5 3 2 1 6 7 7 4 8 1 2 3 4 6 5 7 1 2 3 8 6
 4 5 7 1]


In [14]:
feature_values_for_single_data_point = [-100,-71,-75,-100,-100,-100,-100,-100,-100]
single_data_point_features = [feature_values_for_single_data_point]  # Replace with your actual feature values
predicted_class = reg.predict(single_data_point_features)
print(predicted_class)

[5]


