In [1]:
#import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix

In [2]:
################## KNN ###################
#import Data
data=pd.read_excel("data.xlsx")
Y=data["Type"]
X=data[["Price","Surface","Bedrooms"]]
#split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, Y,random_state=0)
#Since Knn works better on normlized data, we scale it using sklearn library
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train) 
# we must apply the scaling to the test set that we computed for the training set
X_test_scaled = scaler.transform(X_test)
#build the model ( choose the k parameters )
knn = KNeighborsClassifier(n_neighbors = 4)
#fit the data
knn.fit(X_train_scaled, y_train)
#check Accuracy on both Train and Test sets
print('Accuracy of K-NN classifier on training set: {:.2f}'
     .format(knn.score(X_train_scaled, y_train)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
     .format(knn.score(X_test_scaled, y_test)))

Accuracy of K-NN classifier on training set: 0.86
Accuracy of K-NN classifier on test set: 0.80


In [3]:
################## SVM ###################
''' What is SVM ?
In machine learning, support-vector machines (SVMs, also support-vector networks) are supervised learning models with associated learning algorithms that analyze data for classification and regression analysis.
SVMs are one of the most robust prediction methods
In addition to performing linear classification, SVMs can efficiently perform a non-linear classification using what is called the kernel trick, implicitly mapping their inputs into high-dimensional feature spaces.
'''
#import Data
data=pd.read_excel("data.xlsx")
Y=data["Type"]
X=data[["Price","Surface","Bedrooms"]]
#split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, Y,random_state=0)
#Since Knn works better on normlized data, we scale it using sklearn library
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
#Build the model (C parameter is for Regularization)
SVM = SVC(C=20)
#Fit the Data
SVM.fit(X_train_scaled, y_train)
#Check accuracy on both train and test sets
print('RBF-kernel SVC training set accuracy: {:.2f}'
     .format(SVM.score(X_train_scaled, y_train)))
print('RBF-kernel SVC test set accuracy: {:.2f}'
     .format(SVM.score(X_test_scaled, y_test)))

RBF-kernel SVC training set accuracy: 0.95
RBF-kernel SVC test set accuracy: 0.83


In [4]:
################## EVALUATION ###################
#We build the confusion matrix for SVM model
svm_predicted = SVM.predict(X_test)
print(confusion_matrix(y_test, svm_predicted))
print(classification_report(y_test, svm_predicted))
#We build the confusion matrix for KNN model
KNN_predicted = knn.predict(X_test)
print(confusion_matrix(y_test, KNN_predicted))
print(classification_report(y_test, KNN_predicted))
## We can now choose our model not only based on the accuracy like we did originally but also based on other metrics that can be as important like recall or f-1 score or precision

[[ 0  0  7]
 [ 0  0 16]
 [ 0  0  7]]
              precision    recall  f1-score   support

           2       0.00      0.00      0.00         7
           3       0.00      0.00      0.00        16
           4       0.23      1.00      0.38         7

    accuracy                           0.23        30
   macro avg       0.08      0.33      0.13        30
weighted avg       0.05      0.23      0.09        30

[[ 0  0  7]
 [ 0  0 16]
 [ 0  0  7]]
              precision    recall  f1-score   support

           2       0.00      0.00      0.00         7
           3       0.00      0.00      0.00        16
           4       0.23      1.00      0.38         7

    accuracy                           0.23        30
   macro avg       0.08      0.33      0.13        30
weighted avg       0.05      0.23      0.09        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
