In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder 

%matplotlib inline

In [2]:
smarthouse = pd.read_csv("smart_houses.csv")

In [3]:
le = LabelEncoder()
smarthouse['familia'] = le.fit_transform(smarthouse['familia'].values)
#smarthouse['familia']=le.inverse_transform(smarthouse['familia'])

In [4]:
smarthouse['classe'] = le.fit_transform(smarthouse['classe'].values)

In [5]:
target = smarthouse['tipo_sh']

In [6]:
cols_to_use = ['n_moradores','idosos','def_loc','def_b_v','def_cog','def_aud','comodos',
               'r_anual','classe','A','B','C','D','E','instalacao','preco','familia','crianca']

In [7]:
X = smarthouse[cols_to_use]
y = target

In [8]:
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.30)

# NAIVE BAYES #

In [9]:
gnb = GaussianNB()
pred = gnb.fit(X_treino, y_treino).predict(X_teste)

print("Naive-Bayes accuracy: ", accuracy_score(y_teste, pred) )

Naive-Bayes accuracy:  0.5


In [10]:
print(confusion_matrix(y_teste, pred))
print(classification_report(y_teste, pred))

[[2 0 0 1 3 0]
 [3 6 1 1 0 0]
 [0 0 0 1 1 0]
 [0 0 5 3 1 0]
 [1 1 0 0 1 0]
 [0 0 0 0 0 7]]
              precision    recall  f1-score   support

          SH       0.33      0.33      0.33         6
        SHBV       0.86      0.55      0.67        11
        SHDA       0.00      0.00      0.00         2
        SHDC       0.50      0.33      0.40         9
        SHDL       0.17      0.33      0.22         3
         SHI       1.00      1.00      1.00         7

    accuracy                           0.50        38
   macro avg       0.48      0.42      0.44        38
weighted avg       0.62      0.50      0.54        38



In [11]:
print (pd.crosstab(y_teste, pred, rownames=['Real'], colnames=['Predito'], margins=True))

Predito  SH  SHBV  SHDA  SHDC  SHDL  SHI  All
Real                                         
SH        2     0     0     1     3    0    6
SHBV      3     6     1     1     0    0   11
SHDA      0     0     0     1     1    0    2
SHDC      0     0     5     3     1    0    9
SHDL      1     1     0     0     1    0    3
SHI       0     0     0     0     0    7    7
All       6     7     6     6     6    7   38


In [12]:
#smarthouse['familia']=le.inverse_transform(smarthouse['familia'])


In [13]:
#from yellowbrick.classifier import ClassificationReport
#visualizer = ClassificationReport(gnb, classes=target)
#visualizer.fit(X_treino, y_treino) # Fit the training data to the visualizer
#visualizer.score(X_teste, y_teste) # Evaluate the model on the test data
#g = visualizer.poof() # Draw/show/poof the data

# LINEAR SVC#

In [14]:
from sklearn.svm import LinearSVC
svc_model = LinearSVC(max_iter = 2500, random_state=0)
pred = svc_model.fit(X_treino, y_treino).predict(X_teste)
print("LinearSVC accuracy : ",accuracy_score(y_teste, pred, normalize = True))



LinearSVC accuracy :  0.5526315789473685


In [15]:
print (pd.crosstab(y_teste, pred, rownames=['Real'], colnames=['Predito'], margins=True))

Predito  SH  SHBV  SHDA  SHDC  SHDL  SHI  All
Real                                         
SH        4     0     0     0     2    0    6
SHBV      3     3     2     0     3    0   11
SHDA      0     0     1     1     0    0    2
SHDC      0     1     4     4     0    0    9
SHDL      0     1     0     0     2    0    3
SHI       0     0     0     0     0    7    7
All       7     5     7     5     7    7   38


In [16]:
print(confusion_matrix(y_teste, pred))
print(classification_report(y_teste, pred))

[[4 0 0 0 2 0]
 [3 3 2 0 3 0]
 [0 0 1 1 0 0]
 [0 1 4 4 0 0]
 [0 1 0 0 2 0]
 [0 0 0 0 0 7]]
              precision    recall  f1-score   support

          SH       0.57      0.67      0.62         6
        SHBV       0.60      0.27      0.37        11
        SHDA       0.14      0.50      0.22         2
        SHDC       0.80      0.44      0.57         9
        SHDL       0.29      0.67      0.40         3
         SHI       1.00      1.00      1.00         7

    accuracy                           0.55        38
   macro avg       0.57      0.59      0.53        38
weighted avg       0.67      0.55      0.57        38



# K- NEIGHBORS #

In [17]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_treino, y_treino)
pred = neigh.predict(X_teste)
print ("KNeighbors accuracy score : ",accuracy_score(y_teste, pred))

KNeighbors accuracy score :  0.42105263157894735


In [18]:
print (pd.crosstab(y_teste, pred, rownames=['Real'], colnames=['Predito'], margins=True))

Predito  SH  SHBV  SHDA  SHDC  SHDL  SHI  All
Real                                         
SH        1     3     0     1     1    0    6
SHBV      3     4     2     2     0    0   11
SHDA      0     0     1     1     0    0    2
SHDC      0     4     2     3     0    0    9
SHDL      0     0     0     1     2    0    3
SHI       1     0     0     0     1    5    7
All       5    11     5     8     4    5   38


In [19]:
print(confusion_matrix(y_teste, pred))
print(classification_report(y_teste, pred))

[[1 3 0 1 1 0]
 [3 4 2 2 0 0]
 [0 0 1 1 0 0]
 [0 4 2 3 0 0]
 [0 0 0 1 2 0]
 [1 0 0 0 1 5]]
              precision    recall  f1-score   support

          SH       0.20      0.17      0.18         6
        SHBV       0.36      0.36      0.36        11
        SHDA       0.20      0.50      0.29         2
        SHDC       0.38      0.33      0.35         9
        SHDL       0.50      0.67      0.57         3
         SHI       1.00      0.71      0.83         7

    accuracy                           0.42        38
   macro avg       0.44      0.46      0.43        38
weighted avg       0.46      0.42      0.43        38

