In [45]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier

from time import time
from collections import Counter

In [46]:
df = pd.read_csv('Occupancy_Estimation.csv')
df.head()

Unnamed: 0,S1_Temp,S2_Temp,S3_Temp,S4_Temp,S1_Light,S2_Light,S3_Light,S4_Light,S1_Sound,S2_Sound,S3_Sound,S4_Sound,S5_CO2,S5_CO2_Slope,S6_PIR,S7_PIR,Room_Occupancy_Count
0,24.94,24.75,24.56,25.38,121,34,53,40,0.08,0.19,0.06,0.06,390,0.769231,0,0,1
1,24.94,24.75,24.56,25.44,121,33,53,40,0.93,0.05,0.06,0.06,390,0.646154,0,0,1
2,25.0,24.75,24.5,25.44,121,34,53,40,0.43,0.11,0.08,0.06,390,0.519231,0,0,1
3,25.0,24.75,24.56,25.44,121,34,53,40,0.41,0.1,0.1,0.09,390,0.388462,0,0,1
4,25.0,24.75,24.56,25.44,121,34,54,40,0.18,0.06,0.06,0.06,390,0.253846,0,0,1


In [47]:
y = df['Room_Occupancy_Count']
x = df.drop(['Room_Occupancy_Count'], axis = 1)

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

In [48]:
def classify(model, x_train, y_train, x_test, y_test):
    train_t = time()
    model.fit(x_train, y_train)
    print(f'training time: {time() - train_t:.3f}s')
    
    test_t = time()
    prediction = model.predict(x_test)
    print(f'testing time: {time() - test_t:.3f}s')
    
    print(classification_report(y_test,prediction))

In [49]:
knn = KNeighborsClassifier(n_neighbors=6)
classify(knn, x_train, y_train, x_test, y_test)

training time: 0.004s
testing time: 0.059s
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       626
           1       1.00      0.99      0.99       151
           2       0.97      0.98      0.97       158
           3       0.97      0.97      0.97       115

    accuracy                           0.99      1050
   macro avg       0.99      0.99      0.99      1050
weighted avg       0.99      0.99      0.99      1050



In [50]:
dtc = DecisionTreeClassifier()
classify(dtc, x_train, y_train, x_test, y_test)

training time: 0.012s
testing time: 0.002s
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       626
           1       1.00      0.98      0.99       151
           2       0.96      0.97      0.97       158
           3       0.97      0.97      0.97       115

    accuracy                           0.99      1050
   macro avg       0.98      0.98      0.98      1050
weighted avg       0.99      0.99      0.99      1050



In [51]:
gnb = GaussianNB()
classify(gnb, x_train, y_train, x_test, y_test)

training time: 0.008s
testing time: 0.004s
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       626
           1       1.00      0.96      0.98       151
           2       0.92      0.97      0.94       158
           3       0.96      0.99      0.97       115

    accuracy                           0.98      1050
   macro avg       0.97      0.98      0.97      1050
weighted avg       0.98      0.98      0.98      1050



In [52]:
km = KMeans(n_clusters = 4, random_state=7)
train_t = time()
km.fit(x_train)
print(f'training time: {time() - train_t:.3f}s')

test_t = time()
prediction = km.predict(x_test)
print(f'testing time: {time() - test_t:.3f}s')

cls1, cls2, cls3, cls4 = [], [], [], []

for i, x in enumerate(prediction):
    if x == 0:
        cls1.append(y_test.iloc[i])
    elif x == 1:
        cls2.append(y_test.iloc[i])
    elif x == 2:
        cls3.append(y_test.iloc[i])
    elif x == 3:
        cls4.append(y_test.iloc[i])
     
    
for i, cls in enumerate([cls1, cls2, cls3, cls4]):
    counter = Counter(list(cls))
    print(f'cluster {i}: ', counter)

training time: 0.051s
testing time: 0.003s
cluster 0:  Counter({0: 106, 2: 68, 1: 50})
cluster 1:  Counter({0: 487, 1: 100})
cluster 2:  Counter({2: 85, 3: 4, 1: 1})
cluster 3:  Counter({3: 111, 0: 33, 2: 5})
