In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer, HashingVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

## Import data

In [8]:
df = pd.read_excel('data/wifi.xlsx', sheet_name='Main data')
df = df[['ssid', 'mackonversi', 'rssi', 'lokasi']]
df

Unnamed: 0,ssid,mackonversi,rssi,lokasi
0,KRAI,273146522812986,-86,Perpustakaan D4
1,eepisMobile,264428361941459,-85,Perpustakaan D4
2,eepiswlan,211266695032288,-88,Perpustakaan D4
3,eepisGuest,264428361175954,-62,Perpustakaan D4
4,eepisGuest,211266695032290,-87,Perpustakaan D4
...,...,...,...,...
2239,eepisGuest,220077978544770,-82,Perpustakaan Pasca
2240,Lab Emerge _ WSC,158746488395693,-80,Perpustakaan Pasca
2241,senatGuest,113506318785,-76,Perpustakaan Pasca
2242,ZTE_2.4G_YYsFKz,57330268306608,-87,Perpustakaan Pasca


## Preprocessing

In [9]:
# Misal df adalah dataframe kamu
le = LabelEncoder()
df['mackonversi_encoded'] = le.fit_transform(df['mackonversi'])
df['ssid_encoded'] = le.fit_transform(df['ssid'])
df['lokasi_encoded'] = le.fit_transform(df['lokasi'])

In [None]:
df

In [20]:
df.head(5)

Unnamed: 0,ssid,mackonversi,rssi,lokasi,mackonversi_encoded,ssid_encoded,lokasi_encoded
0,KRAI,273146522812986,-86,Perpustakaan D4,428,65,6
1,eepisMobile,264428361941459,-85,Perpustakaan D4,413,148,6
2,eepiswlan,211266695032288,-88,Perpustakaan D4,192,149,6
3,eepisGuest,264428361175954,-62,Perpustakaan D4,403,147,6
4,eepisGuest,211266695032290,-87,Perpustakaan D4,194,147,6


In [13]:
df[df['mackonversi_encoded'] == 192]

Unnamed: 0,ssid,mackonversi,rssi,lokasi,mackonversi_encoded,ssid_encoded,lokasi_encoded
2,eepiswlan,211266695032288,-88,Perpustakaan D4,192,149,6
72,eepiswlan,211266695032288,-89,Perpustakaan D4,192,149,6
242,eepiswlan,211266695032288,-85,Perpustakaan D4,192,149,6


## Visualization

## Spliting Data

In [38]:
X=df[['mackonversi_encoded', 'ssid_encoded', 'rssi']]
y=df.lokasi_encoded

In [51]:
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)

In [52]:
print('Banyak data x_train :',len(x_train))
print('Banyak data x_test  :',len(x_test))
print('Banyak data y_train :',len(y_train))
print('Banyak data y_test  :',len(y_test))

Banyak data x_train : 1570
Banyak data x_test  : 674
Banyak data y_train : 1570
Banyak data y_test  : 674


## Training Data

In [53]:
cvec=CountVectorizer()
tvec=TfidfVectorizer()
hvec=HashingVectorizer()

In [None]:
from sklearn.svm import SVC
# from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier 
from sklearn.neighbors import KNeighborsClassifier # eclaudian distance
from sklearn.naive_bayes import GaussianNB


# clf1 = SVC(kernel="linear") 
# clf2 = MultinomialNB()
# clf3 = RandomForestClassifier()
# clf4 = KNeighborsClassifier(n_neighbors=5) 

from sklearn.pipeline import make_pipeline

model1 = make_pipeline(SVC(kernel="rbf"))
model2 = make_pipeline(GaussianNB())
model3 = make_pipeline(RandomForestClassifier())
model4 = make_pipeline(KNeighborsClassifier(n_neighbors=5))

In [55]:
model1.fit(x_train, y_train)

In [56]:
hasil1=model1.predict(x_test)

In [57]:
matrix = classification_report(y_test,hasil1)
print('Classification report : \n',matrix)

Classification report : 
               precision    recall  f1-score   support

           0       0.29      0.11      0.16        88
           1       0.14      0.35      0.20        48
           2       0.26      0.58      0.35        57
           3       0.25      0.55      0.34        64
           4       0.00      0.00      0.00        41
           5       0.21      0.53      0.31        88
           6       0.00      0.00      0.00        82
           7       0.00      0.00      0.00        58
           8       0.06      0.01      0.02        82
           9       0.00      0.00      0.00        66

    accuracy                           0.21       674
   macro avg       0.12      0.21      0.14       674
weighted avg       0.13      0.21      0.14       674



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [59]:
model3.fit(x_train, y_train)

In [60]:
hasil3=model3.predict(x_test)

In [61]:
matrix = classification_report(y_test,hasil1)
print('Classification report : \n',matrix)

Classification report : 
               precision    recall  f1-score   support

           0       0.29      0.11      0.16        88
           1       0.14      0.35      0.20        48
           2       0.26      0.58      0.35        57
           3       0.25      0.55      0.34        64
           4       0.00      0.00      0.00        41
           5       0.21      0.53      0.31        88
           6       0.00      0.00      0.00        82
           7       0.00      0.00      0.00        58
           8       0.06      0.01      0.02        82
           9       0.00      0.00      0.00        66

    accuracy                           0.21       674
   macro avg       0.12      0.21      0.14       674
weighted avg       0.13      0.21      0.14       674



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [64]:
hasilSVM = accuracy_score(hasil1, y_test)
precision1 = precision_score(y_test, hasil1, average='weighted')
recall1 = recall_score(y_test, hasil1, average='weighted')
f1_1 = f1_score(y_test, hasil1, average='weighted')

hasilRandomForest = accuracy_score(hasil3, y_test)
precision3 = precision_score(y_test, hasil3, average='weighted')
recall3 = recall_score(y_test, hasil3, average='weighted')
f1_3 = f1_score(y_test, hasil3, average='weighted')

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [65]:
model = {'Model':['Decision Tree Classifier',
                  'Multinomial Naive Bayes',
                  'Random Forest Classifier',
                 ],
         'AccuracyScore':[hasilSVM, hasilRandomForest],
         'Precision':[precision1, precision3],
         'Recall':[recall1, recall3],
         'F1-Score':[f1_1, f1_3]
         }
model_df = pd.DataFrame(model)
model_df

ValueError: All arrays must be of the same length