In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer, HashingVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

## Import data

In [2]:
df = pd.read_excel('data/wifi.xlsx', sheet_name='Main data')
df = df[['ssid', 'mackonversi', 'rssi', 'lokasi']]
df

Unnamed: 0,ssid,mackonversi,rssi,lokasi
0,KRAI,273146522812986,-86,Perpustakaan D4
1,eepisMobile,264428361941459,-85,Perpustakaan D4
2,eepiswlan,211266695032288,-88,Perpustakaan D4
3,eepisGuest,264428361175954,-62,Perpustakaan D4
4,eepisGuest,211266695032290,-87,Perpustakaan D4
...,...,...,...,...
2239,eepisGuest,220077978544770,-82,Perpustakaan Pasca
2240,Lab Emerge _ WSC,158746488395693,-80,Perpustakaan Pasca
2241,senatGuest,113506318785,-76,Perpustakaan Pasca
2242,ZTE_2.4G_YYsFKz,57330268306608,-87,Perpustakaan Pasca


## Preprocessing

In [3]:
# Misal df adalah dataframe kamu
le = LabelEncoder()
df['mackonversi_encoded'] = le.fit_transform(df['mackonversi'])
df['ssid_encoded'] = le.fit_transform(df['ssid'])
df['lokasi_encoded'] = le.fit_transform(df['lokasi'])

In [4]:
df

Unnamed: 0,ssid,mackonversi,rssi,lokasi,mackonversi_encoded,ssid_encoded,lokasi_encoded
0,KRAI,273146522812986,-86,Perpustakaan D4,428,65,6
1,eepisMobile,264428361941459,-85,Perpustakaan D4,413,148,6
2,eepiswlan,211266695032288,-88,Perpustakaan D4,192,149,6
3,eepisGuest,264428361175954,-62,Perpustakaan D4,403,147,6
4,eepisGuest,211266695032290,-87,Perpustakaan D4,194,147,6
...,...,...,...,...,...,...,...
2239,eepisGuest,220077978544770,-82,Perpustakaan Pasca,336,147,7
2240,Lab Emerge _ WSC,158746488395693,-80,Perpustakaan Pasca,146,79,7
2241,senatGuest,113506318785,-76,Perpustakaan Pasca,1,166,7
2242,ZTE_2.4G_YYsFKz,57330268306608,-87,Perpustakaan Pasca,47,134,7


In [5]:
df.head(5)

Unnamed: 0,ssid,mackonversi,rssi,lokasi,mackonversi_encoded,ssid_encoded,lokasi_encoded
0,KRAI,273146522812986,-86,Perpustakaan D4,428,65,6
1,eepisMobile,264428361941459,-85,Perpustakaan D4,413,148,6
2,eepiswlan,211266695032288,-88,Perpustakaan D4,192,149,6
3,eepisGuest,264428361175954,-62,Perpustakaan D4,403,147,6
4,eepisGuest,211266695032290,-87,Perpustakaan D4,194,147,6


In [6]:
df[df['mackonversi_encoded'] == 192]

Unnamed: 0,ssid,mackonversi,rssi,lokasi,mackonversi_encoded,ssid_encoded,lokasi_encoded
2,eepiswlan,211266695032288,-88,Perpustakaan D4,192,149,6
72,eepiswlan,211266695032288,-89,Perpustakaan D4,192,149,6
242,eepiswlan,211266695032288,-85,Perpustakaan D4,192,149,6


## Visualization

## Spliting Data

In [7]:
X=df[['mackonversi_encoded', 'ssid_encoded', 'rssi']]
y=df.lokasi_encoded

In [8]:
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.svm import SVC

# Parameter Grid untuk SVM
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
    'max_iter': [1000, 5000, 10000],
    'class_weight': [None, 'balanced']
}

# Inisialisasi dan Grid Search
svc = SVC()
grid_svc = GridSearchCV(svc, param_grid, refit=True, verbose=3, cv=5, return_train_score=True)
grid_svc.fit(x_train, y_train)

# Simpan semua hasil ke dalam DataFrame
results_df = pd.DataFrame(grid_svc.cv_results_)

# Simpan ke file Excel (opsional)
# lokasi_terpilih = 'lokasi_wifi'  # Ganti dengan nama lokasi sebenarnya jika diperlukan
# file_name_SVM = f"hasil_SVM_{lokasi_terpilih}.xlsx"
# results_df.to_excel(file_name_SVM, index=False)  # Uncomment jika ingin menyimpan ke Excel

# Evaluasi hasil terbaik
y_pred_svm = grid_svc.predict(x_test)
akurasi_SVM = round(accuracy_score(y_test, y_pred_svm) * 100, 2)

# Cetak hasil evaluasi
print(f"Best Parameters: {grid_svc.best_params_}")
print(f"Akurasi Model SVM: {akurasi_SVM}%")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_svm))

Fitting 5 folds for each of 120 candidates, totalling 600 fits
[CV 1/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=1000;, score=(train=0.167, test=0.169) total time=   0.1s
[CV 2/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=1000;, score=(train=0.168, test=0.166) total time=   0.1s
[CV 3/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=1000;, score=(train=0.168, test=0.166) total time=   0.1s
[CV 4/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=1000;, score=(train=0.167, test=0.169) total time=   0.0s
[CV 5/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=1000;, score=(train=0.167, test=0.169) total time=   0.0s
[CV 1/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=5000;, score=(train=0.167, test=0.169) total time=   0.1s
[CV 2/5] END C=0.1, class_weight=None, gamma=1, kernel=rbf, max_iter=5000;, score=(train=0.168, test=0.166) total time=   0.0s
[CV 3/5] END C=0.1, class_weight=None, gamma=1, 



[CV 2/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.971, test=0.748) total time=   0.1s




[CV 3/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.975, test=0.774) total time=   0.1s




[CV 4/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.970, test=0.783) total time=   0.1s




[CV 5/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.975, test=0.758) total time=   0.1s
[CV 1/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.972, test=0.761) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.971, test=0.748) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.975, test=0.774) total time=   0.0s
[CV 4/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.970, test=0.783) total time=   0.1s
[CV 5/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.975, test=0.758) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=10000;, score=(train=0.972, test=0.761) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.1, kernel=rbf, max_iter=10000;, score=(train=0.97



[CV 2/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.953, test=0.815) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.953, test=0.809) total time=   0.0s




[CV 4/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.950, test=0.834) total time=   0.0s
[CV 5/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.956, test=0.825) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.955, test=0.812) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.953, test=0.815) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.953, test=0.809) total time=   0.0s
[CV 4/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.950, test=0.834) total time=   0.0s
[CV 5/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.956, test=0.825) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.01, kernel=rbf, max_iter=10000;, score=(tra



[CV 1/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.893, test=0.790) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.882, test=0.764) total time=   0.0s




[CV 3/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.878, test=0.777) total time=   0.0s
[CV 4/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.877, test=0.803) total time=   0.0s




[CV 5/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.885, test=0.768) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.895, test=0.793) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.882, test=0.764) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.877, test=0.777) total time=   0.0s




[CV 4/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.878, test=0.803) total time=   0.0s
[CV 5/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.884, test=0.768) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=10000;, score=(train=0.895, test=0.793) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=10000;, score=(train=0.882, test=0.764) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=10000;, score=(train=0.877, test=0.777) total time=   0.0s
[CV 4/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=10000;, score=(train=0.878, test=0.803) total time=   0.0s
[CV 5/5] END C=100, class_weight=None, gamma=0.001, kernel=rbf, max_iter=10000;, score=(train=0.884, test=0.768) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=1000



[CV 2/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.686, test=0.621) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.679, test=0.624) total time=   0.0s




[CV 4/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.673, test=0.643) total time=   0.0s
[CV 5/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.692, test=0.618) total time=   0.0s




[CV 1/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.677, test=0.576) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.686, test=0.621) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.679, test=0.621) total time=   0.0s
[CV 4/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.674, test=0.650) total time=   0.0s




[CV 5/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.692, test=0.618) total time=   0.0s
[CV 1/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.677, test=0.576) total time=   0.0s
[CV 2/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.686, test=0.621) total time=   0.0s
[CV 3/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.679, test=0.621) total time=   0.0s
[CV 4/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.674, test=0.650) total time=   0.0s
[CV 5/5] END C=100, class_weight=None, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.692, test=0.618) total time=   0.0s
[CV 1/5] END C=100, class_weight=balanced, gamma=1, kernel=rbf, max_iter=1000;, score=(train=0.976, test=0.506) total time=   0.1s
[CV 2/5] END C=100, class_weight=balanced, gamma=1, kernel=rbf, max_iter



[CV 1/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.975, test=0.768) total time=   0.0s




[CV 2/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.971, test=0.748) total time=   0.0s




[CV 3/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.975, test=0.771) total time=   0.0s




[CV 4/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.970, test=0.783) total time=   0.0s




[CV 5/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=1000;, score=(train=0.975, test=0.758) total time=   0.1s
[CV 1/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.975, test=0.768) total time=   0.1s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.971, test=0.748) total time=   0.1s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.975, test=0.771) total time=   0.1s
[CV 4/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.970, test=0.783) total time=   0.1s
[CV 5/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=5000;, score=(train=0.975, test=0.758) total time=   0.3s
[CV 1/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, max_iter=10000;, score=(train=0.975, test=0.768) total time=   0.1s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.1, kernel=rbf, ma



[CV 2/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.955, test=0.818) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.952, test=0.812) total time=   0.0s




[CV 4/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.951, test=0.844) total time=   0.0s
[CV 5/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=1000;, score=(train=0.953, test=0.831) total time=   0.0s




[CV 1/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.955, test=0.809) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.955, test=0.818) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.952, test=0.812) total time=   0.0s
[CV 4/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.950, test=0.844) total time=   0.0s
[CV 5/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=5000;, score=(train=0.953, test=0.831) total time=   0.0s
[CV 1/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=10000;, score=(train=0.955, test=0.809) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.01, kernel=rbf, max_iter=10000;, score=(train=0.955, test=0.818) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.01, kerne



[CV 1/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.891, test=0.777) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.877, test=0.774) total time=   0.0s




[CV 3/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.881, test=0.771) total time=   0.0s
[CV 4/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.879, test=0.787) total time=   0.0s




[CV 5/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=1000;, score=(train=0.878, test=0.777) total time=   0.0s
[CV 1/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.891, test=0.777) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.877, test=0.774) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.881, test=0.771) total time=   0.0s
[CV 4/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.879, test=0.787) total time=   0.0s
[CV 5/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=5000;, score=(train=0.878, test=0.777) total time=   0.0s
[CV 1/5] END C=100, class_weight=balanced, gamma=0.001, kernel=rbf, max_iter=10000;, score=(train=0.891, test=0.777) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.001



[CV 2/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.672, test=0.643) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.659, test=0.611) total time=   0.0s




[CV 4/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.663, test=0.621) total time=   0.0s
[CV 5/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=1000;, score=(train=0.671, test=0.583) total time=   0.0s




[CV 1/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.659, test=0.586) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.674, test=0.643) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.660, test=0.611) total time=   0.0s
[CV 4/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.665, test=0.618) total time=   0.0s
[CV 5/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=5000;, score=(train=0.671, test=0.583) total time=   0.0s
[CV 1/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.659, test=0.586) total time=   0.0s
[CV 2/5] END C=100, class_weight=balanced, gamma=0.0001, kernel=rbf, max_iter=10000;, score=(train=0.674, test=0.643) total time=   0.0s
[CV 3/5] END C=100, class_weight=balanced, gam



In [10]:
from sklearn.naive_bayes import GaussianNB

# Parameter Grid
param_grid_gnb = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6]
}

gnb = GaussianNB()
grid_gnb = GridSearchCV(gnb, param_grid_gnb, refit=True, verbose=3, cv=5)
grid_gnb.fit(x_train, y_train)

# Evaluasi
y_pred_gnb = grid_gnb.predict(x_test)
akurasi_GNB = round(accuracy_score(y_test, y_pred_gnb) * 100, 2)

print(f"\n[GaussianNB] Best Parameters: {grid_gnb.best_params_}")
print(f"Akurasi GaussianNB: {akurasi_GNB}%")
print(confusion_matrix(y_test, y_pred_gnb))
print(classification_report(y_test, y_pred_gnb))

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ...............var_smoothing=1e-09;, score=0.172 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-09;, score=0.236 total time=   0.0s
[CV 3/5] END ...............var_smoothing=1e-09;, score=0.245 total time=   0.0s
[CV 4/5] END ...............var_smoothing=1e-09;, score=0.242 total time=   0.0s
[CV 5/5] END ...............var_smoothing=1e-09;, score=0.248 total time=   0.0s
[CV 1/5] END ...............var_smoothing=1e-08;, score=0.172 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-08;, score=0.236 total time=   0.0s
[CV 3/5] END ...............var_smoothing=1e-08;, score=0.245 total time=   0.0s
[CV 4/5] END ...............var_smoothing=1e-08;, score=0.242 total time=   0.0s
[CV 5/5] END ...............var_smoothing=1e-08;, score=0.248 total time=   0.0s
[CV 1/5] END ...............var_smoothing=1e-07;, score=0.172 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-07;,

In [11]:
from sklearn.ensemble import RandomForestClassifier

# Parameter Grid
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'class_weight': [None, 'balanced']
}

rf = RandomForestClassifier(random_state=42)
grid_rf = GridSearchCV(rf, param_grid_rf, refit=True, verbose=3, cv=5)
grid_rf.fit(x_train, y_train)

# Evaluasi
y_pred_rf = grid_rf.predict(x_test)
akurasi_RF = round(accuracy_score(y_test, y_pred_rf) * 100, 2)

print(f"\n[RandomForest] Best Parameters: {grid_rf.best_params_}")
print(f"Akurasi RandomForest: {akurasi_RF}%")
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

Fitting 5 folds for each of 54 candidates, totalling 270 fits
[CV 1/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=50;, score=0.710 total time=   0.1s
[CV 2/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=50;, score=0.675 total time=   0.1s
[CV 3/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=50;, score=0.704 total time=   0.1s
[CV 4/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=50;, score=0.745 total time=   0.1s
[CV 5/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=50;, score=0.697 total time=   0.1s
[CV 1/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=100;, score=0.732 total time=   0.2s
[CV 2/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=100;, score=0.682 total time=   0.2s
[CV 3/5] END class_weight=None, max_depth=None, min_samples_split=2, n_estimators=100;, score=0.710 total time

In [12]:
from sklearn.neighbors import KNeighborsClassifier

# Parameter Grid
param_grid_knn = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean']  # default metric
}

knn = KNeighborsClassifier()
grid_knn = GridSearchCV(knn, param_grid_knn, refit=True, verbose=3, cv=5)
grid_knn.fit(x_train, y_train)

# Evaluasi
y_pred_knn = grid_knn.predict(x_test)
akurasi_KNN = round(accuracy_score(y_test, y_pred_knn) * 100, 2)

print(f"\n[KNN] Best Parameters: {grid_knn.best_params_}")
print(f"Akurasi KNN: {akurasi_KNN}%")
print(confusion_matrix(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn))


Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV 1/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.774 total time=   0.0s
[CV 2/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.774 total time=   0.0s
[CV 3/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.796 total time=   0.0s
[CV 4/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.803 total time=   0.0s
[CV 5/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.758 total time=   0.0s
[CV 1/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.799 total time=   0.0s
[CV 2/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.806 total time=   0.0s
[CV 3/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.812 total time=   0.0s
[CV 4/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.831 total time=   0.0s
[CV 5/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.803 tota

In [None]:
print('Banyak data x_train :',len(x_train))
print('Banyak data x_test  :',len(x_test))
print('Banyak data y_train :',len(y_train))
print('Banyak data y_test  :',len(y_test))

Banyak data x_train : 1570
Banyak data x_test  : 674
Banyak data y_train : 1570
Banyak data y_test  : 674


## Training Data

In [53]:
cvec=CountVectorizer()
tvec=TfidfVectorizer()
hvec=HashingVectorizer()

In [None]:
from sklearn.svm import SVC
# from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier 
from sklearn.neighbors import KNeighborsClassifier # eclaudian distance
from sklearn.naive_bayes import GaussianNB


# clf1 = SVC(kernel="linear") 
# clf2 = MultinomialNB()
# clf3 = RandomForestClassifier()
# clf4 = KNeighborsClassifier(n_neighbors=5) 

from sklearn.pipeline import make_pipeline

model1 = make_pipeline(SVC(kernel="rbf"))
model2 = make_pipeline(GaussianNB())
model3 = make_pipeline(RandomForestClassifier())
model4 = make_pipeline(KNeighborsClassifier(n_neighbors=5))

In [55]:
model1.fit(x_train, y_train)

In [56]:
hasil1=model1.predict(x_test)

In [57]:
matrix = classification_report(y_test,hasil1)
print('Classification report : \n',matrix)

Classification report : 
               precision    recall  f1-score   support

           0       0.29      0.11      0.16        88
           1       0.14      0.35      0.20        48
           2       0.26      0.58      0.35        57
           3       0.25      0.55      0.34        64
           4       0.00      0.00      0.00        41
           5       0.21      0.53      0.31        88
           6       0.00      0.00      0.00        82
           7       0.00      0.00      0.00        58
           8       0.06      0.01      0.02        82
           9       0.00      0.00      0.00        66

    accuracy                           0.21       674
   macro avg       0.12      0.21      0.14       674
weighted avg       0.13      0.21      0.14       674



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [59]:
model3.fit(x_train, y_train)

In [60]:
hasil3=model3.predict(x_test)

In [61]:
matrix = classification_report(y_test,hasil1)
print('Classification report : \n',matrix)

Classification report : 
               precision    recall  f1-score   support

           0       0.29      0.11      0.16        88
           1       0.14      0.35      0.20        48
           2       0.26      0.58      0.35        57
           3       0.25      0.55      0.34        64
           4       0.00      0.00      0.00        41
           5       0.21      0.53      0.31        88
           6       0.00      0.00      0.00        82
           7       0.00      0.00      0.00        58
           8       0.06      0.01      0.02        82
           9       0.00      0.00      0.00        66

    accuracy                           0.21       674
   macro avg       0.12      0.21      0.14       674
weighted avg       0.13      0.21      0.14       674



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [64]:
hasilSVM = accuracy_score(hasil1, y_test)
precision1 = precision_score(y_test, hasil1, average='weighted')
recall1 = recall_score(y_test, hasil1, average='weighted')
f1_1 = f1_score(y_test, hasil1, average='weighted')

hasilRandomForest = accuracy_score(hasil3, y_test)
precision3 = precision_score(y_test, hasil3, average='weighted')
recall3 = recall_score(y_test, hasil3, average='weighted')
f1_3 = f1_score(y_test, hasil3, average='weighted')

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [65]:
model = {'Model':['Decision Tree Classifier',
                  'Multinomial Naive Bayes',
                  'Random Forest Classifier',
                 ],
         'AccuracyScore':[hasilSVM, hasilRandomForest],
         'Precision':[precision1, precision3],
         'Recall':[recall1, recall3],
         'F1-Score':[f1_1, f1_3]
         }
model_df = pd.DataFrame(model)
model_df

ValueError: All arrays must be of the same length