# Machine Learning Model for Illegal Parking ("Parkir Liar") Classification

- Our app uses this model to classify reported parking cases, i.e. to flag illegal parking.

## Library Preparation

In [90]:
import pandas as pd
import joblib as joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt

## Data Preparation

In [91]:
# Relative path of the CSV file that holds the parking-report dataset
BASEDATA = "Dataset/Parkir Liar(new).csv"
# Output file for the baseline (untuned) KNN pipeline
FILENAME = "Parkir_liar_Model.pkl"
# Output file for the grid-searched (tuned) KNN model
FILENAMEGRID = "Parkir_liar_Model_TUNED.pkl"

In [92]:
# Read the parking-report CSV into a DataFrame (comma is pandas' default separator)
parkirDf = pd.read_csv(BASEDATA)

In [93]:
parkirDf.head()

Unnamed: 0,Deskripsi Masalah,Status Pelaporan,Jenis Kendaraan,Waktu
0,Parkir di trotoar,Liar,Motor,8:00
1,Parkir di trotoar,Liar,Motor,9:00
2,Parkir di trotoar,Liar,Motor,10:00
3,Parkir di trotoar,Liar,Motor,11:00
4,Parkir di trotoar,Liar,Motor,12:00


In [94]:
# Separate the label column from the feature columns
y = parkirDf['Status Pelaporan']                   # target
X = parkirDf.drop(columns=['Status Pelaporan'])    # features


In [95]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [96]:
print((X_train.shape, X_test.shape), (y_train.shape, y_test.shape))

((89, 3), (23, 3)) ((89,), (23,))


In [97]:
    
# Categorical feature columns that are handed to the one-hot encoder
categorical_cols = ['Deskripsi Masalah', 'Jenis Kendaraan']

In [98]:
# Preprocessing: one-hot encode the categorical columns. Categories unseen
# during fit are encoded as all zeros (handle_unknown='ignore').
# Columns not listed in `categorical_cols` (e.g. 'Waktu') are dropped, which
# is ColumnTransformer's default `remainder` behaviour, spelled out here.
preprocessor = ColumnTransformer(
    transformers=[
        ('Encode', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ],
    remainder='drop',
)

## Building Model

In [99]:
# Chain the preprocessing step and a default-configured KNN classifier
# into a single estimator.
knn_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier()),
])

In [100]:
# Search space for tuning the KNN step. The `classifier__` prefix routes each
# parameter to the pipeline step named 'classifier'.
param_grid = dict(
    classifier__n_neighbors=[3, 5, 7, 9, 11],
    classifier__weights=['uniform', 'distance'],
    classifier__p=[1, 2],
)

In [101]:
# Define Knn Model 

def gridsearchKnn(X_train, y_train, knn_pipeline, param_grid):
    """Tune `knn_pipeline` over `param_grid` with a cross-validated grid search.

    The search uses 5 folds and accuracy as the selection metric.

    Returns the fitted GridSearchCV object.
    """
    searcher = GridSearchCV(
        estimator=knn_pipeline,
        param_grid=param_grid,
        scoring='accuracy',
        cv=5,
    )
    # fit() returns the search object itself
    return searcher.fit(X_train, y_train)

def knn(X_train, y_train, pipeline=None):
    """Fit a KNN pipeline on the training data and return the fitted estimator.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, forwarded to ``fit``.
    pipeline : estimator, optional
        Estimator to fit. When omitted, the notebook-level ``knn_pipeline``
        is used, which keeps existing ``knn(X_train, y_train)`` calls working.

    Returns
    -------
    The value returned by ``pipeline.fit``.

    Note: the estimator is fitted in place; no copy is made.
    """
    if pipeline is None:
        # Fall back to the pipeline defined earlier in the notebook
        pipeline = knn_pipeline
    return pipeline.fit(X_train, y_train)


In [102]:
def save_model(model, filename):
    """Serialize `model` to `filename` with joblib and print a confirmation."""
    joblib.dump(value=model, filename=filename)
    print(f"Model saved to {filename}")

In [103]:
def Load_Model(filename):
    """Load a joblib-serialized model from `filename`, print a notice, and return it.

    NOTE(review): joblib files are pickle-based, so loading one can execute
    arbitrary code. Only load files that come from a trusted source.
    """
    restored = joblib.load(filename)
    print(f"Model loaded from {filename}")
    return restored

In [104]:
# Fit the baseline (untuned) KNN pipeline and save it to FILENAME
Knn_model = knn(X_train, y_train)
save_model(Knn_model,FILENAME)

Model saved to Parkir_liar_Model.pkl


In [105]:
# Run the grid search (tuned KNN) and save the fitted search object to FILENAMEGRID
gridsearchKnn_Model = gridsearchKnn(X_train, y_train,knn_pipeline,param_grid)
save_model(gridsearchKnn_Model,FILENAMEGRID )

Traceback (most recent call last):
  File "d:\File Julian\Teknik informatika\github\Jurnal Pais\LaporParkir-Application\API\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 971, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\File Julian\Teknik informatika\github\Jurnal Pais\LaporParkir-Application\API\.venv\Lib\site-packages\sklearn\metrics\_scorer.py", line 279, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\File Julian\Teknik informatika\github\Jurnal Pais\LaporParkir-Application\API\.venv\Lib\site-packages\sklearn\metrics\_scorer.py", line 371, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "d:\File Julian\Teknik informatika\github\Jurnal Pais\LaporParkir-Application\API\.venv\Lib\site-packages\

Model saved to Parkir_liar_Model_TUNED.pkl


  1.  1.]


## Prediction

In [106]:
def Predict(self, model_loaded, new_data=None):
    """Predict 'Status Pelaporan' for new report rows with a loaded model.

    Parameters
    ----------
    self
        Unused; kept so existing call sites keep working.
    model_loaded
        Either a fitted search object exposing ``best_estimator_`` (the tuned
        model) or a fitted pipeline exposing ``predict`` (the baseline model).
    new_data : pandas.DataFrame, optional
        Rows to classify, holding the feature columns the model expects
        ('Jenis Kendaraan' and 'Deskripsi Masalah' for the pipeline built in
        this notebook). Defaults to a single sample row.

    Returns
    -------
    pandas.DataFrame
        A copy of the input rows with an added 'Prediksi Status Pelaporan'
        column. The same frame is also printed.
    """
    if new_data is None:
        # Values are wrapped in lists: pandas raises ValueError when a
        # DataFrame is built from scalar values only and no index is given.
        new_data = pd.DataFrame({
            'Jenis Kendaraan': ['Motor'],
            'Deskripsi Masalah': ['Parkir di trotoar'],
        })

    # A tuned model wraps the fitted pipeline in `best_estimator_`;
    # a baseline model is the pipeline itself.
    pipeline = getattr(model_loaded, 'best_estimator_', model_loaded)

    # The pipeline runs its own preprocessing before the classifier, so the
    # new rows go through the same transformation as the training data.
    predictions = pipeline.predict(new_data)

    # Work on a copy so the caller's frame is not modified
    result = new_data.copy()
    result['Prediksi Status Pelaporan'] = predictions

    # Only columns that actually exist are shown (the previous column list
    # referenced 'Hari', 'Lokasi' and 'Jam', which are not part of the input).
    print(result)
    return result


## Visualization

In [107]:
def Confusion_matrix(self, y_test, y_pred):
    """Compute and plot the confusion matrix of `y_pred` against `y_test`.

    Parameters
    ----------
    self
        Unused; kept so existing call sites keep working.
    y_test
        True labels.
    y_pred
        Predicted labels, aligned with `y_test`.
    """
    # Sorted union of the observed labels, so rows and columns are tagged
    # with the real class names instead of positional indices.
    labels = sorted(set(y_test) | set(y_pred))
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)

    # Draw on an explicit axes: without `ax`, disp.plot() opens its own
    # figure, so a separate plt.figure(figsize=...) call would only leave an
    # empty extra figure and the size would not apply to the plot.
    fig, ax = plt.subplots(figsize=(8, 6))
    disp.plot(cmap=plt.cm.Blues, ax=ax)
    ax.set_title('Confusion Matrix')
    plt.show()

def Classification_report(self, y_test, y_pred, target_names=None):
    """Print scikit-learn's classification report for the given predictions.

    Parameters
    ----------
    self
        Unused; kept so existing call sites keep working.
    y_test
        True labels.
    y_pred
        Predicted labels, aligned with `y_test`.
    target_names : list of str, optional
        Display names for the classes, in sorted-label order. When omitted,
        the labels found in the data are shown as they are. This avoids
        hard-coded names that may not match the dataset's classes, either in
        meaning or in count.
    """
    report = classification_report(y_test, y_pred, target_names=target_names)
    print("\nClassification Report:\n", report)