<a href="https://colab.research.google.com/github/Rathachai/CV/blob/gh-pages/code/ch07/classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Analytics Book : CODE

**ISBN** : 978-616-338-169-9

**Author** : Rathachai Chawuthai

**Year** : 2022

**Copyright** © by KMITL

**Link** : https://github.com/Rathachai/DA-LAB



---


## **Chapter 7 : Classification Analytics**


---

### Classification

In [1]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import KFold

from sklearn.metrics import accuracy_score

In [2]:
# step: 1 -- read the chapter's CSV dataset into a DataFrame
csv_path = (
    "https://rathachai.github.io/DA-LAB/datasets/simple-veh-class.csv"
)
df = pd.read_csv(filepath_or_buffer=csv_path)

In [3]:
# step: 2 -- separate the input features from the target column
feature_columns = ['weight_kg', 'height_m', 'n_wheels']
X = df.loc[:, feature_columns]
y = df.loc[:, 'vtype']

In [4]:
# step: 3 (n/a)

In [5]:
# step: 4
#  step: 4.1
# Candidate classifiers, keyed by the label used when reporting results.
# DecisionTreeClassifier and MLPClassifier draw random numbers while fitting,
# so they are given a fixed random_state; without it the reported accuracies
# change from one run of the notebook to the next.
RANDOM_STATE = 42

envs={}
envs["LR"] = LogisticRegression()
envs["DT"] = DecisionTreeClassifier(random_state=RANDOM_STATE)
envs["NB"] = GaussianNB()
envs["KNN (k3)"] = KNeighborsClassifier(n_neighbors=3)
envs["KNN (k5)"] = KNeighborsClassifier(n_neighbors=5)
envs["ANN (h10)"] = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic', max_iter=5000, random_state=RANDOM_STATE)
envs["ANN (h100)"] = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000, random_state=RANDOM_STATE)

In [6]:
env_results = {} # for storing results

#  step: 4.2
# KFold without shuffling is deterministic, so one splitter is shared by all
# classifiers and every classifier is scored on the same four folds.
kf = KFold(n_splits=4)

for env_name, estimator in envs.items():

  acc_list = [] # for storing accuracy in each fold

  for train_index, test_index in kf.split(X):

    #  step: 4.2.1
    # kf.split yields positional row numbers, so rows are selected with .iloc.
    # Label-based selection (.loc / y[...]) only gives the same rows while the
    # index is the default 0..n-1 and breaks after any filtering or re-indexing.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    #  step: 4.2.2 (n/a)

    #  step: 4.2.3
    model = estimator.fit(X_train, y_train)

    #  step: 4.2.4
    y_pred = model.predict(X_test)

    #  step: 4.2.5
    acc = accuracy_score(y_test, y_pred)
    acc_list.append(acc)

  # step: 4.3
  # average the per-fold accuracies and keep three decimals for reporting
  acc_mean = np.mean(acc_list)
  env_results[env_name] = round(acc_mean,3)

In [7]:
# step: 5
# Show the rounded mean cross-validation accuracy stored for each classifier label.
print(env_results)

{'LR': 0.1, 'DT': 1.0, 'NB': 0.95, 'KNN (k3)': 0.85, 'KNN (k5)': 0.6, 'ANN (h10)': 0.05, 'ANN (h100)': 0.4}


In [8]:
# step: 6 -- refit the "NB" entry on the complete dataset (no hold-out)
chosen_clf = envs['NB']
model = chosen_clf.fit(X, y)

In [9]:
# step: 7
import pickle

In [10]:
# Serialize the fitted model to disk. The with-block closes the file handle
# (flushing the pickled bytes) even if pickle.dump raises, instead of leaving
# an open handle behind for the garbage collector to clean up.
filename = 'clf.model'
with open(filename, 'wb') as model_file:
  pickle.dump(model, model_file)



---



https://rathachai.creatier.pro/