# 0. Import Library

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns

import scipy.stats

![](https://onlinelibrary.wiley.com/cms/asset/ea1d3bd8-afd7-4914-b645-74d424b6690d/advs3654-fig-0002-m.jpg)

# 1. Input Data

In [None]:
# Load data/iris.csv (path relative to the working directory) into the frame every later cell uses.
df = pd.read_csv('data/iris.csv')

![](https://editor.analyticsvidhya.com/uploads/51518iris%20img1.png)

# 2. Data Preprocessing

## 2.1 Exploratory Data Analysis (EDA)

In [None]:
# Preview the first rows (pandas shows five by default) to sanity-check the load.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Per-column dtypes: shows which columns are numeric and which are object/string.
df.dtypes

## 2.2 Data Cleaning

### - Missing Value

In [None]:
# Count of missing values in each column.
df.isnull().sum()

### - Inconsistent Data

### - Outliers

## 2.3 Data Transformation

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
def labelEncode(data, columns):
    """Label-encode the given columns of ``data`` in place.

    For each name in ``columns`` a fresh ``LabelEncoder`` is fitted on that
    column's values cast to ``str``, and the resulting integer codes are
    written back to ``data`` under the key ``<name>_Encoded``. ``data`` is
    modified directly; no copy is made.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns to encode. It gains one ``*_Encoded``
        column per entry of ``columns``.
    columns : iterable of str
        Names of the columns to encode. May be empty.

    Returns
    -------
    tuple
        ``(encoded_names, last_encoder)`` where ``encoded_names`` is the list
        of newly written column names, in input order, and ``last_encoder``
        is the ``LabelEncoder`` fitted on the LAST column only (encoders for
        earlier columns are not retained). ``last_encoder`` is ``None`` when
        ``columns`` is empty.
    """
    columns_ = []
    # Pre-bind so an empty `columns` returns ([], None) instead of raising
    # UnboundLocalError at the return statement.
    lb = None
    for i in columns:
        encoded_name = i + '_Encoded'
        lb = LabelEncoder()
        # Cast to str so mixed-type or missing entries are encoded as labels too.
        data[encoded_name] = lb.fit_transform(data[i].astype(str))
        columns_.append(encoded_name)
    return columns_, lb

In [None]:
# Encode the Species column; labelEncode adds a Species_Encoded column to df in place.
columns = ['Species']
# enc_columns: names of the new *_Encoded columns; lb: the encoder fitted on the last (here the only) column.
enc_columns, lb=labelEncode(df, columns)

In [None]:
# Spot-check 15 random rows; no random_state is passed, so the rows shown differ on every run.
df.sample(15)

In [None]:
# For each encoded column, show the original labels next to the integer codes
# they were mapped to.
for encoded_name in enc_columns:
    source_name = encoded_name[:-8]  # strip the 8-character '_Encoded' suffix
    print(source_name)
    print(' ', df[source_name].unique())
    print(' ', df[encoded_name].unique())
    print('  -------')

## [Optional] Export เป็นไฟล์เมื่อทำ Data Cleaning เสร็จแล้ว

In [None]:
# Replace 'รหัส' (placeholder for your ID) with your own file name before uncommenting.
#df.to_csv('รหัส.csv',index=False)
#df.to_excel('รหัส.xlsx',index=False)

## 2.4 ตรวจสอบค่าสหสัมพันธ์ของตัวแปร x, y

In [None]:
# Pairwise correlation of the numeric columns only. The frame still contains
# the original string-valued Species column, which DataFrame.corr() rejects
# with a ValueError on pandas >= 2.0 (numeric_only now defaults to False).
# Selecting numeric dtypes first gives the same table on every pandas version.
df.select_dtypes(include='number').corr()

In [None]:
plt.rcParams['figure.figsize'] = (10, 7)
# Restrict to numeric columns before correlating: the string-valued Species
# column makes DataFrame.corr() raise a ValueError on pandas >= 2.0.
# The trailing ';' suppresses the Axes repr in the cell output.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True);

## 2.5 การกำหนด Feature / Target

In [None]:
# Whole frame as a NumPy array, displayed for its shape. DataMatrix is not
# referenced again in the cells visible here; X and Y are built from df directly.
DataMatrix = df.values
DataMatrix.shape

In [None]:
# Model inputs: the four measurement columns. Target: the integer-encoded species.
feature_cols = [
    'SepalLengthCm',
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
]
X = df[feature_cols].values
Y = df['Species_Encoded'].values

## 2.6 Data Preparation (แบ่งข้อมูลสำหรับ Training / Testing)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_Train, X_Test, Y_Train, Y_Test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
)

# 3. Modelling: Support Vector Machines

In [None]:
from sklearn.svm import SVC

## 3.1 Training Process

In [None]:
# Support vector classifier with a linear kernel, fitted on the training split only.
model_svm = SVC(kernel='linear')
model_svm.fit(X_Train, Y_Train)

In [None]:
# Mean accuracy on the same data the model was fitted on — an optimistic figure;
# compare it with the held-out results in the evaluation section.
model_svm.score(X_Train, Y_Train)

## 3.2 Testing Process

In [None]:
# Predict encoded species labels for the held-out test split.
y_predict_svm = model_svm.predict(X_Test)

# 4. Model Evaluation

## 4.1 Split Test

In [None]:
from sklearn import metrics

In [None]:
# Confusion matrix on the test split: rows are true classes, columns are
# predicted classes (scikit-learn convention).
cnf_matrix = metrics.confusion_matrix(Y_Test, y_predict_svm)
cnf_matrix

In [None]:
plt.rcParams['figure.figsize'] = (10, 7)
# Label the axes with the species names kept on the fitted encoder instead of
# the bare codes 0/1/2, so the plot reads the same way as the classification
# report. lb.classes_ is ordered by encoded value; this matches the matrix
# row/column order as long as every class occurs in Y_Test or the predictions.
cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cnf_matrix,
    display_labels=lb.classes_,
)
cm_display.plot()
plt.show()

In [None]:
from sklearn.metrics import classification_report
# Take the class names from the fitted encoder rather than a hand-typed list,
# so name i always corresponds to encoded label i.
target_names = [str(name) for name in lb.classes_]
# Passing `labels` explicitly keeps the report aligned with target_names even
# if a class happens to be absent from the test split (otherwise scikit-learn
# raises on the length mismatch).
print(classification_report(
    Y_Test,
    y_predict_svm,
    labels=list(range(len(target_names))),
    target_names=target_names,
))

## 4.2 Cross Validation Test

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# 5-fold cross-validation over the full X / Y. cross_val_score fits a fresh
# clone of the estimator for each fold, so the earlier fit on X_Train is not reused.
cvs_svm = cross_val_score(model_svm, X, Y, cv=5)

In [None]:
# Per-fold scores, one entry per cross-validation fold.
print('Cross Validation Score {}'.format(cvs_svm))

In [None]:
# Mean of the fold scores, to four decimal places. The earlier spec ':4f' only
# set a minimum field width of 4 and left the default six decimals in place.
print('SVM Mean: {:.4f}'.format(cvs_svm.mean()))

# [Optional] Save Machine Learning Models

In [None]:
#import pickle
#filename = 'pickle_รหัสนศ.sav'
#with open(filename, 'wb') as model_file:
#    pickle.dump(model_svm, model_file)