## 1.先import csv模組，然後打開查看iris資料集

In [1]:
import csv

In [2]:
# Print every record of the iris CSV (header row included) as a list of strings.
# newline='' lets the csv module handle line endings itself.
with open('iris/iris.csv', newline='') as csvfile:
    for record in csv.reader(csvfile):
        print(record)

['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species']
['1', '5.1', '3.5', '1.4', '0.2', 'Iris-setosa']
['2', '4.9', '3.0', '1.4', '0.2', 'Iris-setosa']
['3', '4.7', '3.2', '1.3', '0.2', 'Iris-setosa']
['4', '4.6', '3.1', '1.5', '0.2', 'Iris-setosa']
['5', '5.0', '3.6', '1.4', '0.2', 'Iris-setosa']
['6', '5.4', '3.9', '1.7', '0.4', 'Iris-setosa']
['7', '4.6', '3.4', '1.4', '0.3', 'Iris-setosa']
['8', '5.0', '3.4', '1.5', '0.2', 'Iris-setosa']
['9', '4.4', '2.9', '1.4', '0.2', 'Iris-setosa']
['10', '4.9', '3.1', '1.5', '0.1', 'Iris-setosa']
['11', '5.4', '3.7', '1.5', '0.2', 'Iris-setosa']
['12', '4.8', '3.4', '1.6', '0.2', 'Iris-setosa']
['13', '4.8', '3.0', '1.4', '0.1', 'Iris-setosa']
['14', '4.3', '3.0', '1.1', '0.1', 'Iris-setosa']
['15', '5.8', '4.0', '1.2', '0.2', 'Iris-setosa']
['16', '5.7', '4.4', '1.5', '0.4', 'Iris-setosa']
['17', '5.4', '3.9', '1.3', '0.4', 'Iris-setosa']
['18', '5.1', '3.5', '1.4', '0.3', 'Iris-setosa']
['19', '5.7', '3.8', '1.7

## 2.介紹：
### iris資料集裡面塞的是總共150筆的3種鳶尾花，每筆資料記載有以下資訊：
#### {SepalLength(花萼長度)、SepalWidth(花萼寬度)、PetalLength(花瓣長度)、PetalWidth(花瓣寬度)、Species(鳶尾花品種)}

##  3.資料預處理
### 運用pandas處理csv檔案

In [3]:
import pandas as pd
import numpy as np
from io import StringIO

In [4]:
# Load the iris CSV file into a pandas DataFrame.
iris = pd.read_csv('iris/iris.csv')

In [5]:
iris

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


### 用X來儲存鳶尾花的長度、寬度等資料，當作Feature特徵值。
### 用Y來儲存鳶尾花的品種名稱，當作Label資料類別。

In [6]:
# Feature frame: every column except the row identifier and the label.
X = iris.drop(columns=['Id', 'Species'])
# Label frame: select the Species column directly instead of dropping every
# other column by name, so an unexpected extra column in the CSV can never
# leak into the labels. Double brackets keep Y a one-column DataFrame.
Y = iris[['Species']]

In [7]:
Y

Unnamed: 0,Species
0,Iris-setosa
1,Iris-setosa
2,Iris-setosa
3,Iris-setosa
4,Iris-setosa
...,...
145,Iris-virginica
146,Iris-virginica
147,Iris-virginica
148,Iris-virginica


### 將X與Y做訓練與測試資料的切割。

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing.
# random_state pins the shuffle so the split (and everything trained on it)
# is reproducible between runs; stratify=Y keeps the species proportions the
# same in the training and test sets.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42, stratify=Y
)

In [9]:
Y_train

Unnamed: 0,Species
88,Iris-versicolor
82,Iris-versicolor
47,Iris-setosa
137,Iris-virginica
51,Iris-versicolor
...,...
13,Iris-setosa
60,Iris-versicolor
80,Iris-versicolor
77,Iris-versicolor


### 將Y的訓練與測試資料做OneHotEncoding。
#### 這裡運用pandas模組的get_dummies方法。
#### get_dummies()與OneHotEncoder()差別在於，get_dummies可以直接轉換字串為0,1；
#### 而舊版(scikit-learn 0.20以前)的OneHotEncoder()轉換字串需要先進行Label Encoding；0.20以後的版本已可直接處理字串。

#### 資料來源：https://reurl.cc/E7jNa1  -->使用Pandas進行One hot encoding章節

In [10]:
from keras.utils import np_utils
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the training labels; the resulting columns define the
# class layout used for the test labels as well.
Y_train_OneHot = pd.get_dummies(Y_train)
# Encoding the test labels independently could yield fewer (or differently
# ordered) columns if a species were absent from the test split, so align
# them to the training columns, fill any missing class with 0, and cast to
# the training dtypes so both frames convert to the same array type.
Y_test_OneHot = (
    pd.get_dummies(Y_test)
    .reindex(columns=Y_train_OneHot.columns, fill_value=0)
    .astype(Y_train_OneHot.dtypes.to_dict())
)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [11]:
pd.DataFrame(Y_test_OneHot)

Unnamed: 0,Species_Iris-setosa,Species_Iris-versicolor,Species_Iris-virginica
71,0,1,0
108,0,0,1
61,0,1,0
120,0,0,1
121,0,0,1
83,0,1,0
7,1,0,0
134,0,0,1
109,0,0,1
116,0,0,1


### X_train、X_test標準化

In [12]:
# Standardize each feature to zero mean and unit variance.
# Dividing by 255 is the scaling used for 8-bit image pixels and is not
# meaningful for measurements in centimetres. The statistics are computed on
# the training set only and reused for the test set, so no information from
# the held-out rows influences the preprocessing.
feature_mean = X_train.mean()
feature_std = X_train.std()
X_train_normalize = (X_train - feature_mean) / feature_std
X_test_normalize = (X_test - feature_mean) / feature_std
len(X_test_normalize)

45