In [None]:
# Python : 3.9.18
# Numpy : 1.26.0
# Pandas : 2.1.1
# Matplotlib : 3.7.2
# Seaborn : 0.12.2
# Scikit-learn : 1.3.0
# Created: OCT. 23. 2023
# Author: D.W. SHIN
# iris 데이터 사용하여 사이킷런 정리하기

import pandas as pd
from sklearn.datasets import load_iris

In [None]:
# Load the iris dataset; later cells read its 'data', 'target' and 'feature_names' entries.
iris = load_iris()

In [None]:
# Feature matrix (x), class labels (y) and feature column names, in that order
data, target, feature_names = (iris[key] for key in ('data', 'target', 'feature_names'))
# Build the DataFrame from the features and append the labels as the 'target' column
iris_df = pd.DataFrame(data, columns=feature_names).assign(target=target)

In [None]:
# Preview the first rows of the assembled DataFrame
iris_df.head()

In [None]:
# Preview the last rows of the assembled DataFrame
iris_df.tail()

In [None]:
# Print a structural summary of the DataFrame (columns and dtypes)
iris_df.info()

In [None]:
# Visualization: sepal width vs. sepal length, colored by the 'target' column
import matplotlib.pyplot as plt
import seaborn as sns

ax = sns.scatterplot(
    data=iris_df,
    x='sepal width (cm)',
    y='sepal length (cm)',
    hue='target',
    palette='muted',
)
ax.set_title('Sepal')
plt.show()

In [None]:
# Visualization: petal width vs. petal length, colored by the 'target' column
ax = sns.scatterplot(
    data=iris_df,
    x='petal width (cm)',
    y='petal length (cm)',
    hue='target',
    palette='muted',
)
ax.set_title('Petal')
plt.show()

In [None]:
# Split the dataset into train and test parts
from sklearn.model_selection import train_test_split

test_size = 0.25  # value handed to train_test_split as its test_size argument

x_all = iris_df.drop(columns='target')  # every column except the label
y_all = iris_df['target']               # label column, also used for stratification

# random_state is pinned to 0, matching the other seeded steps in this notebook
x_train, x_test, y_train, y_test = train_test_split(
    x_all,
    y_all,
    stratify=y_all,
    test_size=test_size,
    random_state=0,
)
x_train.shape, y_train.shape

In [None]:
# Class distribution of the training labels.
# The label Series itself is passed as `x`, so each bar is the number of training
# samples in one class. (Passing the Series as `data` and its row index as `x`
# is not a documented calling form and does not express "count per class".)
sns.countplot(x=y_train)
plt.title('Train target counts')
plt.show()

In [None]:
# Class distribution of the test labels.
# The label Series itself is passed as `x`, so each bar is the number of test
# samples in one class. (Passing the Series as `data` and its row index as `x`
# is not a documented calling form and does not express "count per class".)
sns.countplot(x=y_test)
plt.title('Test target counts')
plt.show()

### **Logistic Regression**

In [None]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# steps 1-2. create the model object and train it on the training split
lr_model = LogisticRegression().fit(x_train, y_train)

# step 3. predict labels for the test split
lr_y_pred = lr_model.predict(x_test)

#### **Error - lr**

In [None]:
# Confusion matrix: true test labels vs. Logistic Regression predictions
from sklearn.metrics import confusion_matrix

confusion_matrix(y_true=y_test, y_pred=lr_y_pred)

In [None]:
# Render the Logistic Regression confusion matrix as an annotated heatmap
lr_cm = confusion_matrix(y_test, lr_y_pred)
ax = sns.heatmap(lr_cm, annot=True, cmap='Reds')
ax.set_xlabel('predict')
ax.set_ylabel('actual')
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Accuracy takes no averaging argument, so it is printed on its own.
print('accuracy_score : %f' % accuracy_score(y_test, lr_y_pred))

# NOTE(review): with average='micro' on single-label multiclass targets these three
# scores are expected to coincide with accuracy — confirm, and consider
# average='macro' if per-class behaviour is of interest.
for label, scorer in (
    ('precision_score', precision_score),
    ('recall_score', recall_score),
    ('f1_score', f1_score),
):
    print('%s : %f' % (label, scorer(y_test, lr_y_pred, average='micro')))

### **SGD Classifier - Stochastic Gradient Descent**

In [None]:
# SGD Classifier
from sklearn.linear_model import SGDClassifier

# steps 1-2. create the model object (L1 penalty, seeded) and train it on the training split
sgd_model = SGDClassifier(
    penalty='l1',
    random_state=0,
    n_jobs=-1,
).fit(x_train, y_train)

# step 3. predict labels for the test split
sgd_y_pred = sgd_model.predict(x_test)

#### **Error - sgd**

In [None]:
# Confusion matrix: true test labels vs. SGD predictions
# (confusion_matrix is imported in the Logistic Regression evaluation cell)
confusion_matrix(y_true=y_test, y_pred=sgd_y_pred)

In [None]:
# Render the SGD confusion matrix as an annotated heatmap
sgd_cm = confusion_matrix(y_test, sgd_y_pred)
ax = sns.heatmap(sgd_cm, annot=True, cmap='Reds')
ax.set_xlabel('predict')
ax.set_ylabel('actual')
plt.show()

In [None]:
# Metric functions are imported in the Logistic Regression evaluation cell.
print('accuracy_score : %f' % accuracy_score(y_test, sgd_y_pred))

# NOTE(review): with average='micro' on single-label multiclass targets these three
# scores are expected to coincide with accuracy — confirm, and consider
# average='macro' if per-class behaviour is of interest.
for label, scorer in (
    ('precision_score', precision_score),
    ('recall_score', recall_score),
    ('f1_score', f1_score),
):
    print('%s : %f' % (label, scorer(y_test, sgd_y_pred, average='micro')))

### **K Neighbors Classifier**

In [None]:
# K Neighbors Classifier
from sklearn.neighbors import KNeighborsClassifier

# steps 1-2. create the model object with default settings and train it on the training split
knc_model = KNeighborsClassifier().fit(x_train, y_train)

# step 3. predict labels for the test split
knc_y_pred = knc_model.predict(x_test)

#### **Error - knc**

In [None]:
# Confusion matrix: true test labels vs. K Neighbors predictions
# (confusion_matrix is imported in the Logistic Regression evaluation cell)
confusion_matrix(y_true=y_test, y_pred=knc_y_pred)

In [None]:
# Render the K Neighbors confusion matrix as an annotated heatmap
knc_cm = confusion_matrix(y_test, knc_y_pred)
ax = sns.heatmap(knc_cm, annot=True, cmap='Reds')
ax.set_xlabel('predict')
ax.set_ylabel('actual')
plt.show()

In [None]:
# Metric functions are imported in the Logistic Regression evaluation cell.
print('accuracy_score : %f' % accuracy_score(y_test, knc_y_pred))

# NOTE(review): with average='micro' on single-label multiclass targets these three
# scores are expected to coincide with accuracy — confirm, and consider
# average='macro' if per-class behaviour is of interest.
for label, scorer in (
    ('precision_score', precision_score),
    ('recall_score', recall_score),
    ('f1_score', f1_score),
):
    print('%s : %f' % (label, scorer(y_test, knc_y_pred, average='micro')))

### **SVC - Support Vector Classifier**

In [None]:
# Support Vector Classifier
from sklearn.svm import SVC

# steps 1-2. create the model object (seeded) and train it on the training split
svc_model = SVC(random_state=0).fit(x_train, y_train)

# step 3. predict labels for the test split
svc_y_pred = svc_model.predict(x_test)

#### **Error - svc**

In [None]:
# Confusion matrix: true test labels vs. SVC predictions
# (confusion_matrix is imported in the Logistic Regression evaluation cell)
confusion_matrix(y_true=y_test, y_pred=svc_y_pred)

In [None]:
# Render the SVC confusion matrix as an annotated heatmap
svc_cm = confusion_matrix(y_test, svc_y_pred)
ax = sns.heatmap(svc_cm, annot=True, cmap='Reds')
ax.set_xlabel('predict')
ax.set_ylabel('actual')
plt.show()

In [None]:
# Metric functions are imported in the Logistic Regression evaluation cell.
print('accuracy_score : %f' % accuracy_score(y_test, svc_y_pred))

# NOTE(review): with average='micro' on single-label multiclass targets these three
# scores are expected to coincide with accuracy — confirm, and consider
# average='macro' if per-class behaviour is of interest.
for label, scorer in (
    ('precision_score', precision_score),
    ('recall_score', recall_score),
    ('f1_score', f1_score),
):
    print('%s : %f' % (label, scorer(y_test, svc_y_pred, average='micro')))

### **Decision Tree Classifier**

In [None]:
# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

# steps 1-2. create the model object (max_depth=2, seeded) and train it on the training split
dtc_model = DecisionTreeClassifier(
    max_depth=2,
    random_state=0,
).fit(x_train, y_train)

# step 3. predict labels for the test split
dtc_y_pred = dtc_model.predict(x_test)

#### **Error - dtc**

In [None]:
# Confusion matrix: true test labels vs. Decision Tree predictions
# (confusion_matrix is imported in the Logistic Regression evaluation cell)
confusion_matrix(y_true=y_test, y_pred=dtc_y_pred)

In [None]:
# Render the Decision Tree confusion matrix as an annotated heatmap
dtc_cm = confusion_matrix(y_test, dtc_y_pred)
ax = sns.heatmap(dtc_cm, annot=True, cmap='Reds')
ax.set_xlabel('predict')
ax.set_ylabel('actual')
plt.show()

In [None]:
# Metric functions are imported in the Logistic Regression evaluation cell.
print('accuracy_score : %f' % accuracy_score(y_test, dtc_y_pred))

# NOTE(review): with average='micro' on single-label multiclass targets these three
# scores are expected to coincide with accuracy — confirm, and consider
# average='macro' if per-class behaviour is of interest.
for label, scorer in (
    ('precision_score', precision_score),
    ('recall_score', recall_score),
    ('f1_score', f1_score),
):
    print('%s : %f' % (label, scorer(y_test, dtc_y_pred, average='micro')))