In [None]:
# Load the Iris dataset from the course repository and preview the first rows.
import pandas as pd

iris = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/KGW852/class/main/data/iris_data.csv'
)
iris.head()

In [None]:
# List the distinct labels stored in the 'class' column.
class_labels = pd.unique(iris['class'])
print(class_labels)

In [None]:
# Build the feature matrix (four measurements) and the label vector as NumPy arrays.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
iris_input = iris[feature_columns].to_numpy()
iris_target = iris['class'].to_numpy()

# Peek at the first five feature rows.
print(iris_input[:5])

In [None]:
# Split features and labels into a training set and a test set (fixed random_state).
from sklearn.model_selection import train_test_split

split_parts = train_test_split(iris_input, iris_target, random_state=42)
train_input, test_input, train_target, test_target = split_parts

print(train_input.shape, test_input.shape)

In [None]:
# Standardize the features: the scaler is fitted on the training set only,
# then the same transformation is applied to both the training and test sets.
from sklearn.preprocessing import StandardScaler

ss = StandardScaler().fit(train_input)  # fit() returns the fitted scaler itself
train_scaled, test_scaled = ss.transform(train_input), ss.transform(test_input)

In [None]:
# Activation function: plot the sigmoid curve phi(z) = 1 / (1 + exp(-z)).
import numpy as np
import matplotlib.pyplot as plt

z = np.arange(-5, 5, 0.1)
phi = 1 / (np.exp(-z) + 1)

plt.plot(z, phi)
plt.xlabel('z')
plt.ylabel('phi')
plt.title('Sigmoid function')
plt.grid()
plt.show()

In [None]:
# 1. Binary classification
#
# Boolean indexing: two element-wise masks are combined with the bitwise OR
# operator (|) so that only the setosa and versicolor rows are kept.
is_setosa = train_target == 'Iris-setosa'
is_versicolor = train_target == 'Iris-versicolor'
setosa_versi_indexes = is_setosa | is_versicolor

train_setosa_versi = train_scaled[setosa_versi_indexes]
target_setosa_versi = train_target[setosa_versi_indexes]

print(train_setosa_versi.shape, target_setosa_versi.shape)

In [None]:
# Logistic regression: create the classifier used for the binary
# (setosa vs. versicolor) task, with the library's default settings.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [None]:
# Fit the binary classifier on the setosa/versicolor training subset.
lr.fit(X=train_setosa_versi, y=target_setosa_versi)

In [None]:
# Show the predicted class for the first five training samples.
first_five = train_setosa_versi[:5]
print(lr.predict(first_five))

In [None]:
# Show the predicted class probabilities for the first five training samples.
first_five_proba = lr.predict_proba(train_setosa_versi[:5])
print(first_five_proba)

In [None]:
# Check the order of the classes as stored on the fitted binary model.
class_order = lr.classes_
print(class_order)

In [None]:
# Inspect the learned coefficients and intercept of the binary model.
learned_coef, learned_intercept = lr.coef_, lr.intercept_
print(learned_coef, learned_intercept)

In [None]:
# Compute z, the value of the fitted linear equation, for the first five training samples.
first_five = train_setosa_versi[:5]
decisions = lr.decision_function(first_five)

print(decisions)

In [None]:
# Pass the z values through the sigmoid function (scipy's expit) to obtain phi.
from scipy.special import expit

phi_values = expit(decisions)
print(phi_values)

In [None]:
# Multiclass classification
#
# Build one model that separates all three classes: setosa, versicolor and virginica.
# Hyperparameter C controls the L2 regularization; max_iter is the solver's iteration cap.
mlr = LogisticRegression(max_iter=1000, C=10)

In [None]:
# Train the three-class model on the standardized training set.
mlr.fit(X=train_scaled, y=train_target)

In [None]:
# Report classification accuracy: first on the training set, then on the test set.
for features, labels in ((train_scaled, train_target), (test_scaled, test_target)):
    print(mlr.score(features, labels))

In [None]:
# Show the predicted class (not the probability) for the first five test samples.
first_five_test = test_scaled[:5]
print(mlr.predict(first_five_test))

In [None]:
# Show the predicted class probabilities for the first five test samples,
# rounded to three decimals.
first_five_test = test_scaled[:5]
proba = mlr.predict_proba(first_five_test)
print(np.round(proba, 3))

In [None]:
# Check the order of the classes as stored on the fitted three-class model.
class_order = mlr.classes_
print(class_order)

In [None]:
# Inspect the shapes of the learned coefficient array and intercept array.
coef_shape = mlr.coef_.shape
intercept_shape = mlr.intercept_.shape
print(coef_shape, intercept_shape)

In [None]:
# Compute the z values of the fitted linear equations for the first five
# test samples and print them rounded to two decimals.
first_five_test = test_scaled[:5]
decisions = mlr.decision_function(first_five_test)

print(np.round(decisions, 2))

In [None]:
# Turn the z values into probabilities by applying softmax along axis 1 of `decisions`.
from scipy.special import softmax

proba = softmax(decisions, axis=1)

print(np.round(proba, 3))

In [None]:
# Decision-boundary visualization, step 1: scatter plot of petal length vs.
# petal width with a different marker for each species.
setosa_rows = iris['class'] == 'Iris-setosa'
versicolor_rows = iris['class'] == 'Iris-versicolor'
virginica_rows = iris['class'] == 'Iris-virginica'

setosa_petal_length = iris.loc[setosa_rows, 'petal_length'].to_numpy()
setosa_petal_width = iris.loc[setosa_rows, 'petal_width'].to_numpy()
versicolor_petal_length = iris.loc[versicolor_rows, 'petal_length'].to_numpy()
versicolor_petal_width = iris.loc[versicolor_rows, 'petal_width'].to_numpy()
virginica_petal_length = iris.loc[virginica_rows, 'petal_length'].to_numpy()
virginica_petal_width = iris.loc[virginica_rows, 'petal_width'].to_numpy()

# (x values, y values, marker, legend label) for each species, in drawing order.
species_series = (
    (setosa_petal_length, setosa_petal_width, '^', 'Iris-setosa'),
    (versicolor_petal_length, versicolor_petal_width, 'o', 'Iris-versicolor'),
    (virginica_petal_length, virginica_petal_width, '*', 'Iris-virginica'),
)
for lengths, widths, marker_style, species in species_series:
    plt.scatter(lengths, widths, marker=marker_style, label=species)

plt.title('Iris Petal Scatter Plot')
plt.xlabel('petal_length')
plt.ylabel('petal_width')
plt.legend()
plt.show()

In [None]:
# Build the inputs (petal measurements only) and the targets.
petal_columns = ['petal_length', 'petal_width']
petal_input = iris[petal_columns].to_numpy()
petal_target = iris['class'].to_numpy()

# Split into a training set and a test set.
# NOTE: this rebinds train_input / test_input / train_target / test_target,
# which previously held the four-feature split.
split_parts = train_test_split(petal_input, petal_target, random_state=42)
train_input, test_input, train_target, test_target = split_parts

print(train_input.shape, test_input.shape)

# Standardize the data (scaler fitted on the training set only).
# NOTE: ss, train_scaled and test_scaled are rebound to the two-feature versions.
ss = StandardScaler().fit(train_input)
train_scaled, test_scaled = ss.transform(train_input), ss.transform(test_input)


# Convert the string targets to integer codes.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder().fit(train_target)
train_target_encoded = le.transform(train_target)

In [None]:
# Model for the two petal features: multinomial (softmax) logistic regression.
# The `multi_class` argument is intentionally not passed: it is deprecated in
# recent scikit-learn releases (it emits a FutureWarning and is scheduled for
# removal), and with the lbfgs solver a target with more than two classes is
# already fitted as a multinomial model (scikit-learn >= 0.22).
lr2 = LogisticRegression(solver='lbfgs', max_iter=1000)

In [None]:
# Fit the petal-only model on the standardized features and the integer-encoded labels.
lr2.fit(X=train_scaled, y=train_target_encoded)

In [None]:
# Visualize the decision regions of lr2.
# The plotted coordinates are the standardized petal features (train_scaled),
# not the raw measurements.
from mlxtend.plotting import plot_decision_regions

plt.figure(figsize=(8, 6))
plot_decision_regions(train_scaled, train_target_encoded, clf=lr2, legend=2)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.title('Decision Boundary')

# Save into the current working directory. A relative path works on every OS,
# whereas a file at the root of the C: drive needs write permission there and
# is not a real location outside Windows.
path = 'decision boundary.png'
plt.savefig(path, dpi=300)

In [None]:
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Grid over the raw (unscaled) petal measurements, padded by 1 on every side.
x1_min, x1_max = petal_input[:, 0].min() - 1, petal_input[:, 0].max() + 1
x2_min, x2_max = petal_input[:, 1].min() - 1, petal_input[:, 1].max() + 1
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, 0.1), np.arange(x2_min, x2_max, 0.1))

# z value of every class at every grid point; rows are classes, columns are grid points.
# lr2 was fitted on standardized features, so the raw grid has to go through the
# same scaler before the coefficients are applied. Without this step the surfaces
# would not agree with the decision_function values drawn for the test points below.
grid_scaled = ss.transform(np.c_[xx1.ravel(), xx2.ravel()])
Z_values = lr2.coef_ @ grid_scaled.T + lr2.intercept_[:, np.newaxis]

# 3D figure
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

# One z surface per class, drawn over the raw-unit grid.
for i, class_name in enumerate(le.classes_):
    ax.plot_surface(xx1, xx2, Z_values[i].reshape(xx1.shape), alpha=0.5, label=class_name)

# Test points: x/y are the raw measurements, height is the z value of the
# sample's own (true) class. The decision values are computed once, outside the loop.
test_decisions = lr2.decision_function(test_scaled)
for i, class_name in enumerate(le.classes_):
    idx = (test_target == class_name)
    ax.scatter(test_input[idx, 0], test_input[idx, 1], test_decisions[idx, i], marker='o', label=class_name)

ax.set_xlabel('Petal Length')
ax.set_ylabel('Petal Width')
ax.set_zlabel('Z Value')
ax.set_title('Decision Boundaries')
ax.legend()

# Save into the current working directory instead of a hard-coded location at
# the root of the C: drive, so the cell also runs without access to that drive.
path = '3D decision boundary.png'
plt.savefig(path)