In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

In [2]:
# 1. Data preparation
iris = load_iris()

# Build the frame from the feature matrix only and attach the target as its
# own column. Stacking data and target together with np.c_ would upcast the
# integer class codes to float, making labels appear as 0.0 / 1.0 in reports.
# Short column names follow the column order of iris['data'].
iris_all = pd.DataFrame(iris['data'], columns=['sl', 'sw', 'pl', 'pw'])
iris_all['species'] = iris['target']

# Keep only Versicolor (species=1) and Virginica (species=2), reset the index,
# and recode the label so that Versicolor=0 and Virginica=1.
# Done as one chain so the cell is idempotent and iris_df is assigned once.
iris_df = (
    iris_all[iris_all['species'].isin([1, 2])]
    .reset_index(drop=True)
    .assign(species=lambda frame: frame['species'].map({1: 0, 2: 1}))
)

# Feature matrix X and binary integer target y
X = iris_df[['sl', 'sw', 'pl', 'pw']]
y = iris_df['species']

# Sanity check: exactly the two recoded classes remain.
assert set(y.unique()) == {0, 1}, "expected a binary 0/1 target"

In [3]:
# 2. With regularization (scikit-learn's default penalty, L2)
# fit() returns the estimator, so construction and training are chained.
clf_l2 = LogisticRegression().fit(X, y)

# Column 1 of predict_proba is the probability of y=1.
y_prob_l2 = clf_l2.predict_proba(X)[:, 1]
# Hard class predictions on the same samples the model was trained on.
y_pred_l2 = clf_l2.predict(X)

In [4]:
# Display the fitted weights of the L2-regularized model
# (one coefficient per feature column of X: sl, sw, pl, pw).
clf_l2.coef_

array([[-0.39629303, -0.5122079 ,  2.93019683,  2.41380088]])

In [5]:
# Display the fitted intercept (bias term) of the L2-regularized model.
clf_l2.intercept_

array([-14.41402286])

In [6]:
# Print the evaluation of the L2-regularized model on the training data.
# A single print with sep="\n" emits each section on its own line, exactly as
# separate print calls would.
print(
    "=== L2 Regularization (Default) ===",
    "\nClassification Report (L2):",
    classification_report(y, y_pred_l2),
    "\nConfusion Matrix (L2):",
    confusion_matrix(y, y_pred_l2),
    sep="\n",
)

=== L2 Regularization (Default) ===

Classification Report (L2):
              precision    recall  f1-score   support

         0.0       0.98      0.94      0.96        50
         1.0       0.94      0.98      0.96        50

    accuracy                           0.96       100
   macro avg       0.96      0.96      0.96       100
weighted avg       0.96      0.96      0.96       100


Confusion Matrix (L2):
[[47  3]
 [ 1 49]]


In [7]:
# 3. Without regularization (penalty=None)
# fit() returns the estimator, so construction and training are chained.
clf_none = LogisticRegression(
    penalty=None,      # no penalty term
    solver='lbfgs',
    max_iter=200,
).fit(X, y)

# Column 1 of predict_proba is the probability of y=1.
y_prob_none = clf_none.predict_proba(X)[:, 1]
# Hard class predictions on the same samples the model was trained on.
y_pred_none = clf_none.predict(X)


In [8]:
# Display the fitted weights of the unregularized model
# (one coefficient per feature column of X: sl, sw, pl, pw).
clf_none.coef_

array([[-2.45797773, -6.6928879 ,  9.42587733, 18.35538499]])

In [9]:
# Display the fitted intercept (bias term) of the unregularized model.
clf_none.intercept_

array([-42.74748987])

In [30]:
# Print the evaluation of the unregularized model on the training data.
# A single print with sep="\n" emits each section on its own line, exactly as
# separate print calls would.
print(
    "\n=== No Regularization (penalty=None) ===",
    "\nClassification Report (No Regularization):",
    classification_report(y, y_pred_none),
    "\nConfusion Matrix (No Regularization):",
    confusion_matrix(y, y_pred_none),
    sep="\n",
)


=== No Regularization (penalty=None) ===

Classification Report (No Regularization):
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98        50
         1.0       0.98      0.98      0.98        50

    accuracy                           0.98       100
   macro avg       0.98      0.98      0.98       100
weighted avg       0.98      0.98      0.98       100


Confusion Matrix (No Regularization):
[[49  1]
 [ 1 49]]


In [31]:
# 4. Print the predicted probabilities (y_prob) of both models
print("\n=== Predicted Probabilities ===")
for caption, probabilities in (
    ("L2 Regularization (y_prob):", y_prob_l2),
    ("No Regularization (y_prob):", y_prob_none),
):
    # One line per model: caption followed by the probability array.
    print(caption, probabilities)


=== Predicted Probabilities ===
L2 Regularization (y_prob): [0.15764007 0.14396064 0.31916247 0.05151543 0.2100701  0.14390249
 0.27548916 0.00406322 0.13032218 0.04519076 0.00857397 0.08617591
 0.02222525 0.23765723 0.01175076 0.08433579 0.20370442 0.02489506
 0.23302474 0.02123097 0.50464221 0.03208164 0.44706836 0.16838644
 0.06309658 0.09162166 0.24997764 0.5370907  0.18685191 0.0047961
 0.01741244 0.01027814 0.02250721 0.6527939  0.216863   0.1846251
 0.22021667 0.11325256 0.04661309 0.04673383 0.10565166 0.18096735
 0.03146482 0.0041104  0.07099812 0.04714975 0.06217979 0.06794742
 0.00188891 0.04948534 0.99340436 0.80763907 0.97321638 0.91355791
 0.97766369 0.99570418 0.41410253 0.98223703 0.95213295 0.99181653
 0.75815903 0.85606757 0.92687343 0.82129898 0.93024257 0.92360884
 0.87373265 0.99605287 0.99906393 0.58735629 0.96971513 0.75363989
 0.99616697 0.60089024 0.95309685 0.95345287 0.52611737 0.58292004
 0.95662813 0.88627728 0.97538915 0.98353204 0.96560966 0.5547432
 0.8