In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1) 데이터 로드 (헤더 사용)
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/breast_cancer.csv")  # Colab이면 자신의 경로로 변경

# 인덱스 열 제거
if "Unnamed: 0" in df.columns:
    df = df.drop(columns=["Unnamed: 0"])

# 2) 특징/라벨 분리
X = df.drop(columns=["label"])
y = df["label"]

# 3) 학습/검증 분리 (계층적 분할)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 4) 모델 구성
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=200, random_state=42)
lr = LogisticRegression(max_iter=1000, solver="liblinear")  # 수렴 안정

# 5) 학습
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)

# 6) 평가

dt_y_pred = dt.predict(X_test)
rf_y_pred = rf.predict(X_test)
lr_y_pred = lr.predict(X_test)

dt_acc = accuracy_score(y_test, dt_y_pred )
rf_acc = accuracy_score(y_test, rf_y_pred)
lr_acc = accuracy_score(y_test, lr_y_pred)

print("=== Test Accuracy ===")
print(f"Decision Tree : {dt_acc:.4f}")
print(f"Random Forest : {rf_acc:.4f}")
print(f"Logistic Reg. : {lr_acc:.4f}")


=== Test Accuracy ===
Decision Tree : 0.9123
Random Forest : 0.9561
Logistic Reg. : 0.9561


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# 1) 데이터 로드
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/breast_cancer.csv")

# 인덱스 열 제거
if "Unnamed: 0" in df.columns:
    df = df.drop(columns=["Unnamed: 0"])

# 2) 특징/라벨 분리
X = df.drop(columns=["label"])
y = df["label"]

# ✅ numpy 변환
X = X.values

# ✅ 원핫 인코딩
Y = pd.get_dummies(y).values

# 3) 학습/검증 분리
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=y, random_state=42
)

# 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4) 딥러닝 모델 구성
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(Y_train.shape[1], activation='softmax')  # 클래스 개수만큼 출력
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 5) 학습
history = model.fit(
    X_train, Y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.2,
    verbose=1
)

# 6) 평가
loss, acc = model.evaluate(X_test, Y_test, verbose=0)
print("\n=== Test Accuracy ===")
print(f"Deep Learning MLP : {acc:.4f}")

# 7) 분류 리포트 & 혼동 행렬
y_pred = model.predict(X_test)
y_pred_class = np.argmax(y_pred, axis=1)
y_test_class = np.argmax(Y_test, axis=1)

print("\n=== Classification Report ===")
print(classification_report(y_test_class, y_pred_class))

print("=== Confusion Matrix ===")
print(confusion_matrix(y_test_class, y_pred_class))


Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.8557 - loss: 0.4469 - val_accuracy: 0.9451 - val_loss: 0.1878
Epoch 2/30
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9281 - loss: 0.1813 - val_accuracy: 0.9670 - val_loss: 0.1122
Epoch 3/30
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9761 - loss: 0.0786 - val_accuracy: 0.9670 - val_loss: 0.0871
Epoch 4/30
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9682 - loss: 0.0963 - val_accuracy: 0.9890 - val_loss: 0.0683
Epoch 5/30
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9787 - loss: 0.0744 - val_accuracy: 0.9890 - val_loss: 0.0589
Epoch 6/30
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9947 - loss: 0.0464 - val_accuracy: 0.9890 - val_loss: 0.0507
Epoch 7/30
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━