In [None]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import pandas as pd 
import scipy as sp 
from sklearn.preprocessing import StandardScaler


from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, roc_curve, r2_score, accuracy_score 
from sklearn.metrics import recall_score, precision_score, confusion_matrix, classification_report, roc_auc_score, auc
import statsmodels.api as sm 
import seaborn as sns 
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# 'Malgun Gothic' is a Korean-capable font that is not installed everywhere.
# Select it only when matplotlib's font manager has it registered; otherwise the
# default font stays active and matplotlib does not log "font family not found"
# warnings while falling back.
if any(font.name == 'Malgun Gothic' for font in fm.fontManager.ttflist):
    plt.rc('font', family = 'Malgun Gothic')

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset # 텐서데이터셋
from torch.utils.data import DataLoader

  import pandas.util.testing as tm


## 1. Data Load

In [None]:
### Load the scikit-learn breast cancer dataset
breast_cancer = load_breast_cancer()

### Relabel the target (stored back on the loaded dataset object):
###   original labels : normal = 1, cancer = 0
###   new labels      : cancer = 1, normal = 0
### i.e. every 0 becomes 1 and everything else becomes 0.
breast_cancer.target = (breast_cancer.target == 0).astype(int)

### Hold out 30% of the rows as the test set (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    breast_cancer.data,
    breast_cancer.target,
    test_size=0.3,
    random_state=2021,
)

### Standardize features: statistics are fitted on the training rows only,
### and the test rows are transformed with those same statistics (no re-fitting).
normalizer = StandardScaler().fit(X_train)
X_train = normalizer.transform(X_train)
X_test = normalizer.transform(X_test)

### Convert to float tensors; targets get a trailing axis so they become column vectors
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)[:, None]
y_test = torch.FloatTensor(y_test)[:, None]

## Mini-batching is left disabled - original note: the dataset is small, so
## training without batches performed better.
# dataset = TensorDataset(X_train, y_train)
# dataloader = DataLoader(dataset, batch_size=30, shuffle=True)

## 2. Modeling

In [None]:
class BinaryClassifier(nn.Module):
    """Single linear layer followed by a sigmoid (logistic-regression-style model).

    Args:
        input_dim: Number of input features expected in the last dimension of ``x``.
    """

    def __init__(self, input_dim):
        super(BinaryClassifier, self).__init__()
        # One output unit: a single score per input row.
        self.linear = nn.Linear(in_features=input_dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the linear layer to ``x`` and pass the result through the sigmoid."""
        logits = self.linear(x)
        activated = self.sigmoid(logits)
        return activated

In [None]:
# Seed PyTorch's RNG before building the model: nn.Linear initialises its
# parameters randomly, so without a fixed seed each fresh run starts from
# different weights and reports different costs and metrics.
# 2021 matches the random_state used for the train/test split.
torch.manual_seed(2021)

input_dim = X_train.shape[1]  # number of feature columns in the training tensor
model = BinaryClassifier(input_dim)
# SGD over every model parameter with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

## 3. Training

In [None]:
epochs = 100
for one_epoch in range(1, epochs + 1):

    # Clear gradients before the backward pass rather than after the step.
    # If this cell was interrupted between backward() and zero_grad() on a
    # previous run (or anything else left gradients on the parameters), those
    # stale values would otherwise be accumulated into the first update.
    optimizer.zero_grad()

    # Forward pass on the whole training set, then binary cross-entropy
    # between the sigmoid outputs and the 0/1 targets.
    prediction = model(X_train)
    cost = F.binary_cross_entropy(prediction, y_train)

    # Backpropagate and apply one optimizer update.
    cost.backward()
    optimizer.step()

    # Report the cost every 10th epoch.
    if one_epoch % 10 == 0:
        print(f"Epoch {one_epoch} / {epochs} : Cost {cost.item()} ")

Epoch 10 / 100 : Cost 0.5131434798240662 
Epoch 20 / 100 : Cost 0.4398878216743469 
Epoch 30 / 100 : Cost 0.3899155557155609 
Epoch 40 / 100 : Cost 0.35353419184684753 
Epoch 50 / 100 : Cost 0.3257502615451813 
Epoch 60 / 100 : Cost 0.3037448525428772 
Epoch 70 / 100 : Cost 0.2858119308948517 
Epoch 80 / 100 : Cost 0.27086058259010315 
Epoch 90 / 100 : Cost 0.2581614851951599 
Epoch 100 / 100 : Cost 0.24720872938632965 


In [None]:
## Predict on the test set, then convert tensors to flat NumPy arrays
# Only the forward pass needs gradient tracking switched off.
with torch.no_grad():
    y_pred = model(X_test)

# The sigmoid output is a fraction between 0 and 1, so round it to get 0/1 labels.
y_pred_class = torch.round(y_pred)

# Drop the trailing axis so predictions and targets are 1-D arrays.
y_pred_arr = y_pred_class.numpy().flatten()
y_test_arr = y_test.numpy().flatten()


In [None]:
def eval_binaryclass(y_test, y_pred):
    """Report binary-classification metrics for predicted vs. true labels.

    Prints accuracy, recall and precision on one line, prints scikit-learn's
    classification report, and displays the confusion matrix as a DataFrame
    (in scikit-learn's convention rows are true labels and columns are
    predicted labels).

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned element-wise with ``y_test``.

    Returns:
        dict: ``{"accuracy": ..., "recall": ..., "precision": ...}`` so callers
        can reuse the scores instead of re-reading the printed output.
    """
    acc = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)

    # These scores used to be computed and then thrown away; surface them.
    print(f"accuracy : {acc:.4f} | recall : {recall:.4f} | precision : {precision:.4f}")
    print(classification_report(y_test, y_pred))
    display(pd.DataFrame(confusion_matrix(y_test, y_pred)))

    return {"accuracy": acc, "recall": recall, "precision": precision}

# The trailing semicolon stops the returned dict from being echoed as cell output.
eval_binaryclass(y_test_arr, y_pred_arr);

              precision    recall  f1-score   support

         0.0       0.94      0.95      0.94       107
         1.0       0.92      0.89      0.90        64

    accuracy                           0.93       171
   macro avg       0.93      0.92      0.92       171
weighted avg       0.93      0.93      0.93       171



Unnamed: 0,0,1
0,102,5
1,7,57
