In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 데이터 로드
df = pd.read_csv("C:/dataset/diabetes.csv")

# 설명 변수(X)와 타겟 변수(y) 분리
X = df.drop(columns=['Outcome'])  # 설명 변수
y = df['Outcome']  # 타겟 변수

# 학습용 및 테스트용 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 머신러닝 모델 정의
models = {
    "SVM": SVC(),
    "Logistic Regression": LogisticRegression(max_iter=300),  # max_iter 증가
    "Random Forest": RandomForestClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "KNN": KNeighborsClassifier(n_neighbors=5)
}

# 모델별로 학습하고 성능 출력
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\n--- {name} ---")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")
    print(f"Classification Report:\n{classification_report(y_test, y_pred)}")



--- SVM ---
Accuracy: 0.7337662337662337
Confusion Matrix:
[[82 17]
 [24 31]]
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.83      0.80        99
           1       0.65      0.56      0.60        55

    accuracy                           0.73       154
   macro avg       0.71      0.70      0.70       154
weighted avg       0.73      0.73      0.73       154


--- Logistic Regression ---
Accuracy: 0.7532467532467533
Confusion Matrix:
[[79 20]
 [18 37]]
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154


--- Random Forest ---
Accuracy: 0.7597402597402597
Confusion Matrix:
[[82 17]
 [20 35]]
Classification Report:
              

In [12]:
# Part 2: 딥러닝 기반 분류 (Dense Layer만 사용)

import tensorflow as tf
from tensorflow.keras import layers, models

# 데이터 로드 및 전처리는 동일
# 설명 변수(X)와 타겟 변수(y) 분리
X = df.drop(columns=['Outcome'])  # 설명 변수
y = df['Outcome']  # 타겟 변수

# 학습용 및 테스트용 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 신경망 분류 모델 정의
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # 분류를 위한 출력 노드
])

# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 신경망 분류 모델 평가
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\n신경망 분류 모델 정확도: {test_acc}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - accuracy: 0.5744 - loss: 3.8282 - val_accuracy: 0.5772 - val_loss: 1.6438
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5931 - loss: 1.3848 - val_accuracy: 0.6911 - val_loss: 0.7436
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6122 - loss: 0.8823 - val_accuracy: 0.7317 - val_loss: 0.6450
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6225 - loss: 0.7878 - val_accuracy: 0.6667 - val_loss: 0.6112
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7019 - loss: 0.5934 - val_accuracy: 0.6748 - val_loss: 0.6436
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6623 - loss: 0.7353 - val_accuracy: 0.6911 - val_loss: 0.6337
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━

In [14]:
# Part 3: Outcome 삭제 후 BMI 예측하는 머신러닝 회귀

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Outcome 컬럼 삭제 후 BMI 예측용 데이터셋 생성
X = df.drop(columns=['Outcome', 'BMI'])  # 설명 변수
y = df['BMI']  # 타겟 변수 (예측할 값)

# 학습용 및 테스트용 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 머신러닝 회귀 모델 정의
models_reg = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor()
}

# 모델별로 학습하고 성능 출력
for name, model in models_reg.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"\n--- {name} ---")
    print(f"Mean Squared Error: {mse}")



--- Linear Regression ---
Mean Squared Error: 52.240582848747955

--- Random Forest ---
Mean Squared Error: 47.15652316233769


In [16]:
# Part 4: Dense Layer만 사용한 신경망 기반 회귀

# 신경망 회귀 - Dense Layer만 사용
model_reg = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)  # 회귀를 위한 출력 노드
])

# 모델 컴파일
model_reg.compile(optimizer='adam', loss='mse')

# 모델 학습
model_reg.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 신경망 회귀 모델 평가
test_loss_reg = model_reg.evaluate(X_test, y_test)
print(f"\n신경망 회귀 모델 평균 제곱 오차: {test_loss_reg}")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - loss: 2076.2861 - val_loss: 133.5628
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 205.2953 - val_loss: 104.8398
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 93.2115 - val_loss: 80.0145
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 82.7524 - val_loss: 60.9016
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 54.1562 - val_loss: 55.4918
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 49.8801 - val_loss: 54.0514
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 57.6399 - val_loss: 52.7333
Epoch 8/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 51.1210 - val_loss: 50.3312
Epoch 9/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━