In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

from sklearn.datasets import load_diabetes

### Step 1. 데이터 전처리
당뇨병의 진행 정도를 예측하는 간단한 머신러닝 모델을 구현해 보자.

In [None]:
# Fetch the bundled diabetes dataset as pandas objects rather than NumPy arrays.
diabetes_data = load_diabetes(as_frame=True)
# The returned Bunch also supports key access; `frame` holds the feature
# columns together with the `target` column.
df = diabetes_data["frame"]

나이, 성별, BMI, 혈압 등을 포함하는 10개의 feature로 target을 예측하는 것이 목표이다. 총 442개의 샘플이 있다.

In [None]:
# Report the (rows, columns) shape as plain text.
print(df.shape)
# Leave the preview as the cell's last expression so the notebook renders it
# as a formatted table instead of the plain-text dump produced by print().
df.head()

target은 1년 후 당뇨의 진행 정도를 수치로 나타낸 continuous value이다.

In [None]:
# Draw on the current Axes explicitly (the same Axes pyplot would pick
# implicitly) and set all labels in a single call.
ax = plt.gca()
ax.hist(df['target'], bins=30)
ax.set(
    xlabel='Target (Diabetes Progression)',
    ylabel='Frequency',
    title='Distribution of Diabetes Progression (Target)',
)
plt.show()

Features (X)와 target (y)를 정의하자

In [None]:
# Feature matrix: every column of the frame except the label column.
X = df.drop(columns=["target"])
# Label vector: the continuous (regression) target column.
y = df.loc[:, "target"]

고위험 환자군과 저위험 환자군을 나누는 간단한 binary classification model을 학습하기 위해, target이 중앙값(median)보다 크면 1(고위험), 그렇지 않으면 0(저위험)으로 두는 `y_binary`를 정의하자.

In [None]:
# Label a sample 1 when its target is strictly above the median of all
# samples, otherwise 0.
y_binary = y.gt(np.median(y)).astype(int)

데이터셋을 학습 데이터(80%)와 평가 데이터(20%)로 나눈다.

In [None]:
# Hold out 20% of the samples for evaluation. Stratifying on the binary label
# keeps the high-/low-risk ratio the same in both splits, so the test metrics
# are not skewed by a chance class imbalance in the small test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
    stratify=y_binary,
)

학습 데이터로만 scaler를 fit한 뒤, 같은 변환을 평가 데이터에도 적용하여 feature를 표준화한다.

In [None]:
# Fit the scaler on the training split only, so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)
# Apply that same fitted transformation to both splits.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Step 2. 학습

Logistic Regression과 SVM 두 개의 모델을 학습해 본다.

In [None]:
# fit() returns the estimator itself, so construction and training are chained.
logreg_model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)
svm_model = SVC(random_state=42).fit(X_train_scaled, y_train)

# Echo the last fitted estimator so the cell output matches a bare fit() call.
svm_model

### Step 3. 예측 및 평가

In [None]:
# Predict labels for the scaled test features with each fitted classifier,
# in the order (logistic regression, SVM).
y_pred_logreg, y_pred_svm = (
    clf.predict(X_test_scaled) for clf in (logreg_model, svm_model)
)


In [None]:
# Score the logistic-regression predictions against the held-out labels.
accuracy_logreg = accuracy_score(y_true=y_test, y_pred=y_pred_logreg)

# One newline-separated print call emits the same text as four consecutive
# prints; the leading "\n" keeps the blank line before the report header.
print(
    "Logistic Regression",
    f"Accuracy: {accuracy_logreg:.2f}",
    "\nClassification Report:",
    classification_report(y_true=y_test, y_pred=y_pred_logreg),
    sep="\n",
)

In [None]:
# Score the SVM predictions against the held-out labels.
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)

# One newline-separated print call emits the same text as four consecutive
# prints; the leading "\n" in each header keeps the original blank lines.
print(
    "\nSupport Vector Machine",
    f"Accuracy: {accuracy_svm:.2f}",
    "\nClassification Report:",
    classification_report(y_true=y_test, y_pred=y_pred_svm),
    sep="\n",
)