In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Pipeline: load the diabetes CSV, split into train/test, standardize,
# reduce dimensionality with PCA, fit a logistic regression on the reduced
# features, and report test accuracy.
#
# The split happens BEFORE the scaler and PCA are fit. Fitting them on the
# full dataset would let test-set statistics (means, variances, principal
# axes) leak into the training features and bias the reported accuracy.

# 1. Load the dataset
data = pd.read_csv('C:/dataset/diabetes.csv')

# Features and target
X = data.drop(columns=['Outcome'])
y = data['Outcome']

# 2. Split the raw data into train and test sets (stratified on the target
#    so both splits keep the same class proportions)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3. Normalize the features: fit on the training rows only, then apply the
#    same fitted transform to the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# 4. Apply PCA (reduce to n components, e.g., n=3), again fit on train only
n_components = 3  # can be set freely
pca = PCA(n_components=n_components)
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Full-dataset projections, kept under their original names for any later
# code that references them. They use the train-fitted scaler and PCA.
X_scaled = scaler.transform(X)
X_pca = pca.transform(X_scaled)

# 5. Train a simple Logistic Regression model on reduced data
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# 6. Make predictions and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"PCA Components: {n_components}")
print(f"Test Accuracy: {accuracy:.4f}")
