In [16]:
import pandas as pd
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

# Location of the combined dataset workbook (.xlsx) hosted on GitHub.
# NOTE(review): the `?raw=true` query presumably makes GitHub serve the file
# itself rather than its HTML viewer page -- confirm if the download breaks.
file_url = 'https://github.com/MyungKyuYi/AI-class/blob/main/combined_dataset-1.xlsx?raw=true'
# Read the workbook into a DataFrame.
combined_df = pd.read_excel(file_url)

# Print the column labels so the target column name used below can be checked.
print(combined_df.columns)

def prepare_data(df, target_column, *, test_size=0.2, random_state=42):
    """Split a DataFrame into train/test feature and target sets.

    The target column is removed from the features, and only the columns kept
    by ``select_dtypes(include=[float, int])`` are used as features.

    Args:
        df: DataFrame holding both the feature columns and the target column.
        target_column: Name of the column to predict.
        test_size: Forwarded to ``train_test_split``. Defaults to 0.2, the
            value that used to be hard-coded.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to 42, the value that used to be hard-coded.

    Returns:
        The ``X_train, X_test, y_train, y_test`` sequence produced by
        ``train_test_split``.

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    y = df[target_column]
    features = df.drop(columns=[target_column])
    # Keep numeric columns only; text columns cannot be scaled or used for
    # distance computations downstream.
    features = features.select_dtypes(include=[float, int])
    return train_test_split(
        features,
        y,
        test_size=test_size,
        random_state=random_state,
    )

def standardize_data(X_train, X_test):
    """Standardize both feature sets with a scaler fitted on the training set.

    Args:
        X_train: Training features; the scaler is fitted on these.
        X_test: Test features; transformed with the already-fitted scaler.

    Returns:
        A ``(X_train_scaled, X_test_scaled)`` tuple.
    """
    # Fit on the training split alone so the test split does not influence
    # the scaling parameters.
    fitted_scaler = StandardScaler().fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

def apply_knn(X_train, y_train, X_test, n_neighbors=5):
    """Fit a k-nearest-neighbours classifier and predict labels for ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Features to predict labels for.
        n_neighbors: Number of neighbours passed to ``KNeighborsClassifier``.

    Returns:
        The predictions returned by the fitted classifier for ``X_test``.
    """
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    # ``fit`` returns the estimator itself, so the prediction can be chained.
    return classifier.fit(X_train, y_train).predict(X_test)

def evaluate_model(y_test, y_pred):
    """Print the accuracy of ``y_pred`` against ``y_test`` as a percentage.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.
    """
    accuracy_percent = accuracy_score(y_test, y_pred) * 100
    print(f'Accuracy: {accuracy_percent:.2f}%')

# Column to predict; every other numeric column is used as a feature.
target_column = 'Heart Rate(b/m)'
X_train, X_test, y_train, y_test = prepare_data(combined_df, target_column)
X_train_scaled, X_test_scaled = standardize_data(X_train, X_test)

# Classification run, kept so the existing accuracy figure is still reported.
# NOTE: heart rate is a measured quantity, so this treats every distinct
# value as its own class and only counts exact matches -- accuracy is
# therefore expected to be very low and is not a meaningful score here.
y_pred = apply_knn(X_train_scaled, y_train, X_test_scaled)
evaluate_model(y_test, y_pred)

# Regression baseline: predict the heart rate as a number and report how far
# off the predictions are on average, in the target's own unit (b/m).
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train_scaled, y_train)
y_pred_reg = knn_regressor.predict(X_test_scaled)
print(f'MAE: {mean_absolute_error(y_test, y_pred_reg):.2f} b/m')

Index(['Num.', 'subject_ID', 'Sex(M/F)', 'Age(year)', 'Height(cm)',
       'Weight(kg)', 'Systolic Blood Pressure(mmHg)',
       'Diastolic Blood Pressure(mmHg)', 'Heart Rate(b/m)', 'BMI(kg/m^2)',
       ...
       '2091', '2092', '2093', '2094', '2095', '2096', '2097', '2098', '2099',
       '2100'],
      dtype='object', length=2114)
Accuracy: 3.03%
