In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# --- Data preparation --------------------------------------------------------
# Load the table and derive the two working columns. The multipliers are the
# pound->kilogram and inch->centimetre factors.
# NOTE(review): this assumes the CSV stores Weight in pounds and Height in
# inches -- confirm against the data source.
data = pd.read_csv('weight-height.csv')
data['WeightKg'] = data['Weight'] * 0.453592
data['HeightCm'] = data['Height'] * 2.54

# Single-feature regression: predict weight (kg) from height (cm).
X = data[['HeightCm']]
y = data['WeightKg']

# Fixed seed so the 80/20 split (and therefore every score below) is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=20
)

# One estimator instance is shared and refitted for each feature variant.
knn = KNeighborsRegressor(n_neighbors=5)


def _fit_and_score(train_features, test_features):
    """Refit the shared ``knn`` and evaluate it on the held-out split.

    The model is fitted on *train_features* against ``y_train`` and then used
    to predict from *test_features*.

    Returns a ``(predictions, r2)`` pair, where ``r2`` is ``r2_score`` of the
    predictions measured against ``y_test``.
    """
    knn.fit(train_features, y_train)
    predictions = knn.predict(test_features)
    return predictions, r2_score(y_test, predictions)


# --- Baseline: raw centimetre values -----------------------------------------
y_pred_unscaled, r2_unscaled = _fit_and_score(X_train, X_test)
print(f"R^2 value for unscaled data: {r2_unscaled}")

# --- Min-max normalisation ---------------------------------------------------
# Scaler statistics come from the training split only; the test split is
# transformed with those same statistics.
scaler_normal = MinMaxScaler().fit(X_train)
X_train_normalized = scaler_normal.transform(X_train)
X_test_normalized = scaler_normal.transform(X_test)
y_pred_normalized, r2_normalized = _fit_and_score(
    X_train_normalized, X_test_normalized
)
print(f"R^2 value for normalized data: {r2_normalized}")

# --- Standardisation ---------------------------------------------------------
scaler_standard = StandardScaler().fit(X_train)
X_train_standardized = scaler_standard.transform(X_train)
X_test_standardized = scaler_standard.transform(X_test)
y_pred_standardized, r2_standardized = _fit_and_score(
    X_train_standardized, X_test_standardized
)
print(f"R^2 value for standardized data: {r2_standardized}")

# --- Summary -----------------------------------------------------------------
# NOTE: with only one input feature, both scalers apply a positive affine map
# to that feature, which keeps the ordering of distances between samples. The
# nearest neighbours are therefore expected to be the same in all three runs,
# so matching R^2 values here are not a sign of a bug.
print("\nComparison of R^2 values:")
print(f"Unscaled Data: {r2_unscaled}")
print(f"Normalized Data: {r2_normalized}")
print(f"Standardized Data: {r2_standardized}")


R^2 value for unscaled data: 0.8327437044707436
R^2 value for normalized data: 0.8327437044707436
R^2 value for standardized data: 0.8327437044707436

Comparison of R^2 values:
Unscaled Data: 0.8327437044707436
Normalized Data: 0.8327437044707436
Standardized Data: 0.8327437044707436
