In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the dataset (read from 'weight-height.csv' relative to the working directory)
data = pd.read_csv('weight-height.csv')  # Replace with your actual dataset

# Encode the categorical 'Gender' column as integers.
# LabelEncoder numbers the classes in sorted order, so with 'Female'/'Male'
# labels this yields Female -> 0, Male -> 1 (assumes those are the labels
# present in the file -- TODO confirm).
# A single integer column fully represents a binary category, so no additional
# one-hot step is applied afterwards.
label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])

# Features: encoded Gender and Height; target: Weight
X = data[['Gender', 'Height']]
y = data['Weight']

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split (and therefore every metric reported below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [3]:
# Fit a linear regression on the training split.
# fit() returns the estimator itself, so construction and training are chained.
linear_reg = LinearRegression().fit(X_train, y_train)

# Training split: predictions and R^2
y_train_pred_lr = linear_reg.predict(X_train)
train_r2_lr = r2_score(y_true=y_train, y_pred=y_train_pred_lr)

# Test split: predictions, R^2 and mean squared error
y_test_pred_lr = linear_reg.predict(X_test)
test_r2_lr = r2_score(y_true=y_test, y_pred=y_test_pred_lr)
mse_test_lr = mean_squared_error(y_true=y_test, y_pred=y_test_pred_lr)

In [4]:
# Report the linear-regression scores, one "label value" line per metric.
# `!s` applies str() to each score, the same conversion print() performs.
print(
    "\n".join(
        f"{label} {score!s}"
        for label, score in (
            ("Linear Regression - Training R2:", train_r2_lr),
            ("Linear Regression - Testing R2:", test_r2_lr),
            ("Linear Regression - Test MSE:", mse_test_lr),
        )
    )
)

Linear Regression - Training R2: 0.8973793060969246
Linear Regression - Testing R2: 0.905911242442266
Linear Regression - Test MSE: 96.83734437830608


In [5]:
# Fit a 5-nearest-neighbours regressor on the training split.
# fit() returns the estimator itself, so construction and training are chained.
# NOTE(review): the features are passed to KNN without scaling; neighbour
# distances depend on each feature's numeric range -- consider standardising.
knn_reg = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Training split: predictions and R^2
y_train_pred_knn = knn_reg.predict(X_train)
train_r2_knn = r2_score(y_true=y_train, y_pred=y_train_pred_knn)

# Test split: predictions, R^2 and mean squared error
y_test_pred_knn = knn_reg.predict(X_test)
test_r2_knn = r2_score(y_true=y_test, y_pred=y_test_pred_knn)
mse_test_knn = mean_squared_error(y_true=y_test, y_pred=y_test_pred_knn)

In [6]:
# Report the KNN scores, one "label value" line per metric.
# `!s` applies str() to each score, the same conversion print() performs.
print(
    "\n".join(
        f"{label} {score!s}"
        for label, score in (
            ("KNN Regression - Training R2:", train_r2_knn),
            ("KNN Regression - Testing R2:", test_r2_knn),
            ("KNN Regression - Test MSE:", mse_test_knn),
        )
    )
)

KNN Regression - Training R2: 0.9172142723737918
KNN Regression - Testing R2: 0.8821086423439015
KNN Regression - Test MSE: 121.33528273624482


In [7]:
# Side-by-side test-set comparison of the two models.
# A single print with sep="\n" emits the heading (followed by a blank line)
# and then one line per metric.
print(
    "Comparison:\n",
    f"Linear Regression R2 (Test): {test_r2_lr}, KNN Regression R2 (Test): {test_r2_knn}",
    f"Linear Regression MSE (Test): {mse_test_lr}, KNN Regression MSE (Test): {mse_test_knn}",
    sep="\n",
)

Comparison:

Linear Regression R2 (Test): 0.905911242442266, KNN Regression R2 (Test): 0.8821086423439015
Linear Regression MSE (Test): 96.83734437830608, KNN Regression MSE (Test): 121.33528273624482
