In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Read the California Housing data from disk
df = pd.read_csv(r"file.csv")

# Impute missing numeric entries with each column's median.
# NOTE: the medians are computed over the whole dataset, i.e. before the
# train/test split performed further down.
column_medians = df.median(numeric_only=True)
df = df.fillna(column_medians)

# Expand the categorical ocean_proximity column into indicator columns;
# drop_first=True omits the first category so the indicators are not redundant.
df = pd.get_dummies(df, columns=["ocean_proximity"], drop_first=True)

# Quick sanity check: preview the first rows and report the dataset size
print(df.head())

print(f" Total Samples : {df.shape[0]}")

# Extract features & target variable
X = df.drop("median_house_value", axis=1)
y = df["median_house_value"]

# Split dataset (80% training, 20% testing) BEFORE scaling, so that the
# held-out rows never contribute to the scaler's mean/variance. Fitting the
# scaler on the full data leaks test-set statistics into training and makes
# the reported error optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature Scaling (Standardization): learn the statistics from the training
# rows only, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept under its
# original name in case later code refers to X_scaled.
X_scaled = scaler.transform(X)

# Fit a k-nearest-neighbours regressor (k=100, Euclidean distance) on the
# training split; fit() returns the estimator itself, so the call is chained.
knn = KNeighborsRegressor(n_neighbors=100, metric="euclidean").fit(X_train, y_train)

# Predict house values for the held-out test split
y_pred = knn.predict(X_test)

# Score the predictions against the true test targets:
# MAE is the mean absolute error, RMSE the square root of the mean squared error.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Emit the report as three lines, both metrics rounded to two decimals
report_lines = [
    " KNN Regression (Euclidean Distance) Results:",
    f" MAE: {mae:.2f}",
    f" RMSE: {rmse:.2f}",
]
print("\n".join(report_lines))


   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -122.23     37.88                41.0        880.0           129.0   
1    -122.22     37.86                21.0       7099.0          1106.0   
2    -122.24     37.85                52.0       1467.0           190.0   
3    -122.25     37.85                52.0       1274.0           235.0   
4    -122.25     37.85                52.0       1627.0           280.0   

   population  households  median_income  median_house_value  \
0       322.0       126.0         8.3252            452600.0   
1      2401.0      1138.0         8.3014            358500.0   
2       496.0       177.0         7.2574            352100.0   
3       558.0       219.0         5.6431            341300.0   
4       565.0       259.0         3.8462            342200.0   

   ocean_proximity_INLAND  ocean_proximity_ISLAND  ocean_proximity_NEAR BAY  \
0                   False                   False                      True   
1     