In [3]:
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score

data = pd.read_csv('../input/california-housing-prices/housing.csv')
X = data[['longitude','latitude','housing_median_age','total_rooms','total_bedrooms','population','households','median_income']]
y = data.median_house_value
X = X.fillna(X.mean())
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

start_time = time.time()
model = LinearRegression()
model.fit(X_train_scaled, y_train)
training_time = time.time() - start_time
predictions = model.predict(X_test_scaled)

mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)
print(f"Training time: {training_time} seconds")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R²: {r2}")


Training time: 0.05205416679382324 seconds
MSE: 4966798265.8442955
RMSE: 70475.51536416243
R²: 0.6226006867385949
