In [1]:
# Import necessary libraries
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler  # Import StandardScaler for feature scaling


In [2]:
# Load the California Housing dataset
housing = fetch_california_housing()
X = housing.data  # Features (independent variables)
y = housing.target  # Target variable (housing prices)



In [3]:
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [4]:
# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_test = model.predict(X_test)


In [5]:
# Evaluate the model on the test set
mse_test = mean_squared_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)

print(f"Test Set Performance:")
print(f"Mean Squared Error: {mse_test:.2f}")
print(f"R-squared: {r2_test:.2f}")


Test Set Performance:
Mean Squared Error: 0.56
R-squared: 0.58


In [6]:
# Create new input data for prediction
# This is a single data point with 8 features
# Using more realistic feature values from the dataset
X_new = np.array([[4.5215, 41.0, 6.98412, 1.02381, 322.0, 2.55556, 37.63, -122.0]])

# Scale the new input data
X_new_scaled = scaler.transform(X_new)

# Make predictions on the new data
y_pred_new = model.predict(X_new_scaled)

# Print the predicted housing price for the new data
print(f"\nPredicted housing price for new data: ${y_pred_new[0]:.2f}")




Predicted housing price for new data: $2.45
