In [9]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Load the California housing dataset
california_housing = fetch_california_housing()
data = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
data['target'] = california_housing.target

# Select relevant features (AveRooms, AveBedrms, AveOccup)
selected_features = ['AveRooms', 'AveBedrms', 'AveOccup']
data = data[selected_features + ['target']]

# Split the data into features (X) and target variable (y)
X = data.drop('target', axis=1)
y = data['target']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Print the coefficients of the linear regression model
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
print(coefficients)

# Predict prices for a hypothetical new house
# ...

# Predict prices for a hypothetical new house
new_house = np.array([[5, 2, 3]])  # AveRooms: 5, AveBedrms: 2, AveOccup: 3
new_house_scaled = scaler.transform(new_house)
predicted_price = model.predict(new_house_scaled) * 100000  # Scaling by $100,000
print(f'Predicted Price for the New House: ${predicted_price[0]:,.2f}')



Mean Squared Error: 1.1708074811097746
     Feature  Coefficient
0   AveRooms     0.802292
1  AveBedrms    -0.773242
2   AveOccup    -0.023256
Predicted Price for the New House: $45,712.69




# New Section