In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California Housing dataset. With as_frame=True the returned object
# exposes a `.frame` DataFrame that holds the feature columns together with the
# target column (MedHouseVal).
housing = fetch_california_housing(as_frame=True)
df = housing.frame

# Print a heading followed by the first five rows; sep="\n" puts each argument
# on its own line.
print("Dataset Preview:", df.head(), sep="\n")

Dataset Preview:
   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  MedHouseVal  
0    -122.23        4.526  
1    -122.22        3.585  
2    -122.24        3.521  
3    -122.25        3.413  
4    -122.25        3.422  


In [2]:
# Feature columns fed to the model; trim this list to train on a subset.
features = [
    'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
    'Population', 'AveOccup', 'Latitude', 'Longitude',
]
X = df[features]          # predictor columns
y = df['MedHouseVal']     # target variable (median house value)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training rows only,
# then the same fitted scaler transforms both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the linear regression model and fit it on the scaled training data
# (fit() returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict the target for the held-out test rows.
y_pred = model.predict(X_test_scaled)

In [3]:
# Evaluate the model on the test set: mean squared error and R² between the
# true test targets and the predictions.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics to four decimals; sep="\n" puts each argument on its
# own line.
print(
    "\nModel Performance:",
    f"Mean Squared Error: {mse:.4f}",
    f"R² Score: {r2:.4f}",
    sep="\n",
)


Model Performance:
Mean Squared Error: 0.5559
R² Score: 0.5758


In [4]:
# Scenario prediction: estimate the price for one hypothetical set of inputs.
# Each entry is a one-element list so the dict becomes a single-row DataFrame.
house_features = dict(
    MedInc=[5.0],          # Median Income
    HouseAge=[20.0],       # House Age in years
    AveRooms=[6.0],        # Average number of rooms
    AveBedrms=[1.0],       # Average number of bedrooms
    Population=[1000.0],   # Population of the area
    AveOccup=[3.0],        # Average occupancy
    Latitude=[34.0],       # Latitude coordinate
    Longitude=[-118.0],    # Longitude coordinate
)

# Build the one-row frame with its columns in the order given by `features`.
house_df = pd.DataFrame(house_features, columns=features)

# Reuse the scaler that was fitted on the training data, then predict with
# the trained model.
house_scaled = scaler.transform(house_df)
price_prediction = model.predict(house_scaled)

# 'MedHouseVal' is in units of $100,000, so scale the prediction to dollars.
estimated_price = price_prediction[0] * 100_000

print("\nEstimated House Price:", f"${estimated_price:.2f}", sep="\n")


Estimated House Price:
$234978.54


In [7]:
# Second scenario prediction with a different set of inputs.
# NOTE(review): this cell repeats the previous scenario cell with only the
# input values changed; a shared helper that takes the feature dict would
# remove the repetition.
# NOTE(review): AveBedrms equals AveRooms here, whereas the dataset preview
# rows show AveBedrms close to 1 - confirm this scenario is intentional.
house_features = dict(
    MedInc=[8.0],          # Median Income
    HouseAge=[10.0],       # House Age in years
    AveRooms=[3.0],        # Average number of rooms
    AveBedrms=[3.0],       # Average number of bedrooms
    Population=[1000.0],   # Population of the area
    AveOccup=[4.0],        # Average occupancy
    Latitude=[34.0],       # Latitude coordinate
    Longitude=[-118.0],    # Longitude coordinate
)

# Build the one-row frame with its columns in the order given by `features`.
house_df = pd.DataFrame(house_features, columns=features)

# Reuse the scaler that was fitted on the training data, then predict with
# the trained model.
house_scaled = scaler.transform(house_df)
price_prediction = model.predict(house_scaled)

# 'MedHouseVal' is in units of $100,000, so scale the prediction to dollars.
estimated_price = price_prediction[0] * 100_000

print("\nEstimated House Price:", f"${estimated_price:.2f}", sep="\n")


Estimated House Price:
$553130.11
