In [1]:
# Task 2: Linear Regression – California Housing Dataset

import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

# Load the dataset
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("✅ Model Trained Successfully")
print("R² Score:", r2)
print("Mean Squared Error:", mse)

# Display the coefficients
coefficients = pd.DataFrame({
    'Feature': data.feature_names,
    'Coefficient': model.coef_
})
print("\nFeature Coefficients:")
print(coefficients)

✅ Model Trained Successfully
R² Score: 0.5757877060324511
Mean Squared Error: 0.555891598695244

Feature Coefficients:
      Feature  Coefficient
0      MedInc     0.852382
1    HouseAge     0.122382
2    AveRooms    -0.305116
3   AveBedrms     0.371132
4  Population    -0.002298
5    AveOccup    -0.036624
6    Latitude    -0.896635
7   Longitude    -0.868927
