In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the California housing dataset through scikit-learn's fetcher.
# The returned object is read below via its .data, .target and
# .feature_names attributes.
housing = fetch_california_housing()

In [None]:
# Show the shapes of the feature array and of the target array side by side.
print(*(arr.shape for arr in (housing.data, housing.target)))

In [None]:
# Build a labelled frame from the feature array and append the target as a
# final "MedHouseValue" column in a single expression.
df = pd.DataFrame(housing.data, columns=housing.feature_names).assign(
    MedHouseValue=housing.target
)

In [None]:
# Column dtypes and non-null counts for the assembled frame.
df.info()

In [None]:
# Peek at the first rows to sanity-check the features and the appended target.
df.head()

In [None]:
# Summary statistics (count, mean, spread, quartiles) for each column.
df.describe()

In [None]:
# List the column names available for the plots and the model below.
df.columns

In [None]:
# Scatter of AveBedrms against the target. The axes are labelled with the
# plotted column names (instead of the placeholders "X"/"y") so the figure
# can be understood without reading the code.
fig, ax = plt.subplots()
ax.scatter(df["AveBedrms"], df["MedHouseValue"])
ax.set_xlabel("AveBedrms")
ax.set_ylabel("MedHouseValue")
ax.set_title("Average Bedrooms x MedHouseValue")
plt.show()

In [None]:
# Scatter of AveRooms against the target. The axes are labelled with the
# plotted column names (instead of the placeholders "X"/"y") so the figure
# can be understood without reading the code.
fig, ax = plt.subplots()
ax.scatter(df["AveRooms"], df["MedHouseValue"])
ax.set_xlabel("AveRooms")
ax.set_ylabel("MedHouseValue")
ax.set_title("Average Rooms x MedHouseValue")
plt.show()

In [None]:
# Scatter of HouseAge against the target. The axes are labelled with the
# plotted column names (instead of the placeholders "X"/"y") so the figure
# can be understood without reading the code.
fig, ax = plt.subplots()
ax.scatter(df["HouseAge"], df["MedHouseValue"])
ax.set_xlabel("HouseAge")
ax.set_ylabel("MedHouseValue")
ax.set_title("House Age x MedHouseValue")
plt.show()

In [None]:
# Count missing values per column before modelling.
df.isna().sum()

In [None]:
# Separate the target from the predictors, then create the (not yet fitted)
# estimator. X is an independent copy, so later changes to it cannot touch df.
y = df["MedHouseValue"]
X = df.drop(columns="MedHouseValue").copy()
model = LinearRegression()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [None]:
# Fit the linear regression on the training portion only.
model.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out rows; compared with y_test below.
y_pred = model.predict(X_test)

In [None]:
# Score the full-feature model on the held-out split: both metrics are
# computed from the same (y_test, y_pred) pair.
mse, r2 = (score(y_test, y_pred) for score in (mean_squared_error, r2_score))

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

In [None]:
# Predicted-vs-actual scatter with a y = x reference: points lying on the
# dashed red diagonal are predictions that match the actual value exactly.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_xlabel("Actual Median House Value")
ax.set_ylabel("Predicted Median House Value")
ax.set_title("Predicted vs Actual Values")
actual_range = [y_test.min(), y_test.max()]  # endpoints of the diagonal
ax.plot(actual_range, actual_range, 'r--')
plt.show()

Train it again with one column (`MedInc`) left out, to see how much that single feature matters

In [None]:
# Second experiment: the same feature matrix minus the MedInc column, with a
# fresh estimator so the first model's fit is left untouched.
X1 = X.drop(columns="MedInc")
model1 = LinearRegression()

In [None]:
# Split the reduced feature set with the same test_size and random_state as
# the full-feature split, so both models use an identically configured hold-out.
X_train1, X_test1, y_train1, y_test1 = train_test_split(X1,y, test_size=0.2, random_state=42)

In [None]:
# Fit the second regression on the training rows of the reduced feature set.
model1.fit(X_train1, y_train1)

In [None]:
# Predict the target for the held-out rows of the reduced feature set.
y_pred1 = model1.predict(X_test1)

In [None]:
# Score the model trained without MedInc on its own held-out split; compare
# these numbers with the full-feature model's metrics printed earlier.
mse1, r21 = (score(y_test1, y_pred1) for score in (mean_squared_error, r2_score))

print(f"Mean Squared Error: {mse1:.2f}")
print(f"R-squared: {r21:.2f}")