In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
# Read the California housing training CSV into a DataFrame and preview
# its first five rows as plain text.
data = pd.read_csv(filepath_or_buffer="sample_data/california_housing_train.csv")
print(data.head(n=5))

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -114.31     34.19                15.0       5612.0          1283.0   
1    -114.47     34.40                19.0       7650.0          1901.0   
2    -114.56     33.69                17.0        720.0           174.0   
3    -114.57     33.64                14.0       1501.0           337.0   
4    -114.57     33.57                20.0       1454.0           326.0   

   population  households  median_income  median_house_value  
0      1015.0       472.0         1.4936             66900.0  
1      1129.0       463.0         1.8200             80100.0  
2       333.0       117.0         1.6509             85700.0  
3       515.0       226.0         3.1917             73400.0  
4       624.0       262.0         1.9250             65500.0  


In [None]:
# Feature matrix: the five numeric predictor columns used by the model.
X = data.loc[:, [
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
]]
# Regression target: the median house value column.
y = data.loc[:, 'median_house_value']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:
# Ordinary least-squares linear regression, fitted on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [None]:
# Predict the target for the held-out rows; compared against y_test below.
y_pred = model.predict(X=X_test)


In [None]:
# Report each regression metric for the held-out predictions, in the order
# MAE, MSE, R2. Every scorer takes (true values, predicted values).
for label, metric in (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R2 Score:", r2_score),
):
    print(label, metric(y_test, y_pred))


MAE: 57856.405693383094
MSE: 6098672216.020681
R2 Score: 0.557388822940465


In [None]:
# Example: predict the median house value for one new, hypothetical sample.
# The model was trained on the feature columns held in X
# ('total_rooms', 'total_bedrooms', 'population', 'households', 'median_income'),
# so a value must be supplied for every one of them.
#
# NOTE(review): the rows printed by data.head() show thousands of rooms and
# hundreds of households per row, so each row looks like an aggregate over a
# district rather than a single house -- confirm against the dataset docs.
# The example values below are therefore district-scale totals.

new_total_rooms = 1600
new_total_bedrooms = 300  # Example value
new_population = 800   # Example value
new_households = 250   # Example value
new_median_income = 4.5  # Example value (e.g., $45,000)

# Key every value by its feature name instead of relying on list position.
new_sample = {
    'total_rooms': new_total_rooms,
    'total_bedrooms': new_total_bedrooms,
    'population': new_population,
    'households': new_households,
    'median_income': new_median_income,
}

# Build a one-row DataFrame and order its columns by X.columns, so the input
# always matches the training features. If the training feature list changes
# and this cell is not updated, the selection raises KeyError instead of
# silently feeding mislabeled values to the model.
new_data_for_prediction = pd.DataFrame([new_sample])[X.columns]

# Pass the DataFrame to the model for prediction
price = model.predict(new_data_for_prediction)
print("Predicted House Price:", price[0])

Predicted House Price: 225827.38281168044
