# Predict house price using Linear regression

In [10]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import numpy as np

In [12]:
# Load the dataset
housing=fetch_california_housing(as_frame=True)

In [14]:
#Create a dataframe from the dataset
df=housing.frame
print(df.head())

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  MedHouseVal  
0    -122.23        4.526  
1    -122.22        3.585  
2    -122.24        3.521  
3    -122.25        3.413  
4    -122.25        3.422  


In [20]:
# features(independent variable) and target(dependent variable)
x=df.drop('MedHouseVal',axis=1)
y=df['MedHouseVal']

#split the dataset into training and test 
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [22]:
#Train the LinearRegression model
model=LinearRegression()
model.fit(x_train,y_train)

In [24]:
#make prediction on the test set
y_predict=model.predict(x_test)
print(y_predict)

[0.71912284 1.76401657 2.70965883 ... 4.46877017 1.18751119 2.00940251]


In [26]:
#Evaluate the model using mean_squared_error and r2_score
mse=mean_squared_error(y_test,y_predict)
r2=r2_score(y_test,y_predict)
print(mse)
print(r2)

0.555891598695244
0.5757877060324511


In [32]:
#Display the coefficent of the model
print(f"Intercept: {model.intercept_}")
#deisplay the coffecient of each features
print(f"{model.coef_}")

coef_df=pd.DataFrame(model.coef_,x.columns,columns=['Coefficent'])
print(coef_df)

Intercept: -37.02327770606413
[ 4.48674910e-01  9.72425752e-03 -1.23323343e-01  7.83144907e-01
 -2.02962058e-06 -3.52631849e-03 -4.19792487e-01 -4.33708065e-01]
            Coefficent
MedInc        0.448675
HouseAge      0.009724
AveRooms     -0.123323
AveBedrms     0.783145
Population   -0.000002
AveOccup     -0.003526
Latitude     -0.419792
Longitude    -0.433708


In [34]:
#Test the model with new data
new_data=pd.DataFrame({
    'MedInc':[5],
    'HouseAge':[30],
    'AveRooms':[6],
    'AveBedrms':[1],
    'Population':[500],
    'AveOccup':[3],
    'Latitude':[34.05],
    'Longitude':[-118.25]
})
predicted_price=model.predict(new_data)
print(f'\n\npredicted house price : ${predicted_price[0]:,.2f}')



predicted house price : $2.54
