### California House Price Prediction Project  
This project makes extensive use of the NumPy and pandas libraries, along with data visualization and linear regression.

In [None]:
import numpy as np 
import pandas as pd

import plotly.express as px 
import plotly.graph_objects as go
import plotly.io as pio
pio.templates

import seaborn as sns 
import matplotlib.pyplot as plt 
%matplotlib inline 

In [None]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset and pull out the feature matrix and
# the target vector in one step.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Put the features into a DataFrame labelled with the dataset's own feature
# names, then append the target as an extra "SalePrice" column.
data = pd.DataFrame(data=X, columns=housing.feature_names)
data["SalePrice"] = y
data.head()

In [None]:
# Write the DataFrame as "out.csv" inside a zip archive named "out.zip",
# leaving out the row index.
compression_opts = {"method": "zip", "archive_name": "out.csv"}
data.to_csv("out.zip", index=False, compression=compression_opts)

In [None]:
# Print the description text that ships with the loaded dataset object.
print(housing.DESCR)

### Data Correlation

In [None]:
# Pairwise correlation matrix of every column in the DataFrame (target included).
cor = data.corr()

# Draw the matrix as an annotated heatmap on a 10x10 inch figure.
plt.figure(figsize=(10, 10))
sns.heatmap(data=cor, annot=True, cmap=plt.cm.PuBu)
plt.show()

In [None]:
# Magnitude of each column's correlation with the target; the sign is dropped.
cor_target = cor["SalePrice"].abs()

# Keep only the columns whose absolute correlation with the target exceeds 0.2.
relevant_features = cor_target[cor_target > 0.2]

# Collect the surviving column labels and discard the target's own entry.
names = list(relevant_features.index)
names.remove("SalePrice")

# Report which features were selected and how many there are.
print(names)
print(len(names))

### Model Building 

In [None]:
from sklearn.model_selection import train_test_split

# Target is the "SalePrice" column; the features are every other column.
y = data["SalePrice"]
X = data.drop(columns="SalePrice")

# Hold out 20% of the rows for testing, with a fixed seed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Show the shape of each split, one per line:
# train features, test features, train target, test target.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep="\n")

In [None]:
from sklearn.linear_model import LinearRegression

# Create a linear regression model with default settings and fit it on the
# training split.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for every row of the held-out test features.
predictions = lr.predict(X_test)

# Compare the first test row's actual value with the model's prediction.
# y_test is a pandas Series that still carries the original (shuffled) row
# labels from the split, so y_test[0] would look up the row *labelled* 0:
# that raises KeyError if the row is not in the test set, and otherwise
# returns a row that is not the one predictions[0] refers to. Positional
# access via .iloc[0] lines up with predictions[0].
print("Actual value of the house:- ", y_test.iloc[0])
print("Model Predicted Value:- ", predictions[0])

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error between the held-out targets and the model's predictions.
mse = mean_squared_error(y_true=y_test, y_pred=predictions)

# The square root puts the error back in the same units as the target.
rmse = np.sqrt(mse)
print(rmse)

In [None]:
from sklearn.metrics import r2_score

# R² score of the predictions against the held-out targets
# (y_test holds the actual values, predictions the model output).
r2 = r2_score(y_true=y_test, y_pred=predictions)

# Express R² as a percentage for display.
accuracy = 100 * r2
print(f"Model Accuracy (R² as %): {accuracy:.2f}%")