# House Price Prediction

Problem Statement: Using the California Housing Prices dataset, build a model to predict house prices, then evaluate the performance of the model.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Read the housing CSV (relative path, so it is resolved against the current
# working directory) into a DataFrame, then preview its first five rows.
housing = pd.read_csv(filepath_or_buffer='housing.csv')
housing.head(n=5)

In [None]:
# Preprocessing: discard every row that has a missing value in any column.
housing = housing.dropna(axis=0, how='any')

In [None]:
# One-hot encode the categorical 'ocean_proximity' column. With drop_first=True
# the first category level gets no indicator column (k-1 columns for k levels).
housing = pd.get_dummies(
    data=housing,
    columns=['ocean_proximity'],
    drop_first=True,
)

In [None]:
# The target is the 'median_house_value' column; every other column is a feature.
y = housing['median_house_value']
X = housing.drop(columns='median_house_value')

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler is fitted on the training rows only, and
# that same fitted transform is then applied to both the training and test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit an ordinary linear regression on the scaled training features.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Use the fitted model to predict the target for the held-out test rows.
y_pred = model.predict(X=X_test)

In [None]:
# Score the test-set predictions: R^2 from r2_score, and RMSE as the square
# root of the mean squared error.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R^2 Score: {r2}")

In [None]:
# Scatter plot of actual vs. predicted prices for the test set; alpha=0.3 makes
# the markers translucent so overlapping points stay distinguishable.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.3)
ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual Prices vs Predicted Prices")
plt.show()