### Housing Prediction

#### Import Required Libraries

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

#### Load California Housing Dataset from GitHub

In [12]:
# Location of the raw housing CSV in the GitHub repository.
url = "https://raw.githubusercontent.com/ArnabMp/mlops/main/dataset/housing.csv"

# pandas reads directly from the URL; no local copy is written.
df = pd.read_csv(filepath_or_buffer=url)

# Show the first five rows as a quick sanity check of the columns.
df.head(n=5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


#### Data Preprocessing

In [15]:
# Remove rows that contain any missing value.
df = df.dropna()

# Handle categorical column 'ocean_proximity' using one-hot encoding.
# drop_first=True omits one category level so the remaining indicator
# columns are not redundant with each other.
df_encoded = pd.get_dummies(df, columns=["ocean_proximity"], drop_first=True)

# Define features and target.
# "Unnamed: 0" appears in the loaded frame holding 0, 1, 2, ... -- it looks
# like the row index that was saved into the CSV, not a property of a housing
# block, so it must not be used as a feature. errors="ignore" keeps this cell
# working if the column is absent (e.g. the CSV is later read with index_col=0).
X = df_encoded.drop(columns=["median_house_value", "Unnamed: 0"], errors="ignore")
y = df_encoded["median_house_value"]

# Train/test split: hold out 20% of the rows for evaluation; the fixed
# random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Linear Regression

In [16]:
# Fit a linear model on the training split (fit() returns the estimator
# itself, so construction and fitting can be chained).
lin_reg = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows, used later in the evaluation cell.
y_pred_lin = lin_reg.predict(X_test)

#### Decision Tree Regressor

In [17]:
# random_state fixes the estimator's internal randomness so that re-running
# the notebook produces the same fitted tree.
tree_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows, used later in the evaluation cell.
y_pred_tree = tree_reg.predict(X_test)

#### Model Evaluation

In [18]:
# Report R² and RMSE on the held-out test set for each fitted model.
#
# RMSE is computed as the square root of the MSE instead of calling
# mean_squared_error(..., squared=False): the `squared` argument was
# deprecated in scikit-learn 1.4 and removed in 1.6, where passing it raises
# a TypeError. Taking the root explicitly works on every version and, for a
# single target column as used here, gives the same value.
results = [
    ("🔹 Linear Regression Results:", y_pred_lin),
    ("\n🔹 Decision Tree Regressor Results:", y_pred_tree),
]

for heading, y_pred in results:
    print(heading)
    print("R² score:", r2_score(y_test, y_pred))
    print("RMSE:", mean_squared_error(y_test, y_pred) ** 0.5)

🔹 Linear Regression Results:
R² score: 0.6488402154431991
RMSE: 69297.71669113032

🔹 Decision Tree Regressor Results:
R² score: 0.6688309933664495
RMSE: 67296.33003700843


