In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Banner so the cell output identifies itself when the notebook is skimmed.
print("Housing Price Prediction Model")

# Load the raw housing data. Each failure mode prints its own message and then
# stops the cell by raising SystemExit with a non-zero code.
# exit() is deliberately avoided: inside a Jupyter/Colab kernel it asks the
# kernel itself to shut down (discarding all state), and in a plain script it
# terminates with status 0, which reports success on a failure path.
try:
    df = pd.read_csv("/content/housing.csv")
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print("Error: File not found.")
    raise SystemExit(1)
except pd.errors.EmptyDataError:
    print("Error: File is empty.")
    raise SystemExit(1)
except pd.errors.ParserError:
    print("Error: File is corrupted or improperly formatted.")
    raise SystemExit(1)

# Schema overview: column names, non-null counts and dtypes.
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called bare; wrapping it in print() appends a stray "None" line.
print("\nDataset Information:")
df.info()

# --- Clean and encode -------------------------------------------------------
# Rows containing any missing value are discarded first; then the categorical
# 'ocean_proximity' column is expanded into indicator columns, omitting the
# first level (drop_first=True).
df = pd.get_dummies(
    df.dropna(),
    columns=['ocean_proximity'],
    drop_first=True,
)

# --- Features / target ------------------------------------------------------
# Everything except the target column is used as a predictor.
y = df['median_house_value']
X = df.drop('median_house_value', axis=1)

# --- Hold-out split ---------------------------------------------------------
# 20% of the rows are held out for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Fit and predict --------------------------------------------------------
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Score on the held-out rows ---------------------------------------------
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print("\nModel Evaluation:")
print(f"R-squared Score      : {r2:.4f}")
print(f"Mean Squared Error   : {mse:.2f}")


Housing Price Prediction Model
Dataset loaded successfully.

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.6+ MB
None

Model Evaluation:
R-squared Score      : 0.6488
Mean Squared Error   : 4802173538.60
