<a href="https://colab.research.google.com/github/vcsen0101509/House-Price-Prediction-ML/blob/main/House_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from ipywidgets import interact_manual  # For interactive input in Colab

# 📌 Load Dataset from CSV (Ensure you upload the file in Colab)
file_path = "/content/CALIFONIA HOUSE PRICES.csv"
df = pd.read_csv(file_path)

# 📌 Display first few rows to check dataset
print("Dataset Preview:")
print(df.head())

# 📌 Remove missing values
# NOTE: this drops rows and leaves gaps in df.index; the index is carried
# through explicitly below so features and target stay label-aligned.
df = df.dropna()

# 📌 Define Features (X) and Target (y)
feature_columns = [
    'Median_Income', 'Median_Age', 'Tot_Rooms', 'Tot_Bedrooms', 'Population',
    'Households', 'Latitude', 'Longitude', 'Distance_to_coast', 'Distance_to_LA',
    'Distance_to_SanDiego', 'Distance_to_SanJose', 'Distance_to_SanFrancisco'
]
target_column = 'price'

X = df[feature_columns]  # Independent variables
y = df[target_column]    # Dependent variable (House price)

# 📌 Apply Polynomial Features (Degree = 1 to prevent overfitting)
# With degree=1 and include_bias=False the output columns are the original
# features; the transformer is kept so the degree can be raised in one place.
poly = PolynomialFeatures(degree=1, include_bias=False)
X_poly = poly.fit_transform(X)
poly_feature_names = poly.get_feature_names_out(feature_columns)

# 📌 Convert to DataFrame with feature names
# Reuse X's index: poly.fit_transform returns a bare array, and a default
# RangeIndex would no longer match y's labels once dropna() has removed rows.
X_poly_df = pd.DataFrame(X_poly, columns=poly_feature_names, index=X.index)

# 📌 Train-Test Split (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X_poly_df, y, test_size=0.2, random_state=42)

# 📌 Standardize Data (Feature Scaling)
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📌 Train Linear Regression Model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# 📌 Make Predictions
y_pred = model.predict(X_test_scaled)

# 📌 Evaluate Model Performance
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)
r2 = r2_score(y_test, y_pred)

print("\n📊 Model Performance Metrics:")
print(f"🔹 Mean Absolute Error (MAE): {mae}")
print(f"🔹 Root Mean Squared Error (RMSE): {rmse}")
print(f"🔹 R² Score: {r2}")

# 📌 Interactive Price Prediction (For Google Colab)
def predict_price(Median_Income, Median_Age, Tot_Rooms, Tot_Bedrooms, Population,
                  Households, Latitude, Longitude, Distance_to_coast, Distance_to_LA,
                  Distance_to_SanDiego, Distance_to_SanJose, Distance_to_SanFrancisco):
    """Print the model's estimated price for a single house.

    The thirteen arguments are the feature values, in the same order as
    ``feature_columns``. The row is pushed through the module-level fitted
    ``poly`` -> ``scaler`` -> ``model`` pipeline and the first (only)
    prediction is printed. Nothing is returned.
    """
    user_input_df = pd.DataFrame([[Median_Income, Median_Age, Tot_Rooms, Tot_Bedrooms, Population,
                                    Households, Latitude, Longitude, Distance_to_coast, Distance_to_LA,
                                    Distance_to_SanDiego, Distance_to_SanJose, Distance_to_SanFrancisco]],
                                    columns=feature_columns)

    # Apply Polynomial Transformation. poly.transform returns a bare array,
    # but the scaler was fitted on a DataFrame with named columns; re-attach
    # the names so scikit-learn does not warn about missing feature names.
    user_input_poly = pd.DataFrame(
        poly.transform(user_input_df),
        columns=poly.get_feature_names_out(feature_columns),
    )
    user_input_scaled = scaler.transform(user_input_poly)  # Apply Scaling
    prediction = model.predict(user_input_scaled)  # Predict Price

    print(f"\n🏠 Estimated House Price: ${prediction[0]:,.2f}")

# 📌 Run Interactive Input in Google Colab
# Default (min, max, step) for the slider of each same-named feature.
default_slider_ranges = {
    'Median_Income': (0, 15, 0.1),
    'Median_Age': (0, 100, 1),
    'Tot_Rooms': (0, 10000, 10),
    'Tot_Bedrooms': (0, 2000, 10),
    'Population': (0, 10000, 50),
    'Households': (0, 5000, 10),
    'Latitude': (32, 42, 0.1),
    'Longitude': (-125, -114, 0.1),
    'Distance_to_coast': (0, 10000, 10),
    'Distance_to_LA': (0, 800000, 1000),
    'Distance_to_SanDiego': (0, 1000000, 1000),
    'Distance_to_SanJose': (0, 800000, 1000),
    'Distance_to_SanFrancisco': (0, 300000, 1000),
}

# Some defaults are narrower than the data itself (the dataset preview already
# shows Distance_to_coast above 10000), which would make real houses impossible
# to enter. Widen a bound only when the loaded data goes past it, snapping
# outward to a whole number of steps; defaults that already fit are unchanged.
slider_ranges = {}
for feature_name, (lower, upper, step) in default_slider_ranges.items():
    # Integer steps must keep integer bounds so the widget stays an int slider.
    as_bound = int if isinstance(step, int) else (lambda value: round(float(value), 10))
    observed_min = df[feature_name].min()
    observed_max = df[feature_name].max()
    if observed_min < lower:
        lower = as_bound(np.floor(observed_min / step) * step)
    if observed_max > upper:
        upper = as_bound(np.ceil(observed_max / step) * step)
    slider_ranges[feature_name] = (lower, upper, step)

interact_manual(predict_price, **slider_ranges);


Dataset Preview:
      price  Median_Income  Median_Age  Tot_Rooms  Tot_Bedrooms  Population  \
0  452600.0         8.3252          41        880           129         322   
1  358500.0         8.3014          21       7099          1106        2401   
2  352100.0         7.2574          52       1467           190         496   
3  341300.0         5.6431          52       1274           235         558   
4  342200.0         3.8462          52       1627           280         565   

   Households  Latitude  Longitude  Distance_to_coast  Distance_to_LA  \
0         126     37.88    -122.23        9263.040773     556529.1583   
1        1138     37.86    -122.22       10225.733070     554279.8501   
2         177     37.85    -122.24        8259.085109     554610.7171   
3         219     37.85    -122.25        7768.086571     555194.2661   
4         259     37.85    -122.25        7768.086571     555194.2661   

   Distance_to_SanDiego  Distance_to_SanJose  Distance_to_SanFrancisc

interactive(children=(FloatSlider(value=7.0, description='Median_Income', max=15.0), IntSlider(value=50, descr…