In [None]:
# ---------------------------------------------
# 🏠 House Price Prediction - Interactive
# ---------------------------------------------
# In this session, we will:
# 1. Generate a realistic house dataset
# 2. Explore and visualize the data
# 3. Build a regression model to predict price
# 4. Evaluate the model & get insights
# 5. Predict price based on user input
# ---------------------------------------------

# Step 1: Import Required Libraries
# Data handling (pandas) and numerical computing (numpy)
import pandas as pd
import numpy as np
# Plotting: matplotlib as the base layer, seaborn for statistical charts
import matplotlib.pyplot as plt
import seaborn as sns

# scikit-learn pieces: train/test splitting, the regression estimator,
# and the two evaluation metrics used in this notebook
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Only reached if every import above succeeded
print("✅ Step 1: Libraries imported successfully!")


In [None]:

# Step 2: Create a Realistic Dataset
# Builds a synthetic table of houses: three integer features and a price that
# is a linear combination of them plus bounded integer noise.
print("\n--- Step 2: Creating Dataset ---")
np.random.seed(42)  # fixed seed so every run yields the same houses
n = 200  # number of houses

# Random draws. NOTE: the order of these calls determines the generated
# values, so it must stay size -> bedrooms -> age -> noise.
size = np.random.randint(600, 5000, n)       # 600..4999
bedrooms = np.random.randint(1, 7, n)        # 1..6
age = np.random.randint(0, 40, n)            # 0..39
noise = np.random.randint(-20000, 20000, n)  # -20000..19999

# Price formula: +120 per unit of size, +10000 per bedroom, -800 per year of age
price = 120 * size + 10000 * bedrooms - 800 * age + noise

# Assemble everything into a single DataFrame (column order matters downstream)
df = pd.DataFrame(
    dict(Size_sqft=size, Bedrooms=bedrooms, Age_years=age, Price_USD=price)
)
print("✅ Dataset Created Successfully!")
# --- Show Output for Step 2 ---
print("First 5 rows of the dataset:")
print(df.head())



In [None]:
# Step 3: Basic Dataset Info
# Prints the structural overview of `df` followed by its summary statistics.
print("\n--- Step 3: Basic Dataset Info ---")
# --- Show Output for Step 3 ---
print("Dataset Information (datatypes, non-null counts):")
df.info()  # info() writes its report directly to stdout

summary_stats = df.describe()
print("\nSummary Statistics (count, mean, std, etc.):")
print(summary_stats)


In [None]:

# Step 4: Exploratory Data Analysis (EDA)
# Three figures: the price distribution, pairwise scatter plots of every
# column, and a correlation heatmap.
print("\n--- Step 4: Exploratory Data Analysis (EDA) ---")
print("Generating plots... please check the pop-up windows.")

# --- Distribution of Price
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(df["Price_USD"], bins=30, kde=True, color="blue", ax=ax)
ax.set_title("Distribution of House Prices")
ax.set_xlabel("Price (USD)")
plt.show()

# --- Pairplot to check relationships
# pairplot creates its own figure, which becomes the current one, so the
# pyplot-level suptitle lands on it; y > 1 lifts the title above the grid.
sns.pairplot(df)
plt.suptitle("Pairwise Feature Relationships", y=1.02)
plt.show()

# --- Correlation Heatmap
correlations = df.corr()
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(correlations, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

print("✅ EDA plots have been displayed.")


In [None]:

# Step 5: Prepare Data for Modeling
# Separates the predictors from the target, then holds out a test set.
print("\n--- Step 5: Preparing Data for Modeling ---")
feature_columns = ["Size_sqft", "Bedrooms", "Age_years"]
X = df[feature_columns]  # Features
y = df["Price_USD"]      # Target variable

# 80% of rows go to training, 20% to testing; the fixed random_state keeps
# the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Data split into training and testing sets.")
# --- Show Output for Step 5 ---
for split_name, split_features in (("Training", X_train), ("Testing", X_test)):
    print(f"{split_name} set shape: {split_features.shape}")


In [None]:

# Step 6: Train Linear Regression Model
# Fits an ordinary linear regression on the training split.
print("\n--- Step 6: Training the Linear Regression Model ---")
model = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator itself
print("✅ Model Training Complete!")


In [None]:

# Step 7: Make Predictions on Test Data
# Scores the held-out rows and tabulates actual vs. predicted prices.
print("\n--- Step 7: Making Predictions on the Test Set ---")
y_pred = model.predict(X_test)
print("✅ Predictions made successfully.")
# --- Show Output for Step 7 ---
# Side-by-side comparison table; it takes its row index from y_test.
# 'Difference' is actual minus predicted.
predictions_df = pd.DataFrame(
    {'Actual Price': y_test, 'Predicted Price': y_pred}
).assign(
    Difference=lambda frame: frame['Actual Price'] - frame['Predicted Price']
)
print("Sample of predictions vs actual prices:")
print(predictions_df.head())



In [None]:

# Step 8: Evaluate Model
# Computes MSE and R² of the test-set predictions and prints both.
print("\n--- Step 8: Evaluating Model Performance ---")
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))
# --- Show Output for Step 8 ---
print(
    "Model Evaluation Metrics:",
    f"Mean Squared Error (MSE): {mse:,.2f}",
    f"R-squared (R²) Score: {r2:.4f}",
    sep="\n",
)


In [None]:

# Step 9: Visualize Predictions vs Actual
# Scatter of test-set actual prices (x) against the model's predictions (y),
# with a dashed identity line: points on the line are perfect predictions.
print("\n--- Step 9: Visualizing Predictions vs Actual Prices ---")
print("Generating plot... please check the pop-up window.")
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(y_test, y_pred, color="purple", alpha=0.6)
ax.set_xlabel("Actual Prices (USD)")
ax.set_ylabel("Predicted Prices (USD)")
ax.set_title("Actual vs Predicted Prices")

# Span the reference line over the values actually plotted (test actuals and
# predictions). Using the full target `y` would include training rows that
# are not on this chart and would not cover predictions outside y's range.
line_min = min(y_test.min(), y_pred.min())
line_max = max(y_test.max(), y_pred.max())
ax.plot([line_min, line_max], [line_min, line_max], 'k--', lw=2)  # reference line
plt.show()
print("✅ Prediction plot displayed.")


In [None]:

# Step 10: Coefficients (Feature Importance)
# Tabulates the fitted linear-regression coefficients and prints a
# plain-language reading of each one.
print("\n--- Step 10: Analyzing Model Coefficients ---")
# One row per feature (row labels are the feature names), single column.
coeff_df = pd.DataFrame(model.coef_, X.columns, columns=["Coefficient"])
# --- Show Output for Step 10 ---
print("Feature Importance (Model Coefficients):")
print(coeff_df)
print("\nThis means for every 1 unit increase:")
# Look each value up by (row label, column label). The earlier form
# `.loc[row][0]` used an integer as a *position* on a label-indexed Series,
# which pandas deprecated in 2.1 and will treat as a label in the future.
print(f"- Size: Price increases by ~${coeff_df.loc['Size_sqft', 'Coefficient']:.2f}")
print(f"- Bedrooms: Price increases by ~${coeff_df.loc['Bedrooms', 'Coefficient']:.2f}")
# Negated so the sentence reads as a positive "decrease" amount.
print(f"- Age: Price decreases by ~${-coeff_df.loc['Age_years', 'Coefficient']:.2f}")


In [None]:

# Step 11: Predict New House Price from User Input
# Interactive loop: read three integers from the user, predict a price with
# the fitted `model`, and repeat until the user declines. Any ValueError
# (non-integer text or an out-of-range value) restarts the prompt sequence.
print("\n" + "="*45)
print("💡 PREDICT THE PRICE OF YOUR HOUSE 💡")
print("="*45)

# The model was fitted on a DataFrame, so it recorded its column names.
# Reusing them for prediction input avoids scikit-learn's
# "X does not have valid feature names" warning on every predict() call.
feature_names = list(model.feature_names_in_)

while True:
    try:
        user_size = int(input("Enter the size of the house in sq.ft: "))
        user_bedrooms = int(input("Enter the number of bedrooms: "))
        user_age = int(input("Enter the age of the house in years: "))

        # Reject physically meaningless values; routed through ValueError so
        # the same "invalid input" handler below deals with them.
        if user_size <= 0 or user_bedrooms < 0 or user_age < 0:
            raise ValueError("size must be positive; bedrooms and age must be non-negative")

        # Single-row frame whose columns match what the model was trained on.
        new_house_features = pd.DataFrame(
            [[user_size, user_bedrooms, user_age]],
            columns=feature_names,
        )
        predicted_price = model.predict(new_house_features)

        print("\n----------------------------------------------------")
        print(f"✨ Predicted price for the house is: ${predicted_price[0]:,.2f}")
        print("----------------------------------------------------\n")

        # Tolerate surrounding whitespace and the short form "y".
        another = input("Do you want to predict another house price? (yes/no): ").strip().lower()
        if another not in ('yes', 'y'):
            print("Thank you for using the House Price Predictor! 👋")
            break

    except ValueError:
        print("\n❌ Invalid input. Please enter valid numbers for all fields. Let's try again.\n")