<a href="https://colab.research.google.com/github/Kavya-sri-05/genai/blob/main/housingprice_using_sunthetic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# 📦 Import required libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error

# 🔢 Generate synthetic data
# Seed the legacy global RNG so every run produces the same dataset.
np.random.seed(42)
num_samples = 200

# NOTE: the features are drawn in a fixed order (size, bedrooms, neighborhood,
# age) and the price noise is drawn last; reordering these calls would change
# the generated values even with the same seed.
data = {}
data['size_sqft'] = np.random.randint(600, 4000, num_samples)
data['bedrooms'] = np.random.randint(1, 6, num_samples)
data['neighborhood'] = np.random.choice(['A', 'B', 'C'], num_samples)
data['age_years'] = np.random.randint(0, 50, num_samples)

# Synthetic price generation logic
# Price grows with size, bedroom count and neighborhood premium, and shrinks
# with age. Any neighborhood not listed in the table gets the lowest premium.
_premium_by_neighborhood = {'A': 30000, 'B': 20000}
_neighborhood_premium = np.array(
    [_premium_by_neighborhood.get(n, 10000) for n in data['neighborhood']]
)

# Integer-valued part of the price (exact arithmetic, no noise yet).
_deterministic_price = (
    data['size_sqft'] * 200
    + data['bedrooms'] * 10000
    + _neighborhood_premium
    - data['age_years'] * 1000
)

# Add Gaussian noise (mean 0, standard deviation 10000) on top.
base_price = _deterministic_price + np.random.normal(0, 10000, num_samples)

# Add target variable (fractional part is truncated by the int cast)
data['price'] = base_price.astype(int)

# 🔍 Convert to DataFrame
df = pd.DataFrame(data)
print("🔢 First few rows of data:\n", df.head())

# 🧼 One-hot encode categorical column
# drop_first=True omits the first neighborhood level, so it becomes the
# baseline represented by all remaining dummy columns being 0.
df_encoded = pd.get_dummies(df, columns=['neighborhood'], drop_first=True)

# ✂️ Split features and target
# Everything except the price column is a model input.
y = df_encoded['price']
X = df_encoded.drop(columns=['price'])

# 🔀 Split into training and test data
# 20% of the rows are held out; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 🧠 Train linear regression model
# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)

# 🔮 Predict on test data
y_pred = model.predict(X_test)

# 📊 Evaluate
# MSE is expressed in squared price units.
mse = mean_squared_error(y_test, y_pred)
print(f"\n📉 Mean Squared Error: {mse:.2f}")

# 🧾 Predict new house price from user input
# Only parsing and validation live inside the try block, so genuine input
# problems are reported as such while unexpected failures in the model call
# are not mislabelled as "Error with input".
try:
    size = float(input("\nEnter house size (sqft): "))
    bedrooms = int(input("Enter number of bedrooms: "))
    age = int(input("Enter age of house (in years): "))
    # Strip surrounding whitespace so that e.g. "b " is still recognised.
    neighborhood = input("Enter neighborhood (A, B, or C): ").strip().upper()

    # float() happily parses "nan" and "inf"; reject them along with
    # non-positive sizes, which cannot describe a real house.
    if not np.isfinite(size) or size <= 0:
        raise ValueError("house size must be a positive, finite number")
    if bedrooms < 0:
        raise ValueError("number of bedrooms cannot be negative")
    if age < 0:
        raise ValueError("age of house cannot be negative")
    # Without this check any unrecognised value would leave both dummy
    # columns at 0 and be silently priced as neighborhood A.
    if neighborhood not in ('A', 'B', 'C'):
        raise ValueError(
            f"unknown neighborhood {neighborhood!r}; expected A, B, or C"
        )

except (ValueError, EOFError) as e:
    # ValueError: unparsable or out-of-range input.
    # EOFError: no interactive input is available.
    print("⚠️ Error with input:", e)

else:
    # Prepare input row
    # Neighborhood A is the baseline category: both dummy columns are 0.
    user_data = {
        'size_sqft': [size],
        'bedrooms': [bedrooms],
        'age_years': [age],
        'neighborhood_B': [1 if neighborhood == 'B' else 0],
        'neighborhood_C': [1 if neighborhood == 'C' else 0]
    }

    user_df = pd.DataFrame(user_data)
    prediction = model.predict(user_df)[0]
    print(f"\n🏡 Estimated House Price: ${prediction:,.2f}")


🔢 First few rows of data:
    size_sqft  bedrooms neighborhood  age_years   price
0       3774         1            B         34  757253
1       1460         1            B         22  313686
2       1894         1            A         28  381150
3       1730         3            B         42  360860
4       1695         1            B         10  369584

📉 Mean Squared Error: 85595315.28

Enter house size (sqft): 1500
Enter number of bedrooms: 2
Enter age of house (in years): 10
Enter neighborhood (A, B, or C): A

🏡 Estimated House Price: $338,255.47
