In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Sample dataset, written one house per row:
#   (size, bedrooms, location code, price)
# Location codes -> 1: suburban, 2: urban, 3: rural
house_rows = [
    (1400, 3, 1, 200000),
    (1600, 3, 1, 250000),
    (1700, 2, 2, 220000),
    (1875, 4, 3, 300000),
    (1100, 2, 1, 180000),
    (1550, 3, 2, 240000),
    (2350, 4, 2, 350000),
    (2450, 5, 3, 380000),
    (1425, 3, 1, 210000),
    (1700, 3, 2, 260000),
]
column_names = ('size', 'bedrooms', 'location', 'price')

# Transpose the rows into a column-oriented mapping (column name -> list of values).
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*house_rows))
}

df = pd.DataFrame(data)

# Target (y) is the price column; features (X) are the three remaining columns,
# selected explicitly so the column order is fixed.
y = df['price']
X = df.loc[:, ['size', 'bedrooms', 'location']]

# Hold out 20% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Build the linear regression estimator and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Predicted prices for the held-out test rows
y_pred = model.predict(X_test)

# Score the held-out predictions against the true prices:
# mean squared error and the R-squared coefficient of determination.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals
print(f"Mean squared error: {mse:.2f}")
print(f"R-squared score: {r2:.2f}")

# Predict price for a new house: 1800 sq ft, 3 bedrooms, urban location (code 2).
#
# The model was fitted on a DataFrame, so the query row is built as a DataFrame
# carrying the same column names, in the same order, as the training features X.
# Passing a bare nested list instead relies purely on positional column order
# and makes scikit-learn (>= 1.0) emit a "X does not have valid feature names"
# UserWarning.
new_house = pd.DataFrame([[1800, 3, 2]], columns=X.columns)
predicted_price = model.predict(new_house)  # array with one prediction
print(f"Predicted price for the new house: ${predicted_price[0]:.2f}")


Mean squared error: 128318889.36
R-squared score: 0.68
Predicted price for the new house: $263906.95


