# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Sample data (replace with your actual house price data).
# Each position across the lists describes one house.
data = {
    'area': [1000, 1500, 1200, 1800, 2000, 1100, 1600, 1900, 1300, 1700],
    'bedrooms': [2, 3, 2, 4, 4, 2, 3, 4, 3, 3],
    'bathrooms': [1, 2, 1, 2, 3, 1, 2, 2, 2, 2],
    'location': ['A', 'B', 'A', 'C', 'B', 'A', 'B', 'C', 'A', 'C'],  # Categorical feature
    'price': [250000, 350000, 300000, 450000, 500000, 275000, 380000, 470000, 320000, 420000],
}
df = pd.DataFrame(data)

# One-hot encode the categorical location column. drop_first=True drops the
# first category ('A'), leaving location_B and location_C; a house in
# location 'A' is the baseline where both indicator columns are 0.
df = pd.get_dummies(df, columns=['location'], drop_first=True)

# Separate features (X) and target variable (y)
X = df.drop('price', axis=1)
y = df['price']

# Split data into training and testing sets. With 10 rows and test_size=0.2
# the test set holds only 2 houses, so the reported error is a rough
# illustration rather than a reliable estimate.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


# Predict the price of a new house. Describe it with the raw location label
# instead of hand-writing the dummy columns, so the encoding cannot drift
# from the one used for training.
new_house = pd.DataFrame({
    'area': [1400],
    'bedrooms': [3],
    'bathrooms': [2],
    'location': ['C'],
})

# Encode without drop_first: a single row has only one category, which
# drop_first would remove. Then align to the training feature columns:
# reindex adds any missing indicator (here location_B) as 0, discards the
# baseline indicator (location_A) if present, and fixes the column order.
# NOTE: a location never seen in training ends up with all indicators 0,
# i.e. it is silently treated like the baseline location 'A'.
new_house_data = pd.get_dummies(new_house, columns=['location']).reindex(
    columns=X.columns, fill_value=0
)

predicted_price = model.predict(new_house_data)
print(f'Predicted price for the new house: {predicted_price[0]}')