In [24]:
import numpy as np
import pandas as pd

# Size of the synthetic housing dataset (one row per house).
num_data_points = 100

# Fix the global NumPy seed so every run draws the same samples.
np.random.seed(42)

# Draw the three integer features; randint's `high` bound is exclusive.
# The order of these draws matters: it determines the values produced by the seeded RNG.
square_footage = np.random.randint(low=800, high=3000, size=num_data_points)
bedrooms = np.random.randint(low=1, high=6, size=num_data_points)
bathrooms = np.random.randint(low=1, high=4, size=num_data_points)

# Gaussian noise (mean 0, standard deviation 10000) added on top of the linear price.
random_noise = np.random.normal(loc=0, scale=10000, size=num_data_points)

# Ground-truth relationship the regression is later expected to recover:
# price = 100 * square_footage + 20000 * bedrooms + 15000 * bathrooms + random_noise
price = (
    100 * square_footage
    + 20000 * bedrooms
    + 15000 * bathrooms
    + random_noise
)

# Assemble the columns (in display order) into a mapping, then into a DataFrame.
data = dict(zip(
    ['SquareFootage', 'Bedrooms', 'Bathrooms', 'Price'],
    [square_footage, bedrooms, bathrooms, price],
))

df = pd.DataFrame(data)

# Preview the first five rows of the generated dataset.
print(df.head())

# Persist the dataset as CSV, without the index column.
df.to_csv('house_prices_dataset.csv', index=False)


   SquareFootage  Bedrooms  Bathrooms          Price
0           1660         2          1  231455.711683
1           2094         2          3  290978.582874
2           1930         4          2  293739.533808
3           1895         5          1  299370.346824
4           2438         3          3  355901.092404


In [25]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [26]:
# Load the house-price dataset from its CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='house_prices_dataset.csv')

In [27]:
# Separate the predictor columns from the regression target.
feature_columns = ['SquareFootage', 'Bedrooms', 'Bathrooms']
X = df[feature_columns]
y = df['Price']

In [28]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
# Linear regression fitted on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [30]:
# Predict prices for the held-out test rows.
y_pred = model.predict(X=X_test)

In [32]:
# Score the held-out predictions against the true prices.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report each metric on its own line as "<label> <value>".
for label, score in (("Mean Squared Error:", mse), ("R-squared:", r2)):
    print(label, score)

Mean Squared Error: 116010738.06377856
R-squared: 0.9744712988801278


In [33]:
# Show the fitted parameters; coefficients follow the column order of the training features.
for label, fitted in (("Coefficients:", model.coef_), ("Intercept:", model.intercept_)):
    print(label, fitted)

Coefficients: [  100.2098746  19235.23494845 12363.86499121]
Intercept: 7771.145988883742
