# Imports

In [90]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Generate the synthetic dataset

In [139]:
# Seed NumPy's global RNG so the synthetic dataset (and every number derived
# from it further down) is identical on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Set the number of samples
n = 1000
# Exclusive upper bound passed to np.random.randint when a noise scale is drawn
noise_range = 3

# Generate random features.
# np.random.randint excludes its upper bound, so e.g. bedrooms is in 1..5
# and year_built is in 1950..2021.
sqft = np.random.randint(500, 5000, size=n)
bedrooms = np.random.randint(1, 6, size=n)
bathrooms = np.random.choice([1, 2, 3], size=n)
garage = np.random.choice([0, 1, 2, 3], size=n)
year_built = np.random.randint(1950, 2022, size=n)

# Define a noisy linear label generator (despite the name, nothing is fitted here)
def linear_regression(x, y, m=1, b=0, noise=0):
    """Return integer samples of ``m * x + b`` with added Gaussian noise.

    Parameters
    ----------
    x : scalar or array-like
        Input values; converted with ``np.asarray`` so plain Python numbers
        and lists are accepted as well as NumPy arrays.
    y : object
        Unused. Kept only so existing positional calls keep working.
    m : number, default 1
        Slope applied to ``x``.
    b : number, default 0
        Intercept added to ``m * x``.
    noise : float, default 0
        Standard deviation of the zero-mean normal noise added to every
        element. ``0`` makes the output deterministic.

    Returns
    -------
    numpy.ndarray
        ``int64`` array with the same shape as ``x``. The float-to-int cast
        truncates the fractional part toward zero.
    """
    y_pred = m * np.asarray(x) + b
    # A separate name keeps the caller-supplied scale (``noise``) intact.
    jitter = np.random.normal(scale=noise, size=np.shape(y_pred))
    return np.asarray(y_pred + jitter).astype(np.int64)

# Build the label as a sum of per-feature noisy linear contributions.
# Every term draws its own noise scale with np.random.randint(0, noise_range)
# immediately before that term is generated, in the order listed here.
price_terms = (
    (sqft, {'m': 50, 'b': 50000}),
    (bedrooms, {'m': 5000}),
    (bathrooms, {'m': 3000}),
    (garage, {'m': 2000}),
    (year_built, {'m': -100, 'b': 200000}),
)
sale_price = sum(
    linear_regression(feature, 100, noise=np.random.randint(0, noise_range), **line)
    for feature, line in price_terms
)

# Combine the features and labels into one table.
# A single name -> array mapping keeps each column label next to the data it
# describes, so the stacked array and the header cannot drift out of order.
# Requires pandas to be imported as `pd` in the imports cell.
table_columns = {
    'sqft': sqft,
    'bedrooms': bedrooms,
    'bathrooms': bathrooms,
    'garage': garage,
    'year_built': year_built,
    'sale_price': sale_price,
}
data = np.column_stack(tuple(table_columns.values()))
df = pd.DataFrame(data, columns=list(table_columns))
df

Unnamed: 0,sqft,bedrooms,bathrooms,garage,year_built,sale_price
0,4128,4,1,3,1977,287701
1,1641,3,3,3,1972,164853
2,1180,3,2,0,1985,131500
3,3108,1,3,2,2018,221598
4,4192,4,2,1,1964,291198
...,...,...,...,...,...,...
995,3374,2,1,3,1976,240103
996,2256,2,2,0,1971,181701
997,1348,2,1,2,2017,132701
998,1319,2,3,2,2018,137149


# Split the data into training and testing sets

In [140]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable for a given DataFrame.
feature_names = ['sqft', 'bedrooms', 'bathrooms', 'garage', 'year_built']
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_names],
    df['sale_price'],
    test_size=0.2,
    random_state=42,
)
X_train.shape, X_test.shape

((800, 5), (200, 5))

# Train the model

In [141]:
# Instantiate scikit-learn's linear regression estimator with default settings
regressor = LinearRegression()

# Fit on the training split only; the test split is kept for evaluation
regressor.fit(X=X_train, y=y_train)

# Make predictions on the testing set and evaluate the model

In [142]:
# Predict prices for the held-out rows with the fitted model
y_pred = regressor.predict(X_test)

# Mean squared error between the actual and predicted test labels
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print('Mean squared error: ', mse)

# Report the fitted parameters: coefficients first, then the intercept
for label, value in (
    ('Coefficients: ', regressor.coef_),
    ('Intercept: ', regressor.intercept_),
):
    print(label, value)

Mean squared error:  8.953278658915371
Coefficients:  [  50.00009456 5000.00295923 2999.94012079 1999.88586537  -99.98090235]
Intercept:  249960.88455242134
